starting page "International Deadly Lots"

This commit is contained in:
frankknoll
2022-02-07 11:06:14 +01:00
parent b5472a2cc2
commit 4cf718c16d
2 changed files with 146 additions and 5 deletions

View File

@@ -41,10 +41,17 @@
" 'VAERSVAX': self._readVAERSVAX(folder + year + \"VAERSVAX.csv\")\n",
" }\n",
"\n",
" def readNonDomesticVaersDescr(self):\n",
" folder = self.dataDir + \"/NonDomesticVAERSData/\"\n",
" return {\n",
" 'VAERSDATA': self._readVAERSDATA(folder + \"NonDomesticVAERSDATA.csv\"),\n",
" 'VAERSVAX': self._readVAERSVAX(folder + \"NonDomesticVAERSVAX.csv\")\n",
" }\n",
"\n",
" def _readVAERSDATA(self, file):\n",
" return self._read_csv(\n",
" file = file,\n",
" usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
" usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'SPLTTYPE'],\n",
" parse_dates = ['RECVDATE'],\n",
" date_parser = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\"))\n",
"\n",
@@ -117,7 +124,14 @@
"\n",
" @staticmethod\n",
" def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n",
" dataFrame[column] = np.where(dataFrame[column] == 'Y', 1, 0)\n",
" dataFrame[column] = DataFrameNormalizer._where(\n",
" condition = dataFrame[column] == 'Y',\n",
" trueValue = 1,\n",
" falseValue = 0)\n",
"\n",
" @staticmethod\n",
" def _where(condition, trueValue, falseValue):\n",
" return np.where(condition, trueValue, falseValue) \n",
" "
]
},
@@ -190,6 +204,7 @@
" 'L_THREAT': 'sum',\n",
" 'DISABLE': 'sum'\n",
" },\n",
" # FK-TODO: rename \"ADRs\" and \"Total reports\" to \"Total Number of Adverse Reaction Reports\" in all places\n",
" columnNameMappingsDict = {\n",
" \"DIED_size\": \"ADRs\",\n",
" \"DIED_sum\": \"DEATHS\",\n",
@@ -258,7 +273,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 29,
"id": "41d4fa30",
"metadata": {},
"outputs": [],
@@ -283,6 +298,7 @@
" dataFrame['VAX_DOSE_SERIES'].rename('Dose')\n",
" ]))\n",
"\n",
" # FK-TODO: DRY because it generates a subset of BatchCodeTableHelper.createSevereEffectsBatchCodeTable()\n",
" @staticmethod\n",
" def _getDoseTable(dataFrame):\n",
" doseTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n",
@@ -303,6 +319,38 @@
" return doseTable\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "09e6b511",
"metadata": {},
"outputs": [],
"source": [
"import pycountry\n",
"\n",
"class InternationalLotAnalysis:\n",
" \n",
" @staticmethod\n",
" def getInternationalLotTable(dataFrame):\n",
" dataFrame = DataFrameFilter().filterByCovid19(dataFrame)\n",
" dataFrame['Country'] = dataFrame.apply(InternationalLotAnalysis._fun2, axis = 'columns')\n",
" result = DoseAnalysis._getDoseTable(dataFrame.groupby(dataFrame['Country']))\n",
" return result.sort_values(by = 'Severe reports (%)', ascending = False)\n",
"\n",
" # FK-TODO: refactor\n",
" @staticmethod\n",
" def _fun2(row):\n",
" if isinstance(row['SPLTTYPE'], str):\n",
" country = pycountry.countries.get(alpha_2 = row['SPLTTYPE'][:2])\n",
" if country is None:\n",
" return 'NO-COUNTRY: ' + row['SPLTTYPE'][:2]\n",
" else:\n",
" return country.name\n",
" else:\n",
" # FK-TODO: add missing InternationalLotAnalysisTests for this else branch\n",
" return 'NO-COUNTRY: ' + str(row['SPLTTYPE'])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -733,6 +781,52 @@
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c784bfef",
"metadata": {},
"outputs": [],
"source": [
"from pandas.testing import assert_frame_equal\n",
"\n",
"class InternationalLotAnalysisTest(unittest.TestCase):\n",
"\n",
" def test_getInternationalLotTable(self):\n",
" # Given\n",
" dataFrame = self.createDataFrame(\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE'],\n",
" data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 'GBPFIZER INC2020486806'],\n",
" [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224'],\n",
" [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224']],\n",
" index = [\n",
" \"1048786\",\n",
" \"1048786\",\n",
" \"4711\"])\n",
" \n",
" # When\n",
" internationalLotTable = InternationalLotAnalysis.getInternationalLotTable(dataFrame)\n",
"\n",
" # Then\n",
" assert_frame_equal(\n",
" internationalLotTable,\n",
" self.createDataFrame(\n",
" columns = ['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Severe reports (%)'],\n",
" data = [ [2, 2, 1, 1, (2 + 1 + 1) / 2 * 100],\n",
" [1, 1, 0, 0, (1 + 0 + 0) / 1 * 100]],\n",
" index = pd.Index(\n",
" [\n",
" 'France',\n",
" 'United Kingdom'\n",
" \n",
" ],\n",
" name = 'Country')))\n",
"\n",
" # FK-TODO: createDataFrame() is defined in almost every test class: DRY \n",
" def createDataFrame(self, index, columns, data, dtypes = {}):\n",
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -901,6 +995,53 @@
"doseByMonthTable.to_excel('results/doseByMonthTable.xlsx')\n",
"doseByMonthTable"
]
},
{
"cell_type": "markdown",
"id": "075aa6c9",
"metadata": {},
"source": [
"### International Deadly Lots"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8f8880f4",
"metadata": {},
"outputs": [],
"source": [
"# https://www.howbadismybatch.com/international.html\n",
"\n",
"def getInternationalLotTable():\n",
" vaersDescr = VaersDescrReader(dataDir = 'VAERS').readNonDomesticVaersDescr()\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescr(vaersDescr)\n",
" DataFrameNormalizer.normalize(dataFrame)\n",
" return InternationalLotAnalysis.getInternationalLotTable(dataFrame)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "54e03231",
"metadata": {},
"outputs": [],
"source": [
"internationalLotTable = getInternationalLotTable()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e80e958",
"metadata": {},
"outputs": [],
"source": [
"# FK-TODO: make filter on 'Total reports' a parameter in getInternationalLotTable() \n",
"internationalLotTable = internationalLotTable[internationalLotTable['Total reports'] > 50]\n",
"internationalLotTable.to_excel('results/International_Deadly_Lots.xlsx')\n",
"internationalLotTable"
]
}
],
"metadata": {

View File

@@ -4,8 +4,8 @@ get VAERS data:
- download data (e.g. 2022VAERSData.zip) from https://vaers.hhs.gov/data/datasets.html, save it and unzip it into the VAERS folder
FK-TODO:
- https://www.howbadismybatch.com/firstsecond.html nachprogrammieren
- VAX_LOT-Spalte normalisieren, d.h. mindestens toUpperCase() darauf anwenden
- https://www.howbadismybatch.com/international.html nachprogrammieren
- https://www.howbadismybatch.com/geography.html nachprogrammieren
- handle VAX_DOSE_SERIES = 'UNK' or 'N/A' like '1'?
- Format des jeweiligen Herstellers berücksichtigen und "verschmutzte" Einträge säubern, denn sie stellen alle dieselbe Charge dar:
039k20a