starting page "International Deadly Lots"
This commit is contained in:
@@ -41,10 +41,17 @@
|
||||
" 'VAERSVAX': self._readVAERSVAX(folder + year + \"VAERSVAX.csv\")\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" def readNonDomesticVaersDescr(self):\n",
|
||||
" folder = self.dataDir + \"/NonDomesticVAERSData/\"\n",
|
||||
" return {\n",
|
||||
" 'VAERSDATA': self._readVAERSDATA(folder + \"NonDomesticVAERSDATA.csv\"),\n",
|
||||
" 'VAERSVAX': self._readVAERSVAX(folder + \"NonDomesticVAERSVAX.csv\")\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" def _readVAERSDATA(self, file):\n",
|
||||
" return self._read_csv(\n",
|
||||
" file = file,\n",
|
||||
" usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
|
||||
" usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'SPLTTYPE'],\n",
|
||||
" parse_dates = ['RECVDATE'],\n",
|
||||
" date_parser = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\"))\n",
|
||||
"\n",
|
||||
@@ -117,7 +124,14 @@
|
||||
"\n",
|
||||
" @staticmethod\n",
|
||||
" def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n",
|
||||
" dataFrame[column] = np.where(dataFrame[column] == 'Y', 1, 0)\n",
|
||||
" dataFrame[column] = DataFrameNormalizer._where(\n",
|
||||
" condition = dataFrame[column] == 'Y',\n",
|
||||
" trueValue = 1,\n",
|
||||
" falseValue = 0)\n",
|
||||
"\n",
|
||||
" @staticmethod\n",
|
||||
" def _where(condition, trueValue, falseValue):\n",
|
||||
" return np.where(condition, trueValue, falseValue) \n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
@@ -190,6 +204,7 @@
|
||||
" 'L_THREAT': 'sum',\n",
|
||||
" 'DISABLE': 'sum'\n",
|
||||
" },\n",
|
||||
" # FK-TODO: rename \"ADRs\" and \"Total reports\" to \"Total Number of Adverse Reaction Reports\" in all places\n",
|
||||
" columnNameMappingsDict = {\n",
|
||||
" \"DIED_size\": \"ADRs\",\n",
|
||||
" \"DIED_sum\": \"DEATHS\",\n",
|
||||
@@ -258,7 +273,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 29,
|
||||
"id": "41d4fa30",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -283,6 +298,7 @@
|
||||
" dataFrame['VAX_DOSE_SERIES'].rename('Dose')\n",
|
||||
" ]))\n",
|
||||
"\n",
|
||||
" # FK-TODO: DRY because it generates a subset of BatchCodeTableHelper.createSevereEffectsBatchCodeTable()\n",
|
||||
" @staticmethod\n",
|
||||
" def _getDoseTable(dataFrame):\n",
|
||||
" doseTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n",
|
||||
@@ -303,6 +319,38 @@
|
||||
" return doseTable\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "09e6b511",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pycountry\n",
|
||||
"\n",
|
||||
"class InternationalLotAnalysis:\n",
|
||||
" \n",
|
||||
" @staticmethod\n",
|
||||
" def getInternationalLotTable(dataFrame):\n",
|
||||
" dataFrame = DataFrameFilter().filterByCovid19(dataFrame)\n",
|
||||
" dataFrame['Country'] = dataFrame.apply(InternationalLotAnalysis._fun2, axis = 'columns')\n",
|
||||
" result = DoseAnalysis._getDoseTable(dataFrame.groupby(dataFrame['Country']))\n",
|
||||
" return result.sort_values(by = 'Severe reports (%)', ascending = False)\n",
|
||||
"\n",
|
||||
" # FK-TODO: refactor\n",
|
||||
" @staticmethod\n",
|
||||
" def _fun2(row):\n",
|
||||
" if isinstance(row['SPLTTYPE'], str):\n",
|
||||
" country = pycountry.countries.get(alpha_2 = row['SPLTTYPE'][:2])\n",
|
||||
" if country is None:\n",
|
||||
" return 'NO-COUNTRY: ' + row['SPLTTYPE'][:2]\n",
|
||||
" else:\n",
|
||||
" return country.name\n",
|
||||
" else:\n",
|
||||
" # FK-TODO: add missing InternationalLotAnalysisTests for this else branch\n",
|
||||
" return 'NO-COUNTRY: ' + str(row['SPLTTYPE'])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -733,6 +781,52 @@
|
||||
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c784bfef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pandas.testing import assert_frame_equal\n",
|
||||
"\n",
|
||||
"class InternationalLotAnalysisTest(unittest.TestCase):\n",
|
||||
"\n",
|
||||
" def test_getInternationalLotTable(self):\n",
|
||||
" # Given\n",
|
||||
" dataFrame = self.createDataFrame(\n",
|
||||
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE'],\n",
|
||||
" data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 'GBPFIZER INC2020486806'],\n",
|
||||
" [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224'],\n",
|
||||
" [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224']],\n",
|
||||
" index = [\n",
|
||||
" \"1048786\",\n",
|
||||
" \"1048786\",\n",
|
||||
" \"4711\"])\n",
|
||||
" \n",
|
||||
" # When\n",
|
||||
" internationalLotTable = InternationalLotAnalysis.getInternationalLotTable(dataFrame)\n",
|
||||
"\n",
|
||||
" # Then\n",
|
||||
" assert_frame_equal(\n",
|
||||
" internationalLotTable,\n",
|
||||
" self.createDataFrame(\n",
|
||||
" columns = ['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Severe reports (%)'],\n",
|
||||
" data = [ [2, 2, 1, 1, (2 + 1 + 1) / 2 * 100],\n",
|
||||
" [1, 1, 0, 0, (1 + 0 + 0) / 1 * 100]],\n",
|
||||
" index = pd.Index(\n",
|
||||
" [\n",
|
||||
" 'France',\n",
|
||||
" 'United Kingdom'\n",
|
||||
" \n",
|
||||
" ],\n",
|
||||
" name = 'Country')))\n",
|
||||
"\n",
|
||||
" # FK-TODO: createDataFrame() is defined in almost every test class: DRY \n",
|
||||
" def createDataFrame(self, index, columns, data, dtypes = {}):\n",
|
||||
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -901,6 +995,53 @@
|
||||
"doseByMonthTable.to_excel('results/doseByMonthTable.xlsx')\n",
|
||||
"doseByMonthTable"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "075aa6c9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### International Deadly Lots"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8f8880f4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# https://www.howbadismybatch.com/international.html\n",
|
||||
"\n",
|
||||
"def getInternationalLotTable():\n",
|
||||
" vaersDescr = VaersDescrReader(dataDir = 'VAERS').readNonDomesticVaersDescr()\n",
|
||||
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescr(vaersDescr)\n",
|
||||
" DataFrameNormalizer.normalize(dataFrame)\n",
|
||||
" return InternationalLotAnalysis.getInternationalLotTable(dataFrame)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "54e03231",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"internationalLotTable = getInternationalLotTable()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7e80e958",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# FK-TODO: make filter on 'Total reports' a parameter in getInternationalLotTable() \n",
|
||||
"internationalLotTable = internationalLotTable[internationalLotTable['Total reports'] > 50]\n",
|
||||
"internationalLotTable.to_excel('results/International_Deadly_Lots.xlsx')\n",
|
||||
"internationalLotTable"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
4
help.txt
4
help.txt
@@ -4,8 +4,8 @@ get VAERS data:
|
||||
- download data (e.g. 2022VAERSData.zip) from https://vaers.hhs.gov/data/datasets.html, then save and unzip it in the VAERS folder
|
||||
|
||||
FK-TODO:
|
||||
- reimplement https://www.howbadismybatch.com/firstsecond.html
|
||||
- normalize the VAX_LOT column, i.e. at least apply toUpperCase() to it
|
||||
- reimplement https://www.howbadismybatch.com/international.html
|
||||
- reimplement https://www.howbadismybatch.com/geography.html
|
||||
- handle VAX_DOSE_SERIES = 'UNK' or 'N/A' like '1'?
|
||||
- take the respective manufacturer's format into account and clean up "dirty" entries, because they all represent the same batch:
|
||||
039k20a
|
||||
|
||||
Reference in New Issue
Block a user