starting page "International Deadly Lots"
This commit is contained in:
@@ -41,10 +41,17 @@
|
|||||||
" 'VAERSVAX': self._readVAERSVAX(folder + year + \"VAERSVAX.csv\")\n",
|
" 'VAERSVAX': self._readVAERSVAX(folder + year + \"VAERSVAX.csv\")\n",
|
||||||
" }\n",
|
" }\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" def readNonDomesticVaersDescr(self):\n",
|
||||||
|
" folder = self.dataDir + \"/NonDomesticVAERSData/\"\n",
|
||||||
|
" return {\n",
|
||||||
|
" 'VAERSDATA': self._readVAERSDATA(folder + \"NonDomesticVAERSDATA.csv\"),\n",
|
||||||
|
" 'VAERSVAX': self._readVAERSVAX(folder + \"NonDomesticVAERSVAX.csv\")\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
" def _readVAERSDATA(self, file):\n",
|
" def _readVAERSDATA(self, file):\n",
|
||||||
" return self._read_csv(\n",
|
" return self._read_csv(\n",
|
||||||
" file = file,\n",
|
" file = file,\n",
|
||||||
" usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
|
" usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'SPLTTYPE'],\n",
|
||||||
" parse_dates = ['RECVDATE'],\n",
|
" parse_dates = ['RECVDATE'],\n",
|
||||||
" date_parser = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\"))\n",
|
" date_parser = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\"))\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -117,7 +124,14 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" @staticmethod\n",
|
" @staticmethod\n",
|
||||||
" def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n",
|
" def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n",
|
||||||
" dataFrame[column] = np.where(dataFrame[column] == 'Y', 1, 0)\n",
|
" dataFrame[column] = DataFrameNormalizer._where(\n",
|
||||||
|
" condition = dataFrame[column] == 'Y',\n",
|
||||||
|
" trueValue = 1,\n",
|
||||||
|
" falseValue = 0)\n",
|
||||||
|
"\n",
|
||||||
|
" @staticmethod\n",
|
||||||
|
" def _where(condition, trueValue, falseValue):\n",
|
||||||
|
" return np.where(condition, trueValue, falseValue) \n",
|
||||||
" "
|
" "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -190,6 +204,7 @@
|
|||||||
" 'L_THREAT': 'sum',\n",
|
" 'L_THREAT': 'sum',\n",
|
||||||
" 'DISABLE': 'sum'\n",
|
" 'DISABLE': 'sum'\n",
|
||||||
" },\n",
|
" },\n",
|
||||||
|
" # FK-TODO: rename \"ADRs\" and \"Total reports\" to \"Total Number of Adverse Reaction Reports\" in all places\n",
|
||||||
" columnNameMappingsDict = {\n",
|
" columnNameMappingsDict = {\n",
|
||||||
" \"DIED_size\": \"ADRs\",\n",
|
" \"DIED_size\": \"ADRs\",\n",
|
||||||
" \"DIED_sum\": \"DEATHS\",\n",
|
" \"DIED_sum\": \"DEATHS\",\n",
|
||||||
@@ -258,7 +273,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 29,
|
||||||
"id": "41d4fa30",
|
"id": "41d4fa30",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -283,6 +298,7 @@
|
|||||||
" dataFrame['VAX_DOSE_SERIES'].rename('Dose')\n",
|
" dataFrame['VAX_DOSE_SERIES'].rename('Dose')\n",
|
||||||
" ]))\n",
|
" ]))\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" # FK-TODO: DRY because it generates a subset of BatchCodeTableHelper.createSevereEffectsBatchCodeTable()\n",
|
||||||
" @staticmethod\n",
|
" @staticmethod\n",
|
||||||
" def _getDoseTable(dataFrame):\n",
|
" def _getDoseTable(dataFrame):\n",
|
||||||
" doseTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n",
|
" doseTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n",
|
||||||
@@ -303,6 +319,38 @@
|
|||||||
" return doseTable\n"
|
" return doseTable\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "09e6b511",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pycountry\n",
|
||||||
|
"\n",
|
||||||
|
"class InternationalLotAnalysis:\n",
|
||||||
|
" \n",
|
||||||
|
" @staticmethod\n",
|
||||||
|
" def getInternationalLotTable(dataFrame):\n",
|
||||||
|
" dataFrame = DataFrameFilter().filterByCovid19(dataFrame)\n",
|
||||||
|
" dataFrame['Country'] = dataFrame.apply(InternationalLotAnalysis._fun2, axis = 'columns')\n",
|
||||||
|
" result = DoseAnalysis._getDoseTable(dataFrame.groupby(dataFrame['Country']))\n",
|
||||||
|
" return result.sort_values(by = 'Severe reports (%)', ascending = False)\n",
|
||||||
|
"\n",
|
||||||
|
"    # FK-TODO: refactor: give _fun2 a descriptive name (it maps the first two characters of SPLTTYPE to a country name)\n",
|
||||||
|
" @staticmethod\n",
|
||||||
|
" def _fun2(row):\n",
|
||||||
|
" if isinstance(row['SPLTTYPE'], str):\n",
|
||||||
|
" country = pycountry.countries.get(alpha_2 = row['SPLTTYPE'][:2])\n",
|
||||||
|
" if country is None:\n",
|
||||||
|
" return 'NO-COUNTRY: ' + row['SPLTTYPE'][:2]\n",
|
||||||
|
" else:\n",
|
||||||
|
" return country.name\n",
|
||||||
|
" else:\n",
|
||||||
|
"            # FK-TODO: add the missing test case to InternationalLotAnalysisTest for this else branch (SPLTTYPE is not a str)\n",
|
||||||
|
" return 'NO-COUNTRY: ' + str(row['SPLTTYPE'])\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -733,6 +781,52 @@
|
|||||||
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
|
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "c784bfef",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from pandas.testing import assert_frame_equal\n",
|
||||||
|
"\n",
|
||||||
|
"class InternationalLotAnalysisTest(unittest.TestCase):\n",
|
||||||
|
"\n",
|
||||||
|
" def test_getInternationalLotTable(self):\n",
|
||||||
|
" # Given\n",
|
||||||
|
" dataFrame = self.createDataFrame(\n",
|
||||||
|
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE'],\n",
|
||||||
|
" data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 'GBPFIZER INC2020486806'],\n",
|
||||||
|
" [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224'],\n",
|
||||||
|
" [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224']],\n",
|
||||||
|
" index = [\n",
|
||||||
|
" \"1048786\",\n",
|
||||||
|
" \"1048786\",\n",
|
||||||
|
" \"4711\"])\n",
|
||||||
|
" \n",
|
||||||
|
" # When\n",
|
||||||
|
" internationalLotTable = InternationalLotAnalysis.getInternationalLotTable(dataFrame)\n",
|
||||||
|
"\n",
|
||||||
|
" # Then\n",
|
||||||
|
" assert_frame_equal(\n",
|
||||||
|
" internationalLotTable,\n",
|
||||||
|
" self.createDataFrame(\n",
|
||||||
|
" columns = ['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Severe reports (%)'],\n",
|
||||||
|
" data = [ [2, 2, 1, 1, (2 + 1 + 1) / 2 * 100],\n",
|
||||||
|
" [1, 1, 0, 0, (1 + 0 + 0) / 1 * 100]],\n",
|
||||||
|
" index = pd.Index(\n",
|
||||||
|
" [\n",
|
||||||
|
" 'France',\n",
|
||||||
|
" 'United Kingdom'\n",
|
||||||
|
" \n",
|
||||||
|
" ],\n",
|
||||||
|
" name = 'Country')))\n",
|
||||||
|
"\n",
|
||||||
|
" # FK-TODO: createDataFrame() is defined in almost every test class: DRY \n",
|
||||||
|
" def createDataFrame(self, index, columns, data, dtypes = {}):\n",
|
||||||
|
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -901,6 +995,53 @@
|
|||||||
"doseByMonthTable.to_excel('results/doseByMonthTable.xlsx')\n",
|
"doseByMonthTable.to_excel('results/doseByMonthTable.xlsx')\n",
|
||||||
"doseByMonthTable"
|
"doseByMonthTable"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "075aa6c9",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### International Deadly Lots"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "8f8880f4",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# https://www.howbadismybatch.com/international.html\n",
|
||||||
|
"\n",
|
||||||
|
"def getInternationalLotTable():\n",
|
||||||
|
" vaersDescr = VaersDescrReader(dataDir = 'VAERS').readNonDomesticVaersDescr()\n",
|
||||||
|
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescr(vaersDescr)\n",
|
||||||
|
" DataFrameNormalizer.normalize(dataFrame)\n",
|
||||||
|
" return InternationalLotAnalysis.getInternationalLotTable(dataFrame)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "54e03231",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"internationalLotTable = getInternationalLotTable()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "7e80e958",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# FK-TODO: make the 'Total reports' threshold (currently > 50) a parameter of getInternationalLotTable()\n",
|
||||||
|
"internationalLotTable = internationalLotTable[internationalLotTable['Total reports'] > 50]\n",
|
||||||
|
"internationalLotTable.to_excel('results/International_Deadly_Lots.xlsx')\n",
|
||||||
|
"internationalLotTable"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
4
help.txt
4
help.txt
@@ -4,8 +4,8 @@ get VAERS data:
|
|||||||
- download data (e.g. 2022VAERSData.zip) from https://vaers.hhs.gov/data/datasets.html and save and unzip in VAERS folder
|
- download data (e.g. 2022VAERSData.zip) from https://vaers.hhs.gov/data/datasets.html and save and unzip in VAERS folder
|
||||||
|
|
||||||
FK-TODO:
|
FK-TODO:
|
||||||
- https://www.howbadismybatch.com/firstsecond.html nachprogrammieren
|
- https://www.howbadismybatch.com/international.html nachprogrammieren
|
||||||
- VAX_LOT-Spalte normalisieren, d.h. mindestens toUpperCase() darauf anwenden
|
- https://www.howbadismybatch.com/geography.html nachprogrammieren
|
||||||
- handle VAX_DOSE_SERIES = 'UNK' or 'N/A' like '1'?
|
- handle VAX_DOSE_SERIES = 'UNK' or 'N/A' like '1'?
|
||||||
- Format des jeweiligen Herstellers berücksichtigen und "verschmutzte" Einträge säubern, denn sie stellen alle dieselbe Charge dar:
|
- Format des jeweiligen Herstellers berücksichtigen und "verschmutzte" Einträge säubern, denn sie stellen alle dieselbe Charge dar:
|
||||||
039k20a
|
039k20a
|
||||||
|
|||||||
Reference in New Issue
Block a user