refactoring

This commit is contained in:
frankknoll
2022-01-30 23:34:56 +01:00
parent 1cfe7f0b14
commit 38170520a1

View File

@@ -21,12 +21,6 @@
"metadata": {},
"outputs": [],
"source": [
"def filterDataFrame(df, manufacturer, dose):\n",
" return df[\n",
" (df[\"VAX_TYPE\"] == \"COVID19\") &\n",
" (df[\"VAX_MANU\"] == manufacturer) &\n",
" (df[\"VAX_DOSE_SERIES\"].str.contains(dose))]\n",
"\n",
"def createDataFrameFromDescr(vaersDescr):\n",
" return pd.merge(\n",
" vaersDescr['VAERSDATA'],\n",
@@ -41,7 +35,21 @@
" return pd.concat(dataFrames)\n",
"\n",
"def createAndFilterDataFrameFromDescrs(vaersDescrs, manufacturer, dose):\n",
" return filterDataFrame(createDataFrameFromDescrs(vaersDescrs), manufacturer, dose)\n"
" def filterDataFrame(df):\n",
" return df[\n",
" (df[\"VAX_TYPE\"] == \"COVID19\") &\n",
" (df[\"VAX_MANU\"] == manufacturer) &\n",
" (df[\"VAX_DOSE_SERIES\"].str.contains(dose))]\n",
" \n",
" return filterDataFrame(createDataFrameFromDescrs(vaersDescrs))\n",
"\n",
"def createDataFrameSevereEffectsFromDescrs(vaersDescrs, dose):\n",
" def filterDataFrame(df):\n",
" return df[\n",
" (df[\"VAX_TYPE\"] == \"COVID19\") &\n",
" (df[\"VAX_DOSE_SERIES\"].str.contains(dose))]\n",
"\n",
" return filterDataFrame(createDataFrameFromDescrs(vaersDescrs))"
]
},
{
@@ -66,7 +74,7 @@
" 'VAERSDATA':\n",
" read_csv(\n",
" folder + year + \"VAERSDATA.csv\",\n",
" ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE']),\n",
" ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT']),\n",
" 'VAERSVAX':\n",
" read_csv(\n",
" folder + year + \"VAERSVAX.csv\",\n",
@@ -177,6 +185,42 @@
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n",
" def test_createDataFrameFromForSevereEffects(self):\n",
" # Given\n",
" vaersDescrs = [\n",
" {\n",
" 'VAERSDATA': self.createDataFrame(\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
" data = [ ['Y', 'Y', np.NaN, 'Y', 'Y'],\n",
" [np.NaN, np.NaN, 'Y', np.NaN, 'Y']],\n",
" index = [\n",
" \"0916600\",\n",
" \"0916601\"]),\n",
" 'VAERSVAX': self.createDataFrame(\n",
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n",
" ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n",
" index = [\n",
" \"0916600\",\n",
" \"0916601\"],\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" }\n",
" ]\n",
" \n",
" # When\n",
" dataFrame = createDataFrameSevereEffectsFromDescrs(vaersDescrs, '1')\n",
" \n",
" # Then\n",
" dataFrameExpected = self.createDataFrame(\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" data = [ ['Y', 'Y', np.NaN, 'Y', 'Y', 'COVID19', 'MODERNA', '037K20A', '1'],\n",
" [np.NaN, np.NaN, 'Y', np.NaN, 'Y', 'COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n",
" index = [\n",
" \"0916600\",\n",
" \"0916601\"],\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n",
" def test_createAndFilterDataFrameFromDescrsWithFirstDose(self):\n",
" # Given\n",
" vaersDescrs = [\n",