From 38170520a1754c5f3d0da522182c922fd5e32555 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Sun, 30 Jan 2022 23:34:56 +0100 Subject: [PATCH] refactoring --- HowBadIsMyBatch.ipynb | 60 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index 673fe4d95e0..1921ab0a25e 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -21,12 +21,6 @@ "metadata": {}, "outputs": [], "source": [ - "def filterDataFrame(df, manufacturer, dose):\n", - " return df[\n", - " (df[\"VAX_TYPE\"] == \"COVID19\") &\n", - " (df[\"VAX_MANU\"] == manufacturer) &\n", - " (df[\"VAX_DOSE_SERIES\"].str.contains(dose))]\n", - "\n", "def createDataFrameFromDescr(vaersDescr):\n", " return pd.merge(\n", " vaersDescr['VAERSDATA'],\n", @@ -41,7 +35,21 @@ " return pd.concat(dataFrames)\n", "\n", "def createAndFilterDataFrameFromDescrs(vaersDescrs, manufacturer, dose):\n", - " return filterDataFrame(createDataFrameFromDescrs(vaersDescrs), manufacturer, dose)\n" + " def filterDataFrame(df):\n", + " return df[\n", + " (df[\"VAX_TYPE\"] == \"COVID19\") &\n", + " (df[\"VAX_MANU\"] == manufacturer) &\n", + " (df[\"VAX_DOSE_SERIES\"].str.contains(dose))]\n", + " \n", + " return filterDataFrame(createDataFrameFromDescrs(vaersDescrs))\n", + "\n", + "def createDataFrameSevereEffectsFromDescrs(vaersDescrs, dose):\n", + " def filterDataFrame(df):\n", + " return df[\n", + " (df[\"VAX_TYPE\"] == \"COVID19\") &\n", + " (df[\"VAX_DOSE_SERIES\"].str.contains(dose))]\n", + "\n", + " return filterDataFrame(createDataFrameFromDescrs(vaersDescrs))" ] }, { @@ -66,7 +74,7 @@ " 'VAERSDATA':\n", " read_csv(\n", " folder + year + \"VAERSDATA.csv\",\n", - " ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE']),\n", + " ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT']),\n", " 'VAERSVAX':\n", " read_csv(\n", " folder + year + \"VAERSVAX.csv\",\n", @@ -177,6 +185,42 @@ " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", "\n", + " def test_createDataFrameFromForSevereEffects(self):\n", + " # Given\n", + " vaersDescrs = [\n", + " {\n", + " 'VAERSDATA': self.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", + " data = [ ['Y', 'Y', np.NaN, 'Y', 'Y'],\n", + " [np.NaN, np.NaN, 'Y', np.NaN, 'Y']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"]),\n", + " 'VAERSVAX': self.createDataFrame(\n", + " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", + " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", + " ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"],\n", + " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " }\n", + " ]\n", + " \n", + " # When\n", + " dataFrame = createDataFrameSevereEffectsFromDescrs(vaersDescrs, '1')\n", + " \n", + " # Then\n", + " dataFrameExpected = self.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", + " data = [ ['Y', 'Y', np.NaN, 'Y', 'Y', 'COVID19', 'MODERNA', '037K20A', '1'],\n", + " [np.NaN, np.NaN, 'Y', np.NaN, 'Y', 'COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"],\n", + " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", + "\n", " def test_createAndFilterDataFrameFromDescrsWithFirstDose(self):\n", " # Given\n", " vaersDescrs = [\n",