diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index 3687098d3cd..a56e8a30030 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -40,6 +40,7 @@ " 'VAERSDATA':\n", " self._read_csv(\n", " folder + year + \"VAERSDATA.csv\",\n", + " # FK-TODO: use Column enum\n", " ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT']),\n", " 'VAERSVAX':\n", " self._read_csv(\n", @@ -122,51 +123,59 @@ "metadata": {}, "outputs": [], "source": [ - "def createBatchCodeTable(df : pd.DataFrame):\n", - " def filterDataFrame(df, col):\n", - " return df[df[col] == 'Y'][['VAX_LOT']]\n", + "import pandas as pd\n", "\n", - " batchCodeTableDict = {\n", - " 'ADRs': df[['VAX_LOT']].value_counts(),\n", - " 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n", - " 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n", - " 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts()\n", - " }\n", - " return pd.concat(batchCodeTableDict, axis = 'columns').replace(to_replace = np.nan, value = 0)\n", + "class BatchCodeTableFactory:\n", + " \n", + " @staticmethod\n", + " def createBatchCodeTable(df : pd.DataFrame):\n", + " def filterDataFrame(df, col):\n", + " return df[df[col] == 'Y'][['VAX_LOT']]\n", "\n", - "def createManufacturerByBatchCodeTable(df):\n", - " manufacturerByBatchCodeTable = df[['VAX_LOT', 'VAX_MANU']]\n", - " manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])\n", - " return manufacturerByBatchCodeTable.set_index('VAX_LOT')\n", + " batchCodeTableDict = {\n", + " 'ADRs': df[['VAX_LOT']].value_counts(),\n", + " 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n", + " 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n", + " 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts()\n", + " }\n", + " return pd.concat(batchCodeTableDict, axis = 'columns').replace(to_replace = np.nan, value = 0)\n", "\n", - "def createCompanyByBatchCodeTable(df):\n", - " return createManufacturerByBatchCodeTable(df).rename(columns = {\"VAX_MANU\": \"COMPANY\"})\n", + " # create table from https://www.howbadismybatch.com/combined.html\n", + " # FK-TODO: DRY with createBatchCodeTable()\n", + " @staticmethod\n", + " def createSevereEffectsBatchCodeTable(df : pd.DataFrame):\n", + " def filterDataFrame(df, col):\n", + " return df[df[col] == 'Y']['VAX_LOT']\n", "\n", - "# create table from https://www.howbadismybatch.com/combined.html\n", - "# FK-TODO: DRY with createBatchCodeTable()\n", - "def createSevereEffectsBatchCodeTable(df):\n", - " def filterDataFrame(df, col):\n", - " return df[df[col] == 'Y']['VAX_LOT']\n", + " batchCodeTableDict = {\n", + " 'ADRs': df['VAX_LOT'].value_counts(),\n", + " 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n", + " 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n", + " 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts(),\n", + " 'HOSPITALISATIONS': filterDataFrame(df, 'HOSPITAL').value_counts(),\n", + " 'EMERGENCY ROOM OR DOCTOR VISITS': filterDataFrame(df, 'ER_VISIT').value_counts()\n", + " }\n", + " batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n", + " batchCodeTable.index.name = 'VAX_LOT'\n", + " # add Company column:\n", + " batchCodeTable = pd.merge(\n", + " batchCodeTable,\n", + " BatchCodeTableFactory._createCompanyByBatchCodeTable(df),\n", + " how = 'left',\n", + " left_index = True,\n", + " right_index = True,\n", + " validate = 'one_to_one')\n", + " return batchCodeTable.replace(to_replace = np.nan, value = 0)\n", "\n", - " batchCodeTableDict = {\n", - " 'ADRs': df['VAX_LOT'].value_counts(),\n", - " 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n", - " 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n", - " 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts(),\n", - " 'HOSPITALISATIONS': filterDataFrame(df, 'HOSPITAL').value_counts(),\n", - " 'EMERGENCY ROOM OR DOCTOR VISITS': filterDataFrame(df, 'ER_VISIT').value_counts()\n", - " }\n", - " batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n", - " batchCodeTable.index.name = 'VAX_LOT'\n", - " # add Company column:\n", - " batchCodeTable = pd.merge(\n", - " batchCodeTable,\n", - " createCompanyByBatchCodeTable(df),\n", - " how = 'left',\n", - " left_index = True,\n", - " right_index = True,\n", - " validate = 'one_to_one')\n", - " return batchCodeTable.replace(to_replace = np.nan, value = 0)\n" + " @staticmethod\n", + " def _createManufacturerByBatchCodeTable(df):\n", + " manufacturerByBatchCodeTable = df[['VAX_LOT', 'VAX_MANU']]\n", + " manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])\n", + " return manufacturerByBatchCodeTable.set_index('VAX_LOT')\n", + "\n", + " @staticmethod\n", + " def _createCompanyByBatchCodeTable(df):\n", + " return BatchCodeTableFactory._createManufacturerByBatchCodeTable(df).rename(columns = {\"VAX_MANU\": \"COMPANY\"})\n" ] }, { @@ -415,7 +424,7 @@ "\n", " def _test_createBatchCodeTable(self, dataFrame):\n", " # When\n", - " batchCodeTable = createBatchCodeTable(dataFrame)\n", + " batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(dataFrame)\n", "\n", " # Then\n", " batchCodeTableExpected = pd.DataFrame(\n", @@ -469,7 +478,7 @@ " dataFrame = DataFrameFilter(dataFrame).filterForSevereEffects(dose = '1')\n", "\n", " # When\n", - " batchCodeTable = createSevereEffectsBatchCodeTable(dataFrame)\n", + " batchCodeTable = BatchCodeTableFactory.createSevereEffectsBatchCodeTable(dataFrame)\n", "\n", " # Then\n", " batchCodeTableExpected = pd.DataFrame(\n", @@ -510,7 +519,7 @@ " vaersDescrs = VaersDescrReader(\"VAERS\").readAllVaersDescrs()\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " dataFrameFilter = DataFrameFilter(dataFrame)\n", - " batchCodeTable = createBatchCodeTable(dataFrameFilter.filterBy(manufacturer = manufacturer, dose = '1'))\n", + " batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(dataFrameFilter.filterBy(manufacturer = manufacturer, dose = '1'))\n", " display(manufacturer, batchCodeTable)\n", " batchCodeTable.to_excel(excelFile)" ] @@ -538,7 +547,7 @@ " vaersDescrs = VaersDescrReader(\"VAERS\").readAllVaersDescrs()\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " dataFrameFilter = DataFrameFilter(dataFrame)\n", - " severeEffectsBatchCodeTable = createSevereEffectsBatchCodeTable(dataFrameFilter.filterForSevereEffects(dose = '1'))\n", + " severeEffectsBatchCodeTable = BatchCodeTableFactory.createSevereEffectsBatchCodeTable(dataFrameFilter.filterForSevereEffects(dose = '1'))\n", " display('severeEffectsBatchCodeTable', severeEffectsBatchCodeTable)\n", " severeEffectsBatchCodeTable.to_excel(excelFile)" ]