diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index 87410edc323..271895ee9be 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -91,8 +91,7 @@ "# FK-TODO: DRY with createAndFilterDataFrameFromFiles()\n", "def createDataFrameSevereEffectsFromFiles(dataDir, dose):\n", " return createDataFrameSevereEffectsFromDescrs(\n", - " # FK-TODO: reactivate: [readVaersDescr(dataDir, \"2021\"), readVaersDescr(dataDir, \"2022\")],\n", - " [readVaersDescr(dataDir, \"2022\")],\n", + " [readVaersDescr(dataDir, \"2021\"), readVaersDescr(dataDir, \"2022\")],\n", " dose)" ] }, @@ -115,15 +114,20 @@ " }\n", " return pd.concat(batchCodeTableDict, axis = 'columns').replace(to_replace = np.nan, value = 0)\n", "\n", - "def getManufacturerOfBatchCode(df, batchCode):\n", - " return df[df['VAX_LOT'] == batchCode].iloc[0]['VAX_MANU']\n", + "def createManufacturerByBatchCodeTable(df):\n", + " manufacturerByBatchCodeTable = df[['VAX_LOT', 'VAX_MANU']]\n", + " manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])\n", + " return manufacturerByBatchCodeTable.set_index('VAX_LOT')\n", + "\n", + "def createCompanyByBatchCodeTable(df):\n", + " return createManufacturerByBatchCodeTable(df).rename(columns = {\"VAX_MANU\": \"COMPANY\"})\n", "\n", "def createSevereEffectsBatchCodeTable(df):\n", " def filterDataFrame(df, col):\n", - " return df[df[col] == 'Y'][['VAX_LOT']]\n", + " return df[df[col] == 'Y']['VAX_LOT']\n", "\n", " batchCodeTableDict = {\n", - " 'ADRs': df[['VAX_LOT']].value_counts(),\n", + " 'ADRs': df['VAX_LOT'].value_counts(),\n", " 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n", " 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n", " 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts(),\n", @@ -131,9 +135,14 @@ " 'EMERGENCY ROOM OR DOCTOR VISITS': filterDataFrame(df, 'ER_VISIT').value_counts()\n", " }\n", " batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n", - " batchCodeTable['COMPANY'] = batchCodeTable.apply(\n", - " lambda row: getManufacturerOfBatchCode(df, row.name[0]),\n", - " axis = 'columns')\n", + " # add Company column:\n", + " batchCodeTable = pd.merge(\n", + " batchCodeTable,\n", + " createCompanyByBatchCodeTable(df),\n", + " how = 'left',\n", + " left_index = True,\n", + " right_index = True,\n", + " validate = 'one_to_one')\n", " return batchCodeTable.replace(to_replace = np.nan, value = 0)\n" ] }, @@ -434,14 +443,14 @@ " batchCodeTableExpected = pd.DataFrame(\n", " data = {\n", " 'ADRs': [1, 1],\n", - " 'DEATHS': [0, 1],\n", - " 'DISABILITIES': [1, 0],\n", - " 'LIFE THREATENING ILLNESSES': [0, 1],\n", - " 'HOSPITALISATIONS': [0, 1],\n", + " 'DEATHS': [1, 0],\n", + " 'DISABILITIES': [0, 1],\n", + " 'LIFE THREATENING ILLNESSES': [1, 0],\n", + " 'HOSPITALISATIONS': [1, 0],\n", " 'EMERGENCY ROOM OR DOCTOR VISITS': [1, 1],\n", - " 'COMPANY': ['PFIZER\\BIONTECH', 'MODERNA']\n", + " 'COMPANY': ['MODERNA', 'PFIZER\\BIONTECH']\n", " },\n", - " index = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',)))\n", + " index = pd.Index(['037K20A', '025L20A']))\n", " assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)\n", "\n", " def createDataFrame(self, index, columns, data, dtypes = {}):\n",