diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index 1921ab0a25e..7a636cd246d 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -97,16 +97,37 @@ "outputs": [], "source": [ "def createBatchCodeTable(df : pd.DataFrame):\n", - " def filter(df, col):\n", + " def filterDataFrame(df, col):\n", " return df[df[col] == 'Y'][['VAX_LOT']]\n", "\n", " batchCodeTableDict = {\n", " 'ADRs': df[['VAX_LOT']].value_counts(),\n", - " 'DEATHS': filter(df, 'DIED').value_counts(),\n", - " 'DISABILITIES': filter(df, 'DISABLE').value_counts(),\n", - " 'LIFE THREATENING ILLNESSES': filter(df, 'L_THREAT').value_counts()\n", + " 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n", + " 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n", + " 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts()\n", " }\n", - " return pd.concat(batchCodeTableDict, axis = 'columns').replace(to_replace = np.nan, value = 0)\n" + " return pd.concat(batchCodeTableDict, axis = 'columns').replace(to_replace = np.nan, value = 0)\n", + "\n", + "def getManufacturerOfBatchCode(df, batchCode):\n", + " return df[df['VAX_LOT'] == batchCode].iloc[0]['VAX_MANU']\n", + "\n", + "def createSevereEffectsBatchCodeTable(df):\n", + " def filterDataFrame(df, col):\n", + " return df[df[col] == 'Y'][['VAX_LOT']]\n", + "\n", + " batchCodeTableDict = {\n", + " 'ADRs': df[['VAX_LOT']].value_counts(),\n", + " 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n", + " 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n", + " 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts(),\n", + " 'HOSPITALISATIONS': filterDataFrame(df, 'HOSPITAL').value_counts(),\n", + " 'EMERGENCY ROOM OR DOCTOR VISITS': filterDataFrame(df, 'ER_VISIT').value_counts()\n", + " }\n", + " batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n", + " batchCodeTable['COMPANY'] = batchCodeTable.apply(\n", + " lambda row: getManufacturerOfBatchCode(df, row.name[0]),\n", + " axis = 'columns')\n", + " return batchCodeTable.replace(to_replace = np.nan, value = 0)\n" ] }, { @@ -365,6 +386,61 @@ " return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "ded70c87", + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.testing import assert_frame_equal\n", + "\n", + "class SevereEffectsBatchCodeTableTest(unittest.TestCase):\n", + "\n", + " def test_createSevereEffectsBatchCodeTable(self):\n", + " dataFrame = createDataFrameSevereEffectsFromDescrs(\n", + " [\n", + " {\n", + " 'VAERSDATA': self.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", + " data = [ ['Y', 'Y', np.NaN, 'Y', 'Y'],\n", + " [np.NaN, np.NaN, 'Y', np.NaN, 'Y']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"]),\n", + " 'VAERSVAX': self.createDataFrame(\n", + " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", + " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", + " ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"],\n", + " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " }\n", + " ],\n", + " '1')\n", + "\n", + " # When\n", + " batchCodeTable = createSevereEffectsBatchCodeTable(dataFrame)\n", + "\n", + " # Then\n", + " batchCodeTableExpected = pd.DataFrame(\n", + " data = {\n", + " 'ADRs': [1, 1],\n", + " 'DEATHS': [0, 1],\n", + " 'DISABILITIES': [1, 0],\n", + " 'LIFE THREATENING ILLNESSES': [0, 1],\n", + " 'HOSPITALISATIONS': [0, 1],\n", + " 'EMERGENCY ROOM OR DOCTOR VISITS': [1, 1],\n", + " 'COMPANY': ['PFIZER\\BIONTECH', 'MODERNA']\n", + " },\n", + " index = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',)))\n", + " assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)\n", + "\n", + " def createDataFrame(self, index, columns, data, dtypes = {}):\n", + " return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n" + ] + }, { "cell_type": "code", "execution_count": null,