diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index ba14ee280f1..f9a08e764b7 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -197,27 +197,31 @@ " self.dataFrame = dataFrame\n", "\n", " def createBatchCodeTable(self):\n", - " batchCodeTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n", - " dataFrame = self.dataFrame.groupby('VAX_LOT'),\n", - " aggFunctionsByColumn = {\n", - " 'DIED': ['sum', 'size'],\n", - " 'L_THREAT': 'sum',\n", - " 'DISABLE': 'sum'\n", - " },\n", - " # FK-TODO: rename \"ADRs\" and \"Total reports\" to \"Total Number of Adverse Reaction Reports\" in all places\n", - " columnNameMappingsDict = {\n", - " \"DIED_size\": \"ADRs\",\n", - " \"DIED_sum\": \"DEATHS\",\n", - " \"L_THREAT_sum\": \"LIFE THREATENING ILLNESSES\",\n", - " \"DISABLE_sum\": \"DISABILITIES\"\n", - " })\n", + " batchCodeTable = BatchCodeTableHelper._createSevereEffectsBatchCodeTable(self.dataFrame.groupby('VAX_LOT'))\n", " batchCodeTable = batchCodeTable[['ADRs', 'DEATHS', 'DISABILITIES', 'LIFE THREATENING ILLNESSES']]\n", " return batchCodeTable.sort_values(by = 'ADRs', ascending = False)\n", "\n", " # create table from https://www.howbadismybatch.com/combined.html\n", " def createSevereEffectsBatchCodeTable(self):\n", - " batchCodeTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n", - " dataFrame = self.dataFrame.groupby('VAX_LOT'),\n", + " batchCodeTable = BatchCodeTableHelper._createSevereEffectsBatchCodeTable(self.dataFrame.groupby('VAX_LOT'))\n", + " batchCodeTable = batchCodeTable[['ADRs', 'DEATHS', 'DISABILITIES', 'LIFE THREATENING ILLNESSES', 'HOSPITALISATIONS', 'EMERGENCY ROOM OR DOCTOR VISITS']]\n", + " batchCodeTable = batchCodeTable.sort_values(by = 'ADRs', ascending = False)\n", + " return self._addCompanyColumn(batchCodeTable, self._createCompanyByBatchCodeTable())\n", + "\n", + " @staticmethod\n", + " def _createSevereEffectsBatchCodeTable(\n", + " dataFrame,\n", + " # FK-TODO: rename \"ADRs\" and \"Total reports\" to \"Total Number of Adverse Reaction Reports\" in all places\n", + " columnNameMappingsDict = {\n", + " \"DIED_size\": \"ADRs\",\n", + " \"DIED_sum\": \"DEATHS\",\n", + " \"L_THREAT_sum\": \"LIFE THREATENING ILLNESSES\",\n", + " \"DISABLE_sum\": \"DISABILITIES\",\n", + " 'HOSPITAL_sum': 'HOSPITALISATIONS',\n", + " 'ER_VISIT_sum': 'EMERGENCY ROOM OR DOCTOR VISITS'\n", + " }):\n", + " return AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n", + " dataFrame = dataFrame,\n", " aggFunctionsByColumn = {\n", " 'DIED': ['sum', 'size'],\n", " 'L_THREAT': 'sum',\n", @@ -225,17 +229,7 @@ " 'HOSPITAL': 'sum',\n", " 'ER_VISIT': 'sum'\n", " },\n", - " columnNameMappingsDict = {\n", - " \"DIED_size\": \"ADRs\",\n", - " \"DIED_sum\": \"DEATHS\",\n", - " \"L_THREAT_sum\": \"LIFE THREATENING ILLNESSES\",\n", - " \"DISABLE_sum\": \"DISABILITIES\",\n", - " 'HOSPITAL_sum': 'HOSPITALISATIONS',\n", - " 'ER_VISIT_sum': 'EMERGENCY ROOM OR DOCTOR VISITS'\n", - " })\n", - " batchCodeTable = batchCodeTable[['ADRs', 'DEATHS', 'DISABILITIES', 'LIFE THREATENING ILLNESSES', 'HOSPITALISATIONS', 'EMERGENCY ROOM OR DOCTOR VISITS']]\n", - " batchCodeTable = batchCodeTable.sort_values(by = 'ADRs', ascending = False)\n", - " return self._addCompanyColumn(batchCodeTable, self._createCompanyByBatchCodeTable())\n", + " columnNameMappingsDict = columnNameMappingsDict)\n", "\n", " def _addCompanyColumn(self, batchCodeTable, companyByBatchCodeTable):\n", " return pd.merge(\n", @@ -301,13 +295,8 @@ " # FK-TODO: DRY because it generates a subset of BatchCodeTableHelper.createSevereEffectsBatchCodeTable()\n", " @staticmethod\n", " def _getDoseTable(dataFrame):\n", - " doseTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n", - " dataFrame = dataFrame,\n", - " aggFunctionsByColumn = {\n", - " 'DIED': ['sum', 'size'],\n", - " 'L_THREAT': 'sum',\n", - " 'DISABLE': 'sum'\n", - " },\n", + " doseTable = BatchCodeTableHelper._createSevereEffectsBatchCodeTable(\n", + " dataFrame,\n", " columnNameMappingsDict = {\n", " \"DIED_size\": \"Total reports\",\n", " \"DIED_sum\": \"Deaths\",\n", @@ -729,10 +718,10 @@ " def test_getDoseTable(self):\n", " # Given\n", " dataFrame = TestHelper.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2'],\n", - " [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1'],\n", - " [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1']],\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'HOSPITAL', 'ER_VISIT'],\n", + " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 0, 0],\n", + " [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 0, 0],\n", + " [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 0, 0]],\n", " index = [\n", " \"1048786\",\n", " \"1048786\",\n", @@ -759,10 +748,10 @@ " # Given\n", " parseDate = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\")\n", " dataFrame = TestHelper.createDataFrame(\n", - " columns = ['RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ [parseDate('01/01/2021'), 1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2'],\n", - " [parseDate('01/01/2021'), 1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1'],\n", - " [parseDate('01/01/2021'), 1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1']],\n", + " columns = ['RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'HOSPITAL', 'ER_VISIT'],\n", + " data = [ [parseDate('01/01/2021'), 1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 0, 0],\n", + " [parseDate('01/01/2021'), 1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 0, 0],\n", + " [parseDate('01/01/2021'), 1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 0, 0]],\n", " index = [\n", " \"1048786\",\n", " \"1048786\",\n", @@ -806,10 +795,10 @@ " def test_getInternationalLotTable(self):\n", " # Given\n", " dataFrame = TestHelper.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE'],\n", - " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 'GBPFIZER INC2020486806'],\n", - " [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224'],\n", - " [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224'],\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE', 'HOSPITAL', 'ER_VISIT'],\n", + " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 'GBPFIZER INC2020486806', 0, 0],\n", + " [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n", + " [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n", " [0, 0, 0, 'COVID19', 'MODERNA', '030L20B', '1', 'dummy'],\n", " [0, 0, 0, 'COVID19', 'MODERNA', '030L20B', '1', 123]],\n", " index = [\n",