refactoring

This commit is contained in:
frankknoll
2022-02-07 13:54:17 +01:00
parent a2db3a869f
commit 4dbc7f39e9

View File

@@ -197,27 +197,31 @@
" self.dataFrame = dataFrame\n", " self.dataFrame = dataFrame\n",
"\n", "\n",
" def createBatchCodeTable(self):\n", " def createBatchCodeTable(self):\n",
" batchCodeTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n", " batchCodeTable = BatchCodeTableHelper._createSevereEffectsBatchCodeTable(self.dataFrame.groupby('VAX_LOT'))\n",
" dataFrame = self.dataFrame.groupby('VAX_LOT'),\n",
" aggFunctionsByColumn = {\n",
" 'DIED': ['sum', 'size'],\n",
" 'L_THREAT': 'sum',\n",
" 'DISABLE': 'sum'\n",
" },\n",
" # FK-TODO: rename \"ADRs\" and \"Total reports\" to \"Total Number of Adverse Reaction Reports\" in all places\n",
" columnNameMappingsDict = {\n",
" \"DIED_size\": \"ADRs\",\n",
" \"DIED_sum\": \"DEATHS\",\n",
" \"L_THREAT_sum\": \"LIFE THREATENING ILLNESSES\",\n",
" \"DISABLE_sum\": \"DISABILITIES\"\n",
" })\n",
" batchCodeTable = batchCodeTable[['ADRs', 'DEATHS', 'DISABILITIES', 'LIFE THREATENING ILLNESSES']]\n", " batchCodeTable = batchCodeTable[['ADRs', 'DEATHS', 'DISABILITIES', 'LIFE THREATENING ILLNESSES']]\n",
" return batchCodeTable.sort_values(by = 'ADRs', ascending = False)\n", " return batchCodeTable.sort_values(by = 'ADRs', ascending = False)\n",
"\n", "\n",
" # create table from https://www.howbadismybatch.com/combined.html\n", " # create table from https://www.howbadismybatch.com/combined.html\n",
" def createSevereEffectsBatchCodeTable(self):\n", " def createSevereEffectsBatchCodeTable(self):\n",
" batchCodeTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n", " batchCodeTable = BatchCodeTableHelper._createSevereEffectsBatchCodeTable(self.dataFrame.groupby('VAX_LOT'))\n",
" dataFrame = self.dataFrame.groupby('VAX_LOT'),\n", " batchCodeTable = batchCodeTable[['ADRs', 'DEATHS', 'DISABILITIES', 'LIFE THREATENING ILLNESSES', 'HOSPITALISATIONS', 'EMERGENCY ROOM OR DOCTOR VISITS']]\n",
" batchCodeTable = batchCodeTable.sort_values(by = 'ADRs', ascending = False)\n",
" return self._addCompanyColumn(batchCodeTable, self._createCompanyByBatchCodeTable())\n",
"\n",
" @staticmethod\n",
" def _createSevereEffectsBatchCodeTable(\n",
" dataFrame,\n",
" # FK-TODO: rename \"ADRs\" and \"Total reports\" to \"Total Number of Adverse Reaction Reports\" in all places\n",
" columnNameMappingsDict = {\n",
" \"DIED_size\": \"ADRs\",\n",
" \"DIED_sum\": \"DEATHS\",\n",
" \"L_THREAT_sum\": \"LIFE THREATENING ILLNESSES\",\n",
" \"DISABLE_sum\": \"DISABILITIES\",\n",
" 'HOSPITAL_sum': 'HOSPITALISATIONS',\n",
" 'ER_VISIT_sum': 'EMERGENCY ROOM OR DOCTOR VISITS'\n",
" }):\n",
" return AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n",
" dataFrame = dataFrame,\n",
" aggFunctionsByColumn = {\n", " aggFunctionsByColumn = {\n",
" 'DIED': ['sum', 'size'],\n", " 'DIED': ['sum', 'size'],\n",
" 'L_THREAT': 'sum',\n", " 'L_THREAT': 'sum',\n",
@@ -225,17 +229,7 @@
" 'HOSPITAL': 'sum',\n", " 'HOSPITAL': 'sum',\n",
" 'ER_VISIT': 'sum'\n", " 'ER_VISIT': 'sum'\n",
" },\n", " },\n",
" columnNameMappingsDict = {\n", " columnNameMappingsDict = columnNameMappingsDict)\n",
" \"DIED_size\": \"ADRs\",\n",
" \"DIED_sum\": \"DEATHS\",\n",
" \"L_THREAT_sum\": \"LIFE THREATENING ILLNESSES\",\n",
" \"DISABLE_sum\": \"DISABILITIES\",\n",
" 'HOSPITAL_sum': 'HOSPITALISATIONS',\n",
" 'ER_VISIT_sum': 'EMERGENCY ROOM OR DOCTOR VISITS'\n",
" })\n",
" batchCodeTable = batchCodeTable[['ADRs', 'DEATHS', 'DISABILITIES', 'LIFE THREATENING ILLNESSES', 'HOSPITALISATIONS', 'EMERGENCY ROOM OR DOCTOR VISITS']]\n",
" batchCodeTable = batchCodeTable.sort_values(by = 'ADRs', ascending = False)\n",
" return self._addCompanyColumn(batchCodeTable, self._createCompanyByBatchCodeTable())\n",
"\n", "\n",
" def _addCompanyColumn(self, batchCodeTable, companyByBatchCodeTable):\n", " def _addCompanyColumn(self, batchCodeTable, companyByBatchCodeTable):\n",
" return pd.merge(\n", " return pd.merge(\n",
@@ -301,13 +295,8 @@
" # FK-TODO: DRY because it generates a subset of BatchCodeTableHelper.createSevereEffectsBatchCodeTable()\n", " # FK-TODO: DRY because it generates a subset of BatchCodeTableHelper.createSevereEffectsBatchCodeTable()\n",
" @staticmethod\n", " @staticmethod\n",
" def _getDoseTable(dataFrame):\n", " def _getDoseTable(dataFrame):\n",
" doseTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n", " doseTable = BatchCodeTableHelper._createSevereEffectsBatchCodeTable(\n",
" dataFrame = dataFrame,\n", " dataFrame,\n",
" aggFunctionsByColumn = {\n",
" 'DIED': ['sum', 'size'],\n",
" 'L_THREAT': 'sum',\n",
" 'DISABLE': 'sum'\n",
" },\n",
" columnNameMappingsDict = {\n", " columnNameMappingsDict = {\n",
" \"DIED_size\": \"Total reports\",\n", " \"DIED_size\": \"Total reports\",\n",
" \"DIED_sum\": \"Deaths\",\n", " \"DIED_sum\": \"Deaths\",\n",
@@ -729,10 +718,10 @@
" def test_getDoseTable(self):\n", " def test_getDoseTable(self):\n",
" # Given\n", " # Given\n",
" dataFrame = TestHelper.createDataFrame(\n", " dataFrame = TestHelper.createDataFrame(\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'HOSPITAL', 'ER_VISIT'],\n",
" data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2'],\n", " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 0, 0],\n",
" [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1'],\n", " [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 0, 0],\n",
" [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1']],\n", " [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 0, 0]],\n",
" index = [\n", " index = [\n",
" \"1048786\",\n", " \"1048786\",\n",
" \"1048786\",\n", " \"1048786\",\n",
@@ -759,10 +748,10 @@
" # Given\n", " # Given\n",
" parseDate = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\")\n", " parseDate = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\")\n",
" dataFrame = TestHelper.createDataFrame(\n", " dataFrame = TestHelper.createDataFrame(\n",
" columns = ['RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " columns = ['RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'HOSPITAL', 'ER_VISIT'],\n",
" data = [ [parseDate('01/01/2021'), 1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2'],\n", " data = [ [parseDate('01/01/2021'), 1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 0, 0],\n",
" [parseDate('01/01/2021'), 1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1'],\n", " [parseDate('01/01/2021'), 1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 0, 0],\n",
" [parseDate('01/01/2021'), 1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1']],\n", " [parseDate('01/01/2021'), 1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 0, 0]],\n",
" index = [\n", " index = [\n",
" \"1048786\",\n", " \"1048786\",\n",
" \"1048786\",\n", " \"1048786\",\n",
@@ -806,10 +795,10 @@
" def test_getInternationalLotTable(self):\n", " def test_getInternationalLotTable(self):\n",
" # Given\n", " # Given\n",
" dataFrame = TestHelper.createDataFrame(\n", " dataFrame = TestHelper.createDataFrame(\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE'],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE', 'HOSPITAL', 'ER_VISIT'],\n",
" data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 'GBPFIZER INC2020486806'],\n", " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 'GBPFIZER INC2020486806', 0, 0],\n",
" [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224'],\n", " [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n",
" [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224'],\n", " [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n",
" [0, 0, 0, 'COVID19', 'MODERNA', '030L20B', '1', 'dummy'],\n", " [0, 0, 0, 'COVID19', 'MODERNA', '030L20B', '1', 'dummy'],\n",
" [0, 0, 0, 'COVID19', 'MODERNA', '030L20B', '1', 123]],\n", " [0, 0, 0, 'COVID19', 'MODERNA', '030L20B', '1', 123]],\n",
" index = [\n", " index = [\n",