From a97068a0ae299f0ad1a93b1fe5a9824265c4b0e2 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Tue, 1 Feb 2022 10:31:14 +0100 Subject: [PATCH] refactoring --- HowBadIsMyBatch.ipynb | 91 ++++++++++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 31 deletions(-) diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index 93d589f2636..5cc9ee77695 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -129,43 +129,72 @@ " \n", " @staticmethod\n", " def createBatchCodeTable(df : pd.DataFrame):\n", - " batchCodeTableDict = {\n", - " 'ADRs': df['VAX_LOT'].value_counts(),\n", - " 'DEATHS': BatchCodeTableFactory._filterDataFrame(df, 'DIED').value_counts(),\n", - " 'DISABILITIES': BatchCodeTableFactory._filterDataFrame(df, 'DISABLE').value_counts(),\n", - " 'LIFE THREATENING ILLNESSES': BatchCodeTableFactory._filterDataFrame(df, 'L_THREAT').value_counts()\n", - " }\n", - " batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n", - " batchCodeTable.index.name = 'VAX_LOT'\n", - " return batchCodeTable.replace(to_replace = np.nan, value = 0)\n", + " return BatchCodeTableFactory._asDataFrame(\n", + " {\n", + " 'ADRs': BatchCodeTableFactory._getADRs(df),\n", + " 'DEATHS': BatchCodeTableFactory._getDEATHS(df),\n", + " 'DISABILITIES': BatchCodeTableFactory._getDISABILITIES(df),\n", + " 'LIFE THREATENING ILLNESSES': BatchCodeTableFactory._getLIFE_THREATENING_ILLNESSES(df)\n", + " })\n", "\n", " # create table from https://www.howbadismybatch.com/combined.html\n", - " # FK-TODO: DRY with createBatchCodeTable()\n", " @staticmethod\n", " def createSevereEffectsBatchCodeTable(df : pd.DataFrame):\n", - " batchCodeTableDict = {\n", - " 'ADRs': df['VAX_LOT'].value_counts(),\n", - " 'DEATHS': BatchCodeTableFactory._filterDataFrame(df, 'DIED').value_counts(),\n", - " 'DISABILITIES': BatchCodeTableFactory._filterDataFrame(df, 'DISABLE').value_counts(),\n", - " 'LIFE THREATENING ILLNESSES': BatchCodeTableFactory._filterDataFrame(df, 'L_THREAT').value_counts(),\n", - " 'HOSPITALISATIONS': BatchCodeTableFactory._filterDataFrame(df, 'HOSPITAL').value_counts(),\n", - " 'EMERGENCY ROOM OR DOCTOR VISITS': BatchCodeTableFactory._filterDataFrame(df, 'ER_VISIT').value_counts()\n", - " }\n", - " batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n", - " batchCodeTable.index.name = 'VAX_LOT'\n", - " # add Company column:\n", - " batchCodeTable = pd.merge(\n", - " batchCodeTable,\n", - " BatchCodeTableFactory._createCompanyByBatchCodeTable(df),\n", - " how = 'left',\n", - " left_index = True,\n", - " right_index = True,\n", - " validate = 'one_to_one')\n", - " return batchCodeTable.replace(to_replace = np.nan, value = 0)\n", + " return BatchCodeTableFactory._addCompanyColumn(\n", + " BatchCodeTableFactory._asDataFrame(\n", + " {\n", + " 'ADRs': BatchCodeTableFactory._getADRs(df),\n", + " 'DEATHS': BatchCodeTableFactory._getDEATHS(df),\n", + " 'DISABILITIES': BatchCodeTableFactory._getDISABILITIES(df),\n", + " 'LIFE THREATENING ILLNESSES': BatchCodeTableFactory._getLIFE_THREATENING_ILLNESSES(df),\n", + " 'HOSPITALISATIONS': BatchCodeTableFactory._getHOSPITALISATIONS(df),\n", + " 'EMERGENCY ROOM OR DOCTOR VISITS': BatchCodeTableFactory._getER_VISITs(df)\n", + " }),\n", + " BatchCodeTableFactory._createCompanyByBatchCodeTable(df))\n", "\n", " @staticmethod\n", - " def _filterDataFrame(df, col):\n", - " return df[df[col] == 'Y']['VAX_LOT']\n", + " def _getADRs(df):\n", + " return df['VAX_LOT'].value_counts()\n", + "\n", + " @staticmethod\n", + " def _getDEATHS(df):\n", + " return BatchCodeTableFactory._countValues(df, 'DIED')\n", + "\n", + " @staticmethod\n", + " def _getDISABILITIES(df):\n", + " return BatchCodeTableFactory._countValues(df, 'DISABLE')\n", + "\n", + " @staticmethod\n", + " def _getLIFE_THREATENING_ILLNESSES(df):\n", + " return BatchCodeTableFactory._countValues(df, 'L_THREAT')\n", + "\n", + " @staticmethod\n", + " def _getHOSPITALISATIONS(df):\n", + " return BatchCodeTableFactory._countValues(df, 'HOSPITAL')\n", + "\n", + " @staticmethod\n", + " def _getER_VISITs(df):\n", + " return BatchCodeTableFactory._countValues(df, 'ER_VISIT')\n", + "\n", + " @staticmethod\n", + " def _countValues(df, column):\n", + " return df[df[column] == 'Y']['VAX_LOT'].value_counts()\n", + "\n", + " @staticmethod\n", + " def _asDataFrame(dict):\n", + " dataFrame = pd.concat(dict, axis = 'columns')\n", + " dataFrame.index.name = 'VAX_LOT'\n", + " return dataFrame.replace(to_replace = np.nan, value = 0)\n", + "\n", + " @staticmethod\n", + " def _addCompanyColumn(batchCodeTable, companyByBatchCodeTable):\n", + " return pd.merge(\n", + " batchCodeTable,\n", + " companyByBatchCodeTable,\n", + " how = 'left',\n", + " left_index = True,\n", + " right_index = True,\n", + " validate = 'one_to_one')\n", "\n", " @staticmethod\n", " def _createManufacturerByBatchCodeTable(df):\n",