diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index 7188477e2d3..3fd2d9485ec 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -125,72 +125,61 @@ "source": [ "import pandas as pd\n", "\n", - "class BatchCodeTableFactory:\n", - "\n", + "class BatchCodeTableHelper:\n", + " \n", " def __init__(self, dataFrame : pd.DataFrame):\n", " self.dataFrame = dataFrame \n", "\n", - " def createBatchCodeTable(self, manufacturer, dose):\n", - " df = DataFrameFilter(self.dataFrame).filterBy(manufacturer = manufacturer, dose = dose)\n", - " return BatchCodeTableFactory._asDataFrame(\n", + " def createBatchCodeTable(self):\n", + " return self._asDataFrame(\n", " {\n", - " 'ADRs': BatchCodeTableFactory._getADRs(df),\n", - " 'DEATHS': BatchCodeTableFactory._getDEATHS(df),\n", - " 'DISABILITIES': BatchCodeTableFactory._getDISABILITIES(df),\n", - " 'LIFE THREATENING ILLNESSES': BatchCodeTableFactory._getLIFE_THREATENING_ILLNESSES(df)\n", + " 'ADRs': self._getADRs(),\n", + " 'DEATHS': self._getDEATHS(),\n", + " 'DISABILITIES': self._getDISABILITIES(),\n", + " 'LIFE THREATENING ILLNESSES': self._getLIFE_THREATENING_ILLNESSES()\n", " })\n", "\n", " # create table from https://www.howbadismybatch.com/combined.html\n", - " def createSevereEffectsBatchCodeTable(self, dose):\n", - " df = DataFrameFilter(self.dataFrame).filterForSevereEffects(dose)\n", - " return BatchCodeTableFactory._addCompanyColumn(\n", - " BatchCodeTableFactory._asDataFrame(\n", + " def createSevereEffectsBatchCodeTable(self):\n", + " return self._addCompanyColumn(\n", + " self._asDataFrame(\n", " {\n", - " 'ADRs': BatchCodeTableFactory._getADRs(df),\n", - " 'DEATHS': BatchCodeTableFactory._getDEATHS(df),\n", - " 'DISABILITIES': BatchCodeTableFactory._getDISABILITIES(df),\n", - " 'LIFE THREATENING ILLNESSES': BatchCodeTableFactory._getLIFE_THREATENING_ILLNESSES(df),\n", - " 'HOSPITALISATIONS': BatchCodeTableFactory._getHOSPITALISATIONS(df),\n", - " 'EMERGENCY ROOM OR DOCTOR VISITS': BatchCodeTableFactory._getER_VISITs(df)\n", + " 'ADRs': self._getADRs(),\n", + " 'DEATHS': self._getDEATHS(),\n", + " 'DISABILITIES': self._getDISABILITIES(),\n", + " 'LIFE THREATENING ILLNESSES': self._getLIFE_THREATENING_ILLNESSES(),\n", + " 'HOSPITALISATIONS': self._getHOSPITALISATIONS(),\n", + " 'EMERGENCY ROOM OR DOCTOR VISITS': self._getER_VISITs()\n", " }),\n", - " BatchCodeTableFactory._createCompanyByBatchCodeTable(df))\n", + " self._createCompanyByBatchCodeTable())\n", "\n", - " @staticmethod\n", - " def _getADRs(df):\n", - " return df['VAX_LOT'].value_counts()\n", + " def _getADRs(self):\n", + " return self.dataFrame['VAX_LOT'].value_counts()\n", "\n", - " @staticmethod\n", - " def _getDEATHS(df):\n", - " return BatchCodeTableFactory._countValues(df, 'DIED')\n", + " def _getDEATHS(self):\n", + " return self._countValues('DIED')\n", "\n", - " @staticmethod\n", - " def _getDISABILITIES(df):\n", - " return BatchCodeTableFactory._countValues(df, 'DISABLE')\n", + " def _getDISABILITIES(self):\n", + " return self._countValues('DISABLE')\n", "\n", - " @staticmethod\n", - " def _getLIFE_THREATENING_ILLNESSES(df):\n", - " return BatchCodeTableFactory._countValues(df, 'L_THREAT')\n", + " def _getLIFE_THREATENING_ILLNESSES(self):\n", + " return self._countValues('L_THREAT')\n", "\n", - " @staticmethod\n", - " def _getHOSPITALISATIONS(df):\n", - " return BatchCodeTableFactory._countValues(df, 'HOSPITAL')\n", + " def _getHOSPITALISATIONS(self):\n", + " return self._countValues('HOSPITAL')\n", "\n", - " @staticmethod\n", - " def _getER_VISITs(df):\n", - " return BatchCodeTableFactory._countValues(df, 'ER_VISIT')\n", + " def _getER_VISITs(self):\n", + " return self._countValues('ER_VISIT')\n", "\n", - " @staticmethod\n", - " def _countValues(df, column):\n", - " return df[df[column] == 'Y']['VAX_LOT'].value_counts()\n", + " def _countValues(self, column):\n", + " return self.dataFrame[self.dataFrame[column] == 'Y']['VAX_LOT'].value_counts()\n", "\n", - " @staticmethod\n", - " def _asDataFrame(dict):\n", + " def _asDataFrame(self, dict):\n", " dataFrame = pd.concat(dict, axis = 'columns')\n", " dataFrame.index.name = 'VAX_LOT'\n", " return dataFrame.replace(to_replace = np.nan, value = 0)\n", "\n", - " @staticmethod\n", - " def _addCompanyColumn(batchCodeTable, companyByBatchCodeTable):\n", + " def _addCompanyColumn(self, batchCodeTable, companyByBatchCodeTable):\n", " return pd.merge(\n", " batchCodeTable,\n", " companyByBatchCodeTable,\n", @@ -199,15 +188,26 @@ " right_index = True,\n", " validate = 'one_to_one')\n", "\n", - " @staticmethod\n", - " def _createManufacturerByBatchCodeTable(df):\n", - " manufacturerByBatchCodeTable = df[['VAX_LOT', 'VAX_MANU']]\n", + " def _createCompanyByBatchCodeTable(self):\n", + " return self._createManufacturerByBatchCodeTable().rename(columns = {\"VAX_MANU\": \"COMPANY\"})\n", + "\n", + " def _createManufacturerByBatchCodeTable(self):\n", + " manufacturerByBatchCodeTable = self.dataFrame[['VAX_LOT', 'VAX_MANU']]\n", " manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])\n", " return manufacturerByBatchCodeTable.set_index('VAX_LOT')\n", "\n", + "class BatchCodeTableFactory:\n", + "\n", " @staticmethod\n", - " def _createCompanyByBatchCodeTable(df):\n", - " return BatchCodeTableFactory._createManufacturerByBatchCodeTable(df).rename(columns = {\"VAX_MANU\": \"COMPANY\"})\n" + " def createBatchCodeTable(dataFrame : pd.DataFrame, manufacturer, dose):\n", + " filteredDataFrame = DataFrameFilter(dataFrame).filterBy(manufacturer = manufacturer, dose = dose)\n", + " return BatchCodeTableHelper(filteredDataFrame).createBatchCodeTable()\n", + "\n", + " # create table from https://www.howbadismybatch.com/combined.html\n", + " @staticmethod\n", + " def createSevereEffectsBatchCodeTable(dataFrame : pd.DataFrame, dose):\n", + " severeEffectsDataFrame = DataFrameFilter(dataFrame).filterForSevereEffects(dose)\n", + " return BatchCodeTableHelper(severeEffectsDataFrame).createSevereEffectsBatchCodeTable()\n" ] }, { @@ -411,30 +411,29 @@ "\n", " def test_createSevereEffectsBatchCodeTable(self):\n", " # Given\n", - " batchCodeTableFactory = BatchCodeTableFactory(\n", - " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", - " [\n", - " {\n", - " 'VAERSDATA': self.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", - " data = [ ['Y', 'Y', np.NaN, 'Y', 'Y'],\n", - " [np.NaN, np.NaN, 'Y', np.NaN, 'Y']],\n", - " index = [\n", - " \"0916600\",\n", - " \"0916601\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", - " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", - " ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", - " index = [\n", - " \"0916600\",\n", - " \"0916601\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " }\n", - " ]))\n", + " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", + " [\n", + " {\n", + " 'VAERSDATA': self.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", + " data = [ ['Y', 'Y', np.NaN, 'Y', 'Y'],\n", + " [np.NaN, np.NaN, 'Y', np.NaN, 'Y']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"]),\n", + " 'VAERSVAX': self.createDataFrame(\n", + " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", + " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", + " ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"],\n", + " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " }\n", + " ])\n", "\n", " # When\n", - " batchCodeTable = batchCodeTableFactory.createSevereEffectsBatchCodeTable('1')\n", + " batchCodeTable = BatchCodeTableFactory.createSevereEffectsBatchCodeTable(dataFrame, '1')\n", "\n", " # Then\n", " batchCodeTableExpected = pd.DataFrame(\n", @@ -496,11 +495,8 @@ " self._test_createBatchCodeTable(dataFrame, \"MODERNA\", '1')\n", "\n", " def _test_createBatchCodeTable(self, dataFrame, manufacturer, dose):\n", - " # Given\n", - " batchCodeTableFactory = BatchCodeTableFactory(dataFrame)\n", - "\n", " # When\n", - " batchCodeTable = batchCodeTableFactory.createBatchCodeTable(manufacturer, dose)\n", + " batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(dataFrame, manufacturer, dose)\n", "\n", " # Then\n", " batchCodeTableExpected = pd.DataFrame(\n", @@ -537,8 +533,7 @@ "def saveBatchCodeTable(manufacturer, excelFile):\n", " vaersDescrs = VaersDescrReader(\"VAERS\").readAllVaersDescrs()\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", - " batchCodeTableFactory = BatchCodeTableFactory(dataFrame)\n", - " batchCodeTable = batchCodeTableFactory.createBatchCodeTable(manufacturer = manufacturer, dose = '1')\n", + " batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(dataFrame, manufacturer = manufacturer, dose = '1')\n", " display(manufacturer, batchCodeTable)\n", " batchCodeTable.to_excel(excelFile)" ] @@ -565,8 +560,7 @@ "def saveSevereEffectsBatchCodeTable(excelFile):\n", " vaersDescrs = VaersDescrReader(\"VAERS\").readAllVaersDescrs()\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", - " batchCodeTableFactory = BatchCodeTableFactory(dataFrame)\n", - " severeEffectsBatchCodeTable = batchCodeTableFactory.createSevereEffectsBatchCodeTable(dose = '1')\n", + " severeEffectsBatchCodeTable = BatchCodeTableFactory.createSevereEffectsBatchCodeTable(dataFrame, dose = '1')\n", " display('severeEffectsBatchCodeTable', severeEffectsBatchCodeTable)\n", " severeEffectsBatchCodeTable.to_excel(excelFile)" ]