From 90d26d518d41e6c421812f594a4daa0efd5dd620 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Sun, 6 Feb 2022 10:56:33 +0100 Subject: [PATCH] refactoring --- HowBadIsMyBatch.ipynb | 227 +++++++++++++++++++++--------------------- 1 file changed, 112 insertions(+), 115 deletions(-) diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index c5ef1d9f13a..fb9f35fd4e7 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -125,23 +125,20 @@ "\n", "class DataFrameFilter:\n", " \n", - " def __init__(self, dataFrame):\n", - " self.dataFrame = dataFrame\n", + " def filterByCovid19And(self, dataFrame, manufacturer = None, dose = None):\n", + " return dataFrame[self._isCovid19(dataFrame) & self._isManufacturer(dataFrame, manufacturer) & self._isDose(dataFrame, dose)]\n", "\n", - " def filterByCovid19And(self, manufacturer = None, dose = None):\n", - " return self.dataFrame[self._isCovid19() & self._isManufacturer(manufacturer) & self._isDose(dose)]\n", + " def filterForSevereEffects(self, dataFrame, dose):\n", + " return self.filterByCovid19And(dataFrame, dose = dose)\n", "\n", - " def filterForSevereEffects(self, dose):\n", - " return self.filterByCovid19And(dose = dose)\n", + " def _isCovid19(self, dataFrame):\n", + " return dataFrame[\"VAX_TYPE\"] == \"COVID19\"\n", "\n", - " def _isCovid19(self):\n", - " return self.dataFrame[\"VAX_TYPE\"] == \"COVID19\"\n", + " def _isManufacturer(self, dataFrame, manufacturer):\n", + " return dataFrame[\"VAX_MANU\"] == manufacturer if manufacturer is not None else True\n", "\n", - " def _isManufacturer(self, manufacturer):\n", - " return self.dataFrame[\"VAX_MANU\"] == manufacturer if manufacturer is not None else True\n", - "\n", - " def _isDose(self, dose):\n", - " return self.dataFrame[\"VAX_DOSE_SERIES\"].str.contains(dose) if dose is not None else True\n" + " def _isDose(self, dataFrame, dose):\n", + " return dataFrame[\"VAX_DOSE_SERIES\"].str.contains(dose) if dose is not None else True\n" ] }, { @@ -240,13 +237,13 @@ "\n", " @staticmethod\n", " def createBatchCodeTable(dataFrame : pd.DataFrame, manufacturer, dose):\n", - " filteredDataFrame = DataFrameFilter(dataFrame).filterByCovid19And(manufacturer = manufacturer, dose = dose)\n", + " filteredDataFrame = DataFrameFilter().filterByCovid19And(dataFrame, manufacturer = manufacturer, dose = dose)\n", " return BatchCodeTableHelper(filteredDataFrame).createBatchCodeTable()\n", "\n", " # create table from https://www.howbadismybatch.com/combined.html\n", " @staticmethod\n", " def createSevereEffectsBatchCodeTable(dataFrame : pd.DataFrame, dose):\n", - " severeEffectsDataFrame = DataFrameFilter(dataFrame).filterForSevereEffects(dose)\n", + " severeEffectsDataFrame = DataFrameFilter().filterForSevereEffects(dataFrame, dose)\n", " return BatchCodeTableHelper(severeEffectsDataFrame).createSevereEffectsBatchCodeTable()\n" ] }, @@ -318,47 +315,47 @@ "\n", " def test_filterByCovid19And(self):\n", " # Given\n", - " dataFrameFilter = DataFrameFilter(\n", - " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", - " [\n", - " {\n", + " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", + " [\n", + " {\n", + " 'VAERSDATA': self.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", + " data = [ [1, 0, 0],\n", + " [0, 0, 1]],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"]),\n", + " 'VAERSVAX': self.createDataFrame(\n", + " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", + " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", + " ['COVID19', 'MODERNA', '025L20A', '1']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"],\n", + " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " },\n", + " {\n", " 'VAERSDATA': self.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", - " data = [ [1, 0, 0],\n", - " [0, 0, 1]],\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", + " data = [ [0, 0, 0],\n", + " [0, 0, 1]],\n", " index = [\n", - " \"0916600\",\n", - " \"0916601\"]),\n", + " \"1996873\",\n", + " \"1996874\"]),\n", " 'VAERSVAX': self.createDataFrame(\n", - " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", - " ['COVID19', 'MODERNA', '025L20A', '1']],\n", + " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", + " data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n", + " ['COVID19', 'MODERNA', '025L20A', '1']],\n", " index = [\n", - " \"0916600\",\n", - " \"0916601\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " },\n", - " {\n", - " 'VAERSDATA': self.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", - " data = [ [0, 0, 0],\n", - " [0, 0, 1]],\n", - " index = [\n", " \"1996873\",\n", - " \"1996874\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", - " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n", - " ['COVID19', 'MODERNA', '025L20A', '1']],\n", - " index = [\n", - " \"1996873\",\n", - " \"1996874\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " }\n", - " ]))\n", + " \"1996874\"],\n", + " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " }\n", + " ])\n", + " dataFrameFilter = DataFrameFilter()\n", " \n", " # When\n", - " dataFrame = dataFrameFilter.filterByCovid19And(manufacturer = \"MODERNA\", dose = '1')\n", + " dataFrame = dataFrameFilter.filterByCovid19And(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -375,30 +372,30 @@ "\n", " def test_filterForSevereEffects(self):\n", " # Given\n", - " dataFrameFilter = DataFrameFilter(\n", - " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", - " [\n", - " {\n", - " 'VAERSDATA': self.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", - " data = [ [1, 1, 0, 1, 1],\n", - " [0, 0, 1, 0, 1]],\n", - " index = [\n", - " \"0916600\",\n", - " \"0916601\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", - " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", - " ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", - " index = [\n", - " \"0916600\",\n", - " \"0916601\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " }\n", - " ]))\n", + " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", + " [\n", + " {\n", + " 'VAERSDATA': self.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", + " data = [ [1, 1, 0, 1, 1],\n", + " [0, 0, 1, 0, 1]],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"]),\n", + " 'VAERSVAX': self.createDataFrame(\n", + " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", + " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", + " ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"],\n", + " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " }\n", + " ])\n", + " dataFrameFilter = DataFrameFilter()\n", "\n", " # When\n", - " dataFrame = dataFrameFilter.filterForSevereEffects(dose = '1')\n", + " dataFrame = dataFrameFilter.filterForSevereEffects(dataFrame, dose = '1')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -413,28 +410,28 @@ "\n", " def test_filterByFirstDose(self):\n", " # Given\n", - " dataFrameFilter = DataFrameFilter(\n", - " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", - " [\n", - " {\n", - " 'VAERSDATA': self.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", - " data = [ [1, 0, 0]],\n", - " index = [\n", - " \"1048786\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", - " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", - " ['COVID19', 'MODERNA', '030L20A', '1']],\n", - " index = [\n", - " \"1048786\",\n", - " \"1048786\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " }\n", - " ]))\n", + " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", + " [\n", + " {\n", + " 'VAERSDATA': self.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", + " data = [ [1, 0, 0]],\n", + " index = [\n", + " \"1048786\"]),\n", + " 'VAERSVAX': self.createDataFrame(\n", + " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", + " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", + " ['COVID19', 'MODERNA', '030L20A', '1']],\n", + " index = [\n", + " \"1048786\",\n", + " \"1048786\"],\n", + " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " }\n", + " ])\n", + " dataFrameFilter = DataFrameFilter()\n", " \n", " # When\n", - " dataFrame = dataFrameFilter.filterByCovid19And(manufacturer = \"MODERNA\", dose = '1')\n", + " dataFrame = dataFrameFilter.filterByCovid19And(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -447,28 +444,28 @@ "\n", " def test_filterBySecondDose(self):\n", " # Given\n", - " dataFrameFilter = DataFrameFilter(\n", - " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", - " [\n", - " {\n", - " 'VAERSDATA': self.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", - " data = [ [1, 0, 0]],\n", - " index = [\n", - " \"1048786\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", - " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", - " ['COVID19', 'MODERNA', '030L20A', '1']],\n", - " index = [\n", - " \"1048786\",\n", - " \"1048786\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " }\n", - " ]))\n", + " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", + " [\n", + " {\n", + " 'VAERSDATA': self.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", + " data = [ [1, 0, 0]],\n", + " index = [\n", + " \"1048786\"]),\n", + " 'VAERSVAX': self.createDataFrame(\n", + " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", + " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", + " ['COVID19', 'MODERNA', '030L20A', '1']],\n", + " index = [\n", + " \"1048786\",\n", + " \"1048786\"],\n", + " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " }\n", + " ])\n", + " dataFrameFilter = DataFrameFilter()\n", "\n", " # When\n", - " dataFrame = dataFrameFilter.filterByCovid19And(manufacturer = \"MODERNA\", dose = '2')\n", + " dataFrame = dataFrameFilter.filterByCovid19And(dataFrame, manufacturer = \"MODERNA\", dose = '2')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -621,7 +618,7 @@ " \"1048786\",\n", " \"4711\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " dataFrame = DataFrameFilter(dataFrame).filterByCovid19And()\n", + " dataFrame = DataFrameFilter().filterByCovid19And(dataFrame)\n", " \n", " # When\n", " doseTable = DoseAnalysis.getDoseTable(dataFrame)\n", @@ -652,7 +649,7 @@ " \"1048786\",\n", " \"4711\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " dataFrame = DataFrameFilter(dataFrame).filterByCovid19And()\n", + " dataFrame = DataFrameFilter().filterByCovid19And(dataFrame)\n", " \n", " # When\n", " doseByMonthTable = DoseAnalysis.getDoseByMonthTable(dataFrame)\n", @@ -815,13 +812,13 @@ "def getDoseTable():\n", " vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", - " dataFrame = DataFrameFilter(dataFrame).filterByCovid19And()\n", + " dataFrame = DataFrameFilter().filterByCovid19And(dataFrame)\n", " return DoseAnalysis.getDoseTable(dataFrame)\n", "\n", "def getDoseByMonthTable():\n", " vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", - " dataFrame = DataFrameFilter(dataFrame).filterByCovid19And()\n", + " dataFrame = DataFrameFilter().filterByCovid19And(dataFrame)\n", " return DoseAnalysis.getDoseByMonthTable(dataFrame)" ] },