From 90c1359975e01c195c6bd6840fa629f68693ed34 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Mon, 21 Feb 2022 12:51:45 +0100 Subject: [PATCH] removing unused code --- src/HowBadIsMyBatch.ipynb | 227 +------------------------------------- 1 file changed, 4 insertions(+), 223 deletions(-) diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index ec578c2c69a..c0419aa1b7f 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -144,29 +144,8 @@ " def filterByCovid19(self, dataFrame):\n", " return dataFrame[self._isCovid19(dataFrame)]\n", "\n", - " def filterByFlu(self, dataFrame):\n", - " return dataFrame[self._isFlu(dataFrame)]\n", - "\n", - " def filterByCountry(self, dataFrame, country, countryColumnName):\n", - " return dataFrame[dataFrame[countryColumnName] == country]\n", - "\n", - " def filterBy(self, dataFrame, manufacturer = None, dose = None):\n", - " if manufacturer is None and dose is None:\n", - " return dataFrame\n", - " \n", - " return dataFrame[self._isManufacturer(dataFrame, manufacturer) & self._isDose(dataFrame, dose)]\n", - "\n", " def _isCovid19(self, dataFrame):\n", - " return dataFrame[\"VAX_TYPE\"] == \"COVID19\"\n", - "\n", - " def _isFlu(self, dataFrame):\n", - " return dataFrame[\"VAX_TYPE\"].str.startswith(\"FLU\")\n", - "\n", - " def _isManufacturer(self, dataFrame, manufacturer):\n", - " return dataFrame[\"VAX_MANU\"] == manufacturer if manufacturer is not None else True\n", - "\n", - " def _isDose(self, dataFrame, dose):\n", - " return dataFrame[\"VAX_DOSE_SERIES\"].str.contains(dose) if dose is not None else True\n" + " return dataFrame[\"VAX_TYPE\"] == \"COVID19\"\n" ] }, { @@ -516,95 +495,7 @@ "\n", "class DataFrameFilterTest(unittest.TestCase):\n", "\n", - " def test_filterByFlu(self):\n", - " # Given\n", - " dataFrame = TestHelper.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ [0, 0, 0, 'FLU(H1N1)', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLU3', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLU4', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUA3', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUA4', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUC3', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUC4', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUN(H1N1)', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUN3', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUN4', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUR3', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUR4', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUX', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUX(H1N1)', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 1, 'COVID19', 'MODERNA', '025L20A', '1']],\n", - " index = [\n", - " \"801410\",\n", - " \"801411\",\n", - " \"801412\",\n", - " \"801413\",\n", - " \"801414\",\n", - " \"801415\",\n", - " \"801416\",\n", - " \"801417\",\n", - " \"801418\",\n", - " \"801419\",\n", - " \"801420\",\n", - " \"801421\",\n", - " \"801422\",\n", - " \"801423\",\n", - " \"801424\"])\n", - " dataFrameFilter = DataFrameFilter()\n", - " \n", - " # When\n", - " dataFrameActual = dataFrameFilter.filterByFlu(dataFrame)\n", - " \n", - " # Then\n", - " dataFrameExpected = TestHelper.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ [0, 0, 0, 'FLU(H1N1)', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLU3', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLU4', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUA3', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUA4', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUC3', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUC4', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUN(H1N1)', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUN3', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUN4', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUR3', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUR4', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUX', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1'],\n", - " [0, 0, 0, 'FLUX(H1N1)', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1']],\n", - " index = [\n", - " \"801410\",\n", - " \"801411\",\n", - " \"801412\",\n", - " \"801413\",\n", - " \"801414\",\n", - " \"801415\",\n", - " \"801416\",\n", - " \"801417\",\n", - " \"801418\",\n", - " \"801419\",\n", - " \"801420\",\n", - " \"801421\",\n", - " \"801422\",\n", - " \"801423\"])\n", - " assert_frame_equal(dataFrameActual, dataFrameExpected, check_dtype = False)\n", - "\n", - " def test_filterByNothing(self):\n", - " # Given\n", - " dataFrame = TestHelper.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ [0, 0, 0, 'FLU(H1N1)', 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1']],\n", - " index = [\"801410\"])\n", - " dataFrameFilter = DataFrameFilter()\n", - " \n", - " # When\n", - " dataFrameActual = dataFrameFilter.filterBy(dataFrame, manufacturer = None, dose = None)\n", - " \n", - " # Then\n", - " assert_frame_equal(dataFrameActual, dataFrame, check_dtype = True)\n", - "\n", - " def test_filterByCovid19_filterBy(self):\n", + " def test_filterByCovid19(self):\n", " # Given\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", " [\n", @@ -626,14 +517,14 @@ " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " },\n", " {\n", - " 'VAERSDATA': TestHelper.createDataFrame(\n", + " 'VAERSDATA': TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " data = [ [0, 0, 0],\n", " [0, 0, 1]],\n", " index = [\n", " \"1996873\",\n", " \"1996874\"]),\n", - " 'VAERSVAX': TestHelper.createDataFrame(\n", + " 'VAERSVAX': TestHelper.createDataFrame(\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n", @@ -647,7 +538,6 @@ " \n", " # When\n", " dataFrame = dataFrameFilter.filterByCovid19(dataFrame)\n", - " dataFrame = dataFrameFilter.filterBy(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", " \n", " # Then\n", " dataFrameExpected = TestHelper.createDataFrame(\n", @@ -660,115 +550,6 @@ " \"0916601\",\n", " \"1996874\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", - "\n", - " def test_filterByDose(self):\n", - " # Given\n", - " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", - " [\n", - " {\n", - " 'VAERSDATA': TestHelper.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", - " data = [ [1, 1, 0, 1, 1],\n", - " [0, 0, 1, 0, 1]],\n", - " index = [\n", - " \"0916600\",\n", - " \"0916601\"]),\n", - " 'VAERSVAX': TestHelper.createDataFrame(\n", - " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", - " ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", - " index = [\n", - " \"0916600\",\n", - " \"0916601\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " }\n", - " ])\n", - " dataFrameFilter = DataFrameFilter()\n", - " dataFrame = dataFrameFilter.filterByCovid19(dataFrame)\n", - "\n", - " # When\n", - " dataFrame = dataFrameFilter.filterBy(dataFrame, dose = '1')\n", - " \n", - " # Then\n", - " dataFrameExpected = TestHelper.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ [1, 1, 0, 1, 1, 'COVID19', 'MODERNA', '037K20A', '1'],\n", - " [0, 0, 1, 0, 1, 'COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", - " index = [\n", - " \"0916600\",\n", - " \"0916601\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", - "\n", - " def test_filterByFirstDose(self):\n", - " # Given\n", - " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", - " [\n", - " {\n", - " 'VAERSDATA': TestHelper.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", - " data = [ [1, 0, 0]],\n", - " index = [\n", - " \"1048786\"]),\n", - " 'VAERSVAX': TestHelper.createDataFrame(\n", - " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", - " ['COVID19', 'MODERNA', '030L20A', '1']],\n", - " index = [\n", - " \"1048786\",\n", - " \"1048786\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " }\n", - " ])\n", - " dataFrameFilter = DataFrameFilter()\n", - " \n", - " # When\n", - " dataFrame = dataFrameFilter.filterByCovid19(dataFrame)\n", - " dataFrame = dataFrameFilter.filterBy(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", - " \n", - " # Then\n", - " dataFrameExpected = TestHelper.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1']],\n", - " index = [\n", - " \"1048786\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", - "\n", - " def test_filterBySecondDose(self):\n", - " # Given\n", - " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", - " [\n", - " {\n", - " 'VAERSDATA': TestHelper.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", - " data = [ [1, 0, 0]],\n", - " index = [\n", - " \"1048786\"]),\n", - " 'VAERSVAX': TestHelper.createDataFrame(\n", - " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", - " ['COVID19', 'MODERNA', '030L20A', '1']],\n", - " index = [\n", - " \"1048786\",\n", - " \"1048786\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " }\n", - " ])\n", - " dataFrameFilter = DataFrameFilter()\n", - "\n", - " # When\n", - " dataFrame = dataFrameFilter.filterByCovid19(dataFrame)\n", - " dataFrame = dataFrameFilter.filterBy(dataFrame, manufacturer = \"MODERNA\", dose = '2')\n", - " \n", - " # Then\n", - " dataFrameExpected = TestHelper.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2']],\n", - " index = [\n", - " \"1048786\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n" ] },