From 978bf3d4f8649504b9c559d771b4b20cc7aecb0b Mon Sep 17 00:00:00 2001 From: frankknoll Date: Tue, 1 Feb 2022 10:48:48 +0100 Subject: [PATCH] refactoring --- HowBadIsMyBatch.ipynb | 208 +++++++++++++++++++++--------------------- 1 file changed, 106 insertions(+), 102 deletions(-) diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index 5cc9ee77695..0ce071dd7b3 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -226,50 +226,51 @@ "source": [ "from pandas.testing import assert_frame_equal\n", "\n", - "class CreateAndFilterDataFrameTest(unittest.TestCase):\n", + "class DataFrameFilterTest(unittest.TestCase):\n", "\n", - " def test_createAndFilterDataFrameFromDescrs(self):\n", + " def test_filterBy(self):\n", " # Given\n", - " vaersDescrs = [\n", - " {\n", - " 'VAERSDATA': self.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", - " data = [ ['Y', np.NaN, np.NaN],\n", - " [np.NaN, np.NaN, 'Y']],\n", - " index = [\n", - " \"0916600\",\n", - " \"0916601\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", - " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", - " ['COVID19', 'MODERNA', '025L20A', '1']],\n", - " index = [\n", - " \"0916600\",\n", - " \"0916601\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " },\n", - " {\n", - " 'VAERSDATA': self.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", - " data = [ [np.NaN, np.NaN, np.NaN],\n", - " [np.NaN, np.NaN, 'Y']],\n", - " index = [\n", - " \"1996873\",\n", - " \"1996874\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", - " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n", - " ['COVID19', 'MODERNA', '025L20A', '1']],\n", - " index = [\n", - " \"1996873\",\n", - " \"1996874\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " }\n", - " ]\n", - " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", + " dataFrameFilter = DataFrameFilter(\n", + " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", + " [\n", + " {\n", + " 'VAERSDATA': self.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", + " data = [ ['Y', np.NaN, np.NaN],\n", + " [np.NaN, np.NaN, 'Y']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"]),\n", + " 'VAERSVAX': self.createDataFrame(\n", + " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", + " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", + " ['COVID19', 'MODERNA', '025L20A', '1']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"],\n", + " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " },\n", + " {\n", + " 'VAERSDATA': self.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", + " data = [ [np.NaN, np.NaN, np.NaN],\n", + " [np.NaN, np.NaN, 'Y']],\n", + " index = [\n", + " \"1996873\",\n", + " \"1996874\"]),\n", + " 'VAERSVAX': self.createDataFrame(\n", + " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", + " data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n", + " ['COVID19', 'MODERNA', '025L20A', '1']],\n", + " index = [\n", + " \"1996873\",\n", + " \"1996874\"],\n", + " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " }\n", + " ]))\n", " \n", " # When\n", - " dataFrame = DataFrameFilter(dataFrame).filterBy(manufacturer = \"MODERNA\", dose = '1')\n", + " dataFrame = dataFrameFilter.filterBy(manufacturer = \"MODERNA\", dose = '1')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -284,31 +285,32 @@ " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", "\n", - " def test_createDataFrameFromForSevereEffects(self):\n", + " def test_filterForSevereEffects(self):\n", " # Given\n", - " vaersDescrs = [\n", - " {\n", - " 'VAERSDATA': self.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", - " data = [ ['Y', 'Y', np.NaN, 'Y', 'Y'],\n", - " [np.NaN, np.NaN, 'Y', np.NaN, 'Y']],\n", - " index = [\n", - " \"0916600\",\n", - " \"0916601\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", - " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", - " ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", - " index = [\n", - " \"0916600\",\n", - " \"0916601\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " }\n", - " ]\n", - " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", - " \n", + " dataFrameFilter = DataFrameFilter(\n", + " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", + " [\n", + " {\n", + " 'VAERSDATA': self.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", + " data = [ ['Y', 'Y', np.NaN, 'Y', 'Y'],\n", + " [np.NaN, np.NaN, 'Y', np.NaN, 'Y']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"]),\n", + " 'VAERSVAX': self.createDataFrame(\n", + " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", + " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", + " ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"],\n", + " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " }\n", + " ]))\n", + "\n", " # When\n", - " dataFrame = DataFrameFilter(dataFrame).filterForSevereEffects(dose = '1')\n", + " dataFrame = dataFrameFilter.filterForSevereEffects(dose = '1')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -323,27 +325,28 @@ "\n", " def test_createAndFilterDataFrameFromDescrsWithFirstDose(self):\n", " # Given\n", - " vaersDescrs = [\n", - " {\n", - " 'VAERSDATA': self.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", - " data = [ ['Y', np.NaN, np.NaN]],\n", - " index = [\n", - " \"1048786\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", - " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", - " ['COVID19', 'MODERNA', '030L20A', '1']],\n", - " index = [\n", - " \"1048786\",\n", - " \"1048786\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " }\n", - " ]\n", - " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", + " dataFrameFilter = DataFrameFilter(\n", + " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", + " [\n", + " {\n", + " 'VAERSDATA': self.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", + " data = [ ['Y', np.NaN, np.NaN]],\n", + " index = [\n", + " \"1048786\"]),\n", + " 'VAERSVAX': self.createDataFrame(\n", + " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", + " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", + " ['COVID19', 'MODERNA', '030L20A', '1']],\n", + " index = [\n", + " \"1048786\",\n", + " \"1048786\"],\n", + " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " }\n", + " ]))\n", " \n", " # When\n", - " dataFrame = DataFrameFilter(dataFrame).filterBy(manufacturer = \"MODERNA\", dose = '1')\n", + " dataFrame = dataFrameFilter.filterBy(manufacturer = \"MODERNA\", dose = '1')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -356,27 +359,28 @@ "\n", " def test_createAndFilterDataFrameFromDescrsWithSecondDose(self):\n", " # Given\n", - " vaersDescrs = [\n", - " {\n", - " 'VAERSDATA': self.createDataFrame(\n", - " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", - " data = [ ['Y', np.NaN, np.NaN]],\n", - " index = [\n", - " \"1048786\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", - " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", - " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", - " ['COVID19', 'MODERNA', '030L20A', '1']],\n", - " index = [\n", - " \"1048786\",\n", - " \"1048786\"],\n", - " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " }\n", - " ]\n", - " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", - " \n", + " dataFrameFilter = DataFrameFilter(\n", + " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", + " [\n", + " {\n", + " 'VAERSDATA': self.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", + " data = [ ['Y', np.NaN, np.NaN]],\n", + " index = [\n", + " \"1048786\"]),\n", + " 'VAERSVAX': self.createDataFrame(\n", + " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", + " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", + " ['COVID19', 'MODERNA', '030L20A', '1']],\n", + " index = [\n", + " \"1048786\",\n", + " \"1048786\"],\n", + " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " }\n", + " ]))\n", + "\n", " # When\n", - " dataFrame = DataFrameFilter(dataFrame).filterBy(manufacturer = \"MODERNA\", dose = '2')\n", + " dataFrame = dataFrameFilter.filterBy(manufacturer = \"MODERNA\", dose = '2')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n",