From a2db3a869f5550bf99d6be78785847c05486055e Mon Sep 17 00:00:00 2001 From: frankknoll Date: Mon, 7 Feb 2022 12:52:14 +0100 Subject: [PATCH] refactoring --- HowBadIsMyBatch.ipynb | 100 +++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 55 deletions(-) diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index 4e520e1e1e0..ba14ee280f1 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -370,6 +370,20 @@ "import unittest" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcc855dd", + "metadata": {}, + "outputs": [], + "source": [ + "class TestHelper:\n", + "\n", + " @staticmethod\n", + " def createDataFrame(index, columns, data, dtypes = {}):\n", + " return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -383,7 +397,7 @@ "\n", " def test_convertVAX_LOTColumnToUpperCase(self):\n", " # Given\n", - " dataFrame = self.createDataFrame(\n", + " dataFrame = TestHelper.createDataFrame(\n", " columns = ['VAX_LOT'],\n", " data = [ ['037K20A'],\n", " ['025l20A'],\n", @@ -397,7 +411,7 @@ " DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(dataFrame)\n", " \n", " # Then\n", - " dataFrameExpected = self.createDataFrame(\n", + " dataFrameExpected = TestHelper.createDataFrame(\n", " columns = ['VAX_LOT'],\n", " data = [ ['037K20A'],\n", " ['025L20A'],\n", @@ -406,10 +420,7 @@ " \"0916600\",\n", " \"0916601\",\n", " \"1996874\"])\n", - " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", - "\n", - " def createDataFrame(self, index, columns, data, dtypes = {}):\n", - " return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n" + " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n" ] }, { @@ -428,14 +439,14 @@ " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", " [\n", " {\n", - " 'VAERSDATA': self.createDataFrame(\n", + " 'VAERSDATA': TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " data = [ [1, 0, 0],\n", " [0, 0, 1]],\n", " index = [\n", " \"0916600\",\n", " \"0916601\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", + " 'VAERSVAX': TestHelper.createDataFrame(\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n", @@ -445,14 +456,14 @@ " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " },\n", " {\n", - " 'VAERSDATA': self.createDataFrame(\n", + " 'VAERSDATA': TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " data = [ [0, 0, 0],\n", " [0, 0, 1]],\n", " index = [\n", " \"1996873\",\n", " \"1996874\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", + " 'VAERSVAX': TestHelper.createDataFrame(\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n", @@ -469,7 +480,7 @@ " dataFrame = dataFrameFilter.filterBy(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", " \n", " # Then\n", - " dataFrameExpected = self.createDataFrame(\n", + " dataFrameExpected = TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '037K20A', '1'],\n", " [0, 0, 1, 'COVID19', 'MODERNA', '025L20A', '1'],\n", @@ -486,14 +497,14 @@ " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", " [\n", " {\n", - " 'VAERSDATA': self.createDataFrame(\n", + " 'VAERSDATA': TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", " data = [ [1, 1, 0, 1, 1],\n", " [0, 0, 1, 0, 1]],\n", " index = [\n", " \"0916600\",\n", " \"0916601\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", + " 'VAERSVAX': TestHelper.createDataFrame(\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", " ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", @@ -510,7 +521,7 @@ " dataFrame = dataFrameFilter.filterBy(dataFrame, dose = '1')\n", " \n", " # Then\n", - " dataFrameExpected = self.createDataFrame(\n", + " dataFrameExpected = TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ [1, 1, 0, 1, 1, 'COVID19', 'MODERNA', '037K20A', '1'],\n", " [0, 0, 1, 0, 1, 'COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", @@ -525,12 +536,12 @@ " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", " [\n", " {\n", - " 'VAERSDATA': self.createDataFrame(\n", + " 'VAERSDATA': TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " data = [ [1, 0, 0]],\n", " index = [\n", " \"1048786\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", + " 'VAERSVAX': TestHelper.createDataFrame(\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", " ['COVID19', 'MODERNA', '030L20A', '1']],\n", @@ -547,7 +558,7 @@ " dataFrame = dataFrameFilter.filterBy(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", " \n", " # Then\n", - " dataFrameExpected = self.createDataFrame(\n", + " dataFrameExpected = TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1']],\n", " index = [\n", @@ -560,12 +571,12 @@ " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", " [\n", " {\n", - " 'VAERSDATA': self.createDataFrame(\n", + " 'VAERSDATA': TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " data = [ [1, 0, 0]],\n", " index = [\n", " \"1048786\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", + " 'VAERSVAX': TestHelper.createDataFrame(\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", " ['COVID19', 'MODERNA', '030L20A', '1']],\n", @@ -582,16 +593,13 @@ " dataFrame = dataFrameFilter.filterBy(dataFrame, manufacturer = \"MODERNA\", dose = '2')\n", " \n", " # Then\n", - " dataFrameExpected = self.createDataFrame(\n", + " dataFrameExpected = TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2']],\n", " index = [\n", " \"1048786\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", - " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", - "\n", - " def createDataFrame(self, index, columns, data, dtypes = {}):\n", - " return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n" + " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n" ] }, { @@ -610,14 +618,14 @@ " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", " [\n", " {\n", - " 'VAERSDATA': self.createDataFrame(\n", + " 'VAERSDATA': TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", " data = [ [1, 1, 0, 1, 1],\n", " [0, 0, 1, 0, 1]],\n", " index = [\n", " \"0916600\",\n", " \"0916601\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", + " 'VAERSVAX': TestHelper.createDataFrame(\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", " ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", @@ -649,14 +657,14 @@ " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", " [\n", " {\n", - " 'VAERSDATA': self.createDataFrame(\n", + " 'VAERSDATA': TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", " data = [ [1, 0, 0, 0, 0],\n", " [0, 0, 1, 0, 0]],\n", " index = [\n", " \"0916600\",\n", " \"0916601\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", + " 'VAERSVAX': TestHelper.createDataFrame(\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n", @@ -666,14 +674,14 @@ " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " },\n", " {\n", - " 'VAERSDATA': self.createDataFrame(\n", + " 'VAERSDATA': TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", " data = [ [0, 0, 0, 0, 0],\n", " [0, 0, 1, 0, 0]],\n", " index = [\n", " \"1996873\",\n", " \"1996874\"]),\n", - " 'VAERSVAX': self.createDataFrame(\n", + " 'VAERSVAX': TestHelper.createDataFrame(\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n", @@ -704,10 +712,7 @@ " 'LIFE THREATENING ILLNESSES': [0, 0]\n", " },\n", " index = pd.Index(['025L20A', '037K20A'], name = 'VAX_LOT'))\n", - " assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)\n", - "\n", - " def createDataFrame(self, index, columns, data, dtypes = {}):\n", - " return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n" + " assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)\n" ] }, { @@ -723,7 +728,7 @@ "\n", " def test_getDoseTable(self):\n", " # Given\n", - " dataFrame = self.createDataFrame(\n", + " dataFrame = TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2'],\n", " [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1'],\n", @@ -753,7 +758,7 @@ " def test_getDoseByMonthTable(self):\n", " # Given\n", " parseDate = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\")\n", - " dataFrame = self.createDataFrame(\n", + " dataFrame = TestHelper.createDataFrame(\n", " columns = ['RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ [parseDate('01/01/2021'), 1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2'],\n", " [parseDate('01/01/2021'), 1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1'],\n", @@ -784,10 +789,7 @@ " (2021, 1, '2'),\n", " ],\n", " names = ('Year', 'Month', 'Dose'))),\n", - " check_index_type = False)\n", - "\n", - " def createDataFrame(self, index, columns, data, dtypes = {}):\n", - " return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n" + " check_index_type = False)\n" ] }, { @@ -803,7 +805,7 @@ "\n", " def test_getInternationalLotTable(self):\n", " # Given\n", - " dataFrame = self.createDataFrame(\n", + " dataFrame = TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE'],\n", " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 'GBPFIZER INC2020486806'],\n", " [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224'],\n", @@ -823,7 +825,7 @@ " # Then\n", " assert_frame_equal(\n", " internationalLotTable,\n", - " self.createDataFrame(\n", + " TestHelper.createDataFrame(\n", " columns = ['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Severe reports (%)'],\n", " data = [ [2, 2, 1, 1, (2 + 1 + 1) / 2 * 100],\n", " [1, 1, 0, 0, (1 + 0 + 0) / 1 * 100],\n", @@ -834,11 +836,7 @@ " 'United Kingdom',\n", " 'Unknown Country'\n", " ],\n", - " name = 'Country')))\n", - "\n", - " # FK-TODO: createDataFrame() is defined in almost every test class: DRY \n", - " def createDataFrame(self, index, columns, data, dtypes = {}):\n", - " return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n" + " name = 'Country')))\n" ] }, { @@ -1056,14 +1054,6 @@ "internationalLotTable.to_excel('results/International_Deadly_Lots.xlsx')\n", "internationalLotTable" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "664ebc44", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {