From 6e46087bc1598e59f49102f01ed9376cc20b4f55 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Sun, 6 Feb 2022 12:46:54 +0100 Subject: [PATCH] refactoring --- HowBadIsMyBatch.ipynb | 50 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index 47f8c2a3978..a942762e787 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -24,6 +24,10 @@ "class DataFrameNormalizer:\n", " \n", " @staticmethod\n", + " def convertVAX_LOTColumnToUpperCase(dataFrame):\n", + " dataFrame['VAX_LOT'] = dataFrame['VAX_LOT'].str.upper()\n", + "\n", + " @staticmethod\n", " def convertColumnsOfDataFrameToNumerics(dataFrame, columns):\n", " for column in columns:\n", " DataFrameNormalizer._convertColumnOfDataFrameToNumeric(dataFrame, column)\n", @@ -73,10 +77,12 @@ " return VAERSDATA\n", "\n", " def _readVAERSVAX(self, file):\n", - " return self._read_csv(\n", + " VAERSVAX = self._read_csv(\n", " file = file,\n", " usecols = ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n", " dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n", + " DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX)\n", + " return VAERSVAX\n", "\n", " def _read_csv(self, file, **kwargs):\n", " return pd.read_csv(\n", @@ -306,6 +312,48 @@ "import unittest" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "ccb9838d", + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.testing import assert_frame_equal\n", + "\n", + "class DataFrameNormalizerTest(unittest.TestCase):\n", + "\n", + " def test_convertVAX_LOTColumnToUpperCase(self):\n", + " # Given\n", + " dataFrame = self.createDataFrame(\n", + " columns = ['VAX_LOT'],\n", + " data = [ ['037K20A'],\n", + " ['025l20A'],\n", + " ['025L20A']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\",\n", + " \"1996874\"])\n", + " \n", + " # When\n", + " DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(dataFrame)\n", + " \n", + " # Then\n", + " dataFrameExpected = self.createDataFrame(\n", + " columns = ['VAX_LOT'],\n", + " data = [ ['037K20A'],\n", + " ['025L20A'],\n", + " ['025L20A']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\",\n", + " \"1996874\"])\n", + " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", + "\n", + " def createDataFrame(self, index, columns, data, dtypes = {}):\n", + " return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n" + ] + }, { "cell_type": "code", "execution_count": null,