refactoring

This commit is contained in:
frankknoll
2022-02-06 12:46:54 +01:00
parent a33603ca91
commit 6e46087bc1

View File

@@ -24,6 +24,10 @@
"class DataFrameNormalizer:\n",
" \n",
" @staticmethod\n",
" def convertVAX_LOTColumnToUpperCase(dataFrame):\n",
" dataFrame['VAX_LOT'] = dataFrame['VAX_LOT'].str.upper()\n",
"\n",
" @staticmethod\n",
" def convertColumnsOfDataFrameToNumerics(dataFrame, columns):\n",
" for column in columns:\n",
" DataFrameNormalizer._convertColumnOfDataFrameToNumeric(dataFrame, column)\n",
@@ -73,10 +77,12 @@
" return VAERSDATA\n",
"\n",
"    def _readVAERSVAX(self, file):\n",
"        \"\"\"Load the vaccination columns from file with VAX_LOT upper-cased.\n",
"\n",
"        Reads VAERS_ID, VAX_DOSE_SERIES, VAX_TYPE, VAX_MANU and VAX_LOT via\n",
"        self._read_csv (VAX_DOSE_SERIES with dtype \"string\"), upper-cases\n",
"        VAX_LOT and returns the resulting table.\n",
"        \"\"\"\n",
"        VAERSVAX = self._read_csv(\n",
"            file = file,\n",
"            usecols = ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n",
"            dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n",
"        # Presumably so that lot numbers differing only in letter case compare equal.\n",
"        # NOTE(review): VAX_LOT gets no explicit dtype here; confirm it is always read as text.\n",
"        DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX)\n",
"        return VAERSVAX\n",
"\n",
" def _read_csv(self, file, **kwargs):\n",
" return pd.read_csv(\n",
@@ -306,6 +312,48 @@
"import unittest"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ccb9838d",
"metadata": {},
"outputs": [],
"source": [
"from pandas.testing import assert_frame_equal\n",
"\n",
"class DataFrameNormalizerTest(unittest.TestCase):\n",
"\n",
" def test_convertVAX_LOTColumnToUpperCase(self):\n",
" # Given\n",
" dataFrame = self.createDataFrame(\n",
" columns = ['VAX_LOT'],\n",
" data = [ ['037K20A'],\n",
" ['025l20A'],\n",
" ['025L20A']],\n",
" index = [\n",
" \"0916600\",\n",
" \"0916601\",\n",
" \"1996874\"])\n",
" \n",
" # When\n",
" DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(dataFrame)\n",
" \n",
" # Then\n",
" dataFrameExpected = self.createDataFrame(\n",
" columns = ['VAX_LOT'],\n",
" data = [ ['037K20A'],\n",
" ['025L20A'],\n",
" ['025L20A']],\n",
" index = [\n",
" \"0916600\",\n",
" \"0916601\",\n",
" \"1996874\"])\n",
" assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n",
" def createDataFrame(self, index, columns, data, dtypes = {}):\n",
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
]
},
{
"cell_type": "code",
"execution_count": null,