refactoring
This commit is contained in:
@@ -24,6 +24,10 @@
|
||||
"class DataFrameNormalizer:\n",
|
||||
" \n",
|
||||
" @staticmethod\n",
|
||||
" def convertVAX_LOTColumnToUpperCase(dataFrame):\n",
|
||||
" dataFrame['VAX_LOT'] = dataFrame['VAX_LOT'].str.upper()\n",
|
||||
"\n",
|
||||
" @staticmethod\n",
|
||||
" def convertColumnsOfDataFrameToNumerics(dataFrame, columns):\n",
|
||||
" for column in columns:\n",
|
||||
" DataFrameNormalizer._convertColumnOfDataFrameToNumeric(dataFrame, column)\n",
|
||||
@@ -73,10 +77,12 @@
|
||||
" return VAERSDATA\n",
|
||||
"\n",
|
||||
" def _readVAERSVAX(self, file):\n",
|
||||
" return self._read_csv(\n",
|
||||
" VAERSVAX = self._read_csv(\n",
|
||||
" file = file,\n",
|
||||
" usecols = ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n",
|
||||
" dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n",
|
||||
" DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX)\n",
|
||||
" return VAERSVAX\n",
|
||||
"\n",
|
||||
" def _read_csv(self, file, **kwargs):\n",
|
||||
" return pd.read_csv(\n",
|
||||
@@ -306,6 +312,48 @@
|
||||
"import unittest"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ccb9838d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pandas.testing import assert_frame_equal\n",
|
||||
"\n",
|
||||
"class DataFrameNormalizerTest(unittest.TestCase):\n",
|
||||
"\n",
|
||||
" def test_convertVAX_LOTColumnToUpperCase(self):\n",
|
||||
" # Given\n",
|
||||
" dataFrame = self.createDataFrame(\n",
|
||||
" columns = ['VAX_LOT'],\n",
|
||||
" data = [ ['037K20A'],\n",
|
||||
" ['025l20A'],\n",
|
||||
" ['025L20A']],\n",
|
||||
" index = [\n",
|
||||
" \"0916600\",\n",
|
||||
" \"0916601\",\n",
|
||||
" \"1996874\"])\n",
|
||||
" \n",
|
||||
" # When\n",
|
||||
" DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(dataFrame)\n",
|
||||
" \n",
|
||||
" # Then\n",
|
||||
" dataFrameExpected = self.createDataFrame(\n",
|
||||
" columns = ['VAX_LOT'],\n",
|
||||
" data = [ ['037K20A'],\n",
|
||||
" ['025L20A'],\n",
|
||||
" ['025L20A']],\n",
|
||||
" index = [\n",
|
||||
" \"0916600\",\n",
|
||||
" \"0916601\",\n",
|
||||
" \"1996874\"])\n",
|
||||
" assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
|
||||
"\n",
|
||||
" def createDataFrame(self, index, columns, data, dtypes = {}):\n",
|
||||
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
||||
Reference in New Issue
Block a user