refactoring

This commit is contained in:
frankknoll
2022-01-30 22:31:01 +01:00
parent 9092013db2
commit 05eea3e207

View File

@@ -21,27 +21,28 @@
"metadata": {},
"outputs": [],
"source": [
"def filter(df, manufacturer, dose):\n",
"def filterDataFrame(df, manufacturer, dose):\n",
" return df[\n",
" (df[\"VAX_TYPE\"] == \"COVID19\") &\n",
" (df[\"VAX_MANU\"] == manufacturer) &\n",
" (df[\"VAX_DOSE_SERIES\"].str.contains(dose))]\n",
"\n",
"def createDataFrameFromDescr(vaersDescr, manufacturer, dose):\n",
" return filter(\n",
" pd.merge(\n",
"def createDataFrameFromDescr(vaersDescr):\n",
" return pd.merge(\n",
" vaersDescr['VAERSDATA'],\n",
" vaersDescr['VAERSVAX'],\n",
" how='left',\n",
" left_index = True,\n",
" right_index = True,\n",
" validate = 'one_to_many'),\n",
" manufacturer,\n",
" dose)\n",
" validate = 'one_to_many')\n",
"\n",
"def createDataFrameFromDescrs(vaersDescrs, manufacturer, dose):\n",
" _createDataFrameFromDescr = lambda vaersDescr: createDataFrameFromDescr(vaersDescr, manufacturer, dose)\n",
" dataFrames = map(_createDataFrameFromDescr, vaersDescrs)\n",
"def createAndFilterDataFrameFromDescr(vaersDescr, manufacturer, dose):\n",
" dataFrame = createDataFrameFromDescr(vaersDescr)\n",
" return filterDataFrame(dataFrame, manufacturer, dose)\n",
"\n",
"def createAndFilterDataFrameFromDescrs(vaersDescrs, manufacturer, dose):\n",
" _createAndFilterDataFrameFromDescr = lambda vaersDescr: createAndFilterDataFrameFromDescr(vaersDescr, manufacturer, dose)\n",
" dataFrames = map(_createAndFilterDataFrameFromDescr, vaersDescrs)\n",
" return pd.concat(dataFrames)\n"
]
},
@@ -75,8 +76,8 @@
" dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n",
" }\n",
"\n",
"def createDataFrameFromFiles(dataDir, manufacturer, dose):\n",
" return createDataFrameFromDescrs(\n",
"def createAndFilterDataFrameFromFiles(dataDir, manufacturer, dose):\n",
" return createAndFilterDataFrameFromDescrs(\n",
" [readVaersDescr(dataDir, \"2021\"), readVaersDescr(dataDir, \"2022\")],\n",
" manufacturer,\n",
" dose)"
@@ -121,9 +122,9 @@
"source": [
"from pandas.testing import assert_frame_equal\n",
"\n",
"class CreateDataFrameTest(unittest.TestCase):\n",
"class CreateAndFilterDataFrameTest(unittest.TestCase):\n",
"\n",
" def test_createDataFrameFromDescrs(self):\n",
" def test_createAndFilterDataFrameFromDescrs(self):\n",
" # Given\n",
" vaersDescrs = [\n",
" {\n",
@@ -163,7 +164,7 @@
" ]\n",
" \n",
" # When\n",
" dataFrame = createDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '1')\n",
" dataFrame = createAndFilterDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '1')\n",
" \n",
" # Then\n",
" dataFrameExpected = self.createDataFrame(\n",
@@ -178,7 +179,7 @@
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n",
" def test_createDataFrameFromDescrsWithFirstDose(self):\n",
" def test_createAndFilterDataFrameFromDescrsWithFirstDose(self):\n",
" # Given\n",
" vaersDescrs = [\n",
" {\n",
@@ -199,7 +200,7 @@
" ]\n",
" \n",
" # When\n",
" dataFrame = createDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '1')\n",
" dataFrame = createAndFilterDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '1')\n",
" \n",
" # Then\n",
" dataFrameExpected = self.createDataFrame(\n",
@@ -210,7 +211,7 @@
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n",
" def test_createDataFrameFromDescrsWithSecondDose(self):\n",
" def test_createAndFilterDataFrameFromDescrsWithSecondDose(self):\n",
" # Given\n",
" vaersDescrs = [\n",
" {\n",
@@ -231,7 +232,7 @@
" ]\n",
" \n",
" # When\n",
" dataFrame = createDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '2')\n",
" dataFrame = createAndFilterDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '2')\n",
" \n",
" # Then\n",
" dataFrameExpected = self.createDataFrame(\n",
@@ -258,7 +259,7 @@
"class BatchCodeTableTest(unittest.TestCase):\n",
"\n",
" def test_createBatchCodeTable2(self):\n",
" dataFrame = createDataFrameFromDescrs(\n",
" dataFrame = createAndFilterDataFrameFromDescrs(\n",
" [\n",
" {\n",
" 'VAERSDATA': self.createDataFrame(\n",
@@ -301,7 +302,7 @@
" self._test_createBatchCodeTable(dataFrame);\n",
"\n",
" def test_createBatchCodeTable(self):\n",
" self._test_createBatchCodeTable(createDataFrameFromFiles(\"test/VAERS\", \"MODERNA\", '1'));\n",
" self._test_createBatchCodeTable(createAndFilterDataFrameFromFiles(\"test/VAERS\", \"MODERNA\", '1'));\n",
"\n",
" def _test_createBatchCodeTable(self, dataFrame):\n",
" # When\n",
@@ -340,7 +341,7 @@
"outputs": [],
"source": [
"def saveBatchCodeTable(manufacturer, excelFile):\n",
" batchCodeTable = createBatchCodeTable(createDataFrameFromFiles(\"VAERS\", manufacturer, '1'))\n",
" batchCodeTable = createBatchCodeTable(createAndFilterDataFrameFromFiles(\"VAERS\", manufacturer, '1'))\n",
" display(manufacturer, batchCodeTable)\n",
" batchCodeTable.to_excel(excelFile)"
]