refactoring

This commit is contained in:
frankknoll
2022-01-30 22:31:01 +01:00
parent 9092013db2
commit 05eea3e207

View File

@@ -21,27 +21,28 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def filter(df, manufacturer, dose):\n", "def filterDataFrame(df, manufacturer, dose):\n",
" return df[\n", " return df[\n",
" (df[\"VAX_TYPE\"] == \"COVID19\") &\n", " (df[\"VAX_TYPE\"] == \"COVID19\") &\n",
" (df[\"VAX_MANU\"] == manufacturer) &\n", " (df[\"VAX_MANU\"] == manufacturer) &\n",
" (df[\"VAX_DOSE_SERIES\"].str.contains(dose))]\n", " (df[\"VAX_DOSE_SERIES\"].str.contains(dose))]\n",
"\n", "\n",
"def createDataFrameFromDescr(vaersDescr, manufacturer, dose):\n", "def createDataFrameFromDescr(vaersDescr):\n",
" return filter(\n", " return pd.merge(\n",
" pd.merge(\n",
" vaersDescr['VAERSDATA'],\n", " vaersDescr['VAERSDATA'],\n",
" vaersDescr['VAERSVAX'],\n", " vaersDescr['VAERSVAX'],\n",
" how='left',\n", " how='left',\n",
" left_index = True,\n", " left_index = True,\n",
" right_index = True,\n", " right_index = True,\n",
" validate = 'one_to_many'),\n", " validate = 'one_to_many')\n",
" manufacturer,\n",
" dose)\n",
"\n", "\n",
"def createDataFrameFromDescrs(vaersDescrs, manufacturer, dose):\n", "def createAndFilterDataFrameFromDescr(vaersDescr, manufacturer, dose):\n",
" _createDataFrameFromDescr = lambda vaersDescr: createDataFrameFromDescr(vaersDescr, manufacturer, dose)\n", " dataFrame = createDataFrameFromDescr(vaersDescr)\n",
" dataFrames = map(_createDataFrameFromDescr, vaersDescrs)\n", " return filterDataFrame(dataFrame, manufacturer, dose)\n",
"\n",
"def createAndFilterDataFrameFromDescrs(vaersDescrs, manufacturer, dose):\n",
" _createAndFilterDataFrameFromDescr = lambda vaersDescr: createAndFilterDataFrameFromDescr(vaersDescr, manufacturer, dose)\n",
" dataFrames = map(_createAndFilterDataFrameFromDescr, vaersDescrs)\n",
" return pd.concat(dataFrames)\n" " return pd.concat(dataFrames)\n"
] ]
}, },
@@ -75,8 +76,8 @@
" dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n", " dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n",
" }\n", " }\n",
"\n", "\n",
"def createDataFrameFromFiles(dataDir, manufacturer, dose):\n", "def createAndFilterDataFrameFromFiles(dataDir, manufacturer, dose):\n",
" return createDataFrameFromDescrs(\n", " return createAndFilterDataFrameFromDescrs(\n",
" [readVaersDescr(dataDir, \"2021\"), readVaersDescr(dataDir, \"2022\")],\n", " [readVaersDescr(dataDir, \"2021\"), readVaersDescr(dataDir, \"2022\")],\n",
" manufacturer,\n", " manufacturer,\n",
" dose)" " dose)"
@@ -121,9 +122,9 @@
"source": [ "source": [
"from pandas.testing import assert_frame_equal\n", "from pandas.testing import assert_frame_equal\n",
"\n", "\n",
"class CreateDataFrameTest(unittest.TestCase):\n", "class CreateAndFilterDataFrameTest(unittest.TestCase):\n",
"\n", "\n",
" def test_createDataFrameFromDescrs(self):\n", " def test_createAndFilterDataFrameFromDescrs(self):\n",
" # Given\n", " # Given\n",
" vaersDescrs = [\n", " vaersDescrs = [\n",
" {\n", " {\n",
@@ -163,7 +164,7 @@
" ]\n", " ]\n",
" \n", " \n",
" # When\n", " # When\n",
" dataFrame = createDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '1')\n", " dataFrame = createAndFilterDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '1')\n",
" \n", " \n",
" # Then\n", " # Then\n",
" dataFrameExpected = self.createDataFrame(\n", " dataFrameExpected = self.createDataFrame(\n",
@@ -178,7 +179,7 @@
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n", "\n",
" def test_createDataFrameFromDescrsWithFirstDose(self):\n", " def test_createAndFilterDataFrameFromDescrsWithFirstDose(self):\n",
" # Given\n", " # Given\n",
" vaersDescrs = [\n", " vaersDescrs = [\n",
" {\n", " {\n",
@@ -199,7 +200,7 @@
" ]\n", " ]\n",
" \n", " \n",
" # When\n", " # When\n",
" dataFrame = createDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '1')\n", " dataFrame = createAndFilterDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '1')\n",
" \n", " \n",
" # Then\n", " # Then\n",
" dataFrameExpected = self.createDataFrame(\n", " dataFrameExpected = self.createDataFrame(\n",
@@ -210,7 +211,7 @@
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n", "\n",
" def test_createDataFrameFromDescrsWithSecondDose(self):\n", " def test_createAndFilterDataFrameFromDescrsWithSecondDose(self):\n",
" # Given\n", " # Given\n",
" vaersDescrs = [\n", " vaersDescrs = [\n",
" {\n", " {\n",
@@ -231,7 +232,7 @@
" ]\n", " ]\n",
" \n", " \n",
" # When\n", " # When\n",
" dataFrame = createDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '2')\n", " dataFrame = createAndFilterDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '2')\n",
" \n", " \n",
" # Then\n", " # Then\n",
" dataFrameExpected = self.createDataFrame(\n", " dataFrameExpected = self.createDataFrame(\n",
@@ -258,7 +259,7 @@
"class BatchCodeTableTest(unittest.TestCase):\n", "class BatchCodeTableTest(unittest.TestCase):\n",
"\n", "\n",
" def test_createBatchCodeTable2(self):\n", " def test_createBatchCodeTable2(self):\n",
" dataFrame = createDataFrameFromDescrs(\n", " dataFrame = createAndFilterDataFrameFromDescrs(\n",
" [\n", " [\n",
" {\n", " {\n",
" 'VAERSDATA': self.createDataFrame(\n", " 'VAERSDATA': self.createDataFrame(\n",
@@ -301,7 +302,7 @@
" self._test_createBatchCodeTable(dataFrame);\n", " self._test_createBatchCodeTable(dataFrame);\n",
"\n", "\n",
" def test_createBatchCodeTable(self):\n", " def test_createBatchCodeTable(self):\n",
" self._test_createBatchCodeTable(createDataFrameFromFiles(\"test/VAERS\", \"MODERNA\", '1'));\n", " self._test_createBatchCodeTable(createAndFilterDataFrameFromFiles(\"test/VAERS\", \"MODERNA\", '1'));\n",
"\n", "\n",
" def _test_createBatchCodeTable(self, dataFrame):\n", " def _test_createBatchCodeTable(self, dataFrame):\n",
" # When\n", " # When\n",
@@ -340,7 +341,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"def saveBatchCodeTable(manufacturer, excelFile):\n", "def saveBatchCodeTable(manufacturer, excelFile):\n",
" batchCodeTable = createBatchCodeTable(createDataFrameFromFiles(\"VAERS\", manufacturer, '1'))\n", " batchCodeTable = createBatchCodeTable(createAndFilterDataFrameFromFiles(\"VAERS\", manufacturer, '1'))\n",
" display(manufacturer, batchCodeTable)\n", " display(manufacturer, batchCodeTable)\n",
" batchCodeTable.to_excel(excelFile)" " batchCodeTable.to_excel(excelFile)"
] ]