From 05eea3e207ee50b94637d22a31fe1fb75a37a21b Mon Sep 17 00:00:00 2001 From: frankknoll Date: Sun, 30 Jan 2022 22:31:01 +0100 Subject: [PATCH] refactoring --- HowBadIsMyBatch.ipynb | 45 ++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index ecd994e1bcc..87f429166d4 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -21,27 +21,28 @@ "metadata": {}, "outputs": [], "source": [ - "def filter(df, manufacturer, dose):\n", + "def filterDataFrame(df, manufacturer, dose):\n", " return df[\n", " (df[\"VAX_TYPE\"] == \"COVID19\") &\n", " (df[\"VAX_MANU\"] == manufacturer) &\n", " (df[\"VAX_DOSE_SERIES\"].str.contains(dose))]\n", "\n", - "def createDataFrameFromDescr(vaersDescr, manufacturer, dose):\n", - " return filter(\n", - " pd.merge(\n", + "def createDataFrameFromDescr(vaersDescr):\n", + " return pd.merge(\n", " vaersDescr['VAERSDATA'],\n", " vaersDescr['VAERSVAX'],\n", " how='left',\n", " left_index = True,\n", " right_index = True,\n", - " validate = 'one_to_many'),\n", - " manufacturer,\n", - " dose)\n", + " validate = 'one_to_many')\n", "\n", - "def createDataFrameFromDescrs(vaersDescrs, manufacturer, dose):\n", - " _createDataFrameFromDescr = lambda vaersDescr: createDataFrameFromDescr(vaersDescr, manufacturer, dose)\n", - " dataFrames = map(_createDataFrameFromDescr, vaersDescrs)\n", + "def createAndFilterDataFrameFromDescr(vaersDescr, manufacturer, dose):\n", + " dataFrame = createDataFrameFromDescr(vaersDescr)\n", + " return filterDataFrame(dataFrame, manufacturer, dose)\n", + "\n", + "def createAndFilterDataFrameFromDescrs(vaersDescrs, manufacturer, dose):\n", + " _createAndFilterDataFrameFromDescr = lambda vaersDescr: createAndFilterDataFrameFromDescr(vaersDescr, manufacturer, dose)\n", + " dataFrames = map(_createAndFilterDataFrameFromDescr, vaersDescrs)\n", " return pd.concat(dataFrames)\n" ] }, @@ -75,8 +76,8 @@ " dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n", " }\n", "\n", - "def createDataFrameFromFiles(dataDir, manufacturer, dose):\n", - " return createDataFrameFromDescrs(\n", + "def createAndFilterDataFrameFromFiles(dataDir, manufacturer, dose):\n", + " return createAndFilterDataFrameFromDescrs(\n", " [readVaersDescr(dataDir, \"2021\"), readVaersDescr(dataDir, \"2022\")],\n", " manufacturer,\n", " dose)" @@ -121,9 +122,9 @@ "source": [ "from pandas.testing import assert_frame_equal\n", "\n", - "class CreateDataFrameTest(unittest.TestCase):\n", + "class CreateAndFilterDataFrameTest(unittest.TestCase):\n", "\n", - " def test_createDataFrameFromDescrs(self):\n", + " def test_createAndFilterDataFrameFromDescrs(self):\n", " # Given\n", " vaersDescrs = [\n", " {\n", @@ -163,7 +164,7 @@ " ]\n", " \n", " # When\n", - " dataFrame = createDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '1')\n", + " dataFrame = createAndFilterDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '1')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -178,7 +179,7 @@ " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", "\n", - " def test_createDataFrameFromDescrsWithFirstDose(self):\n", + " def test_createAndFilterDataFrameFromDescrsWithFirstDose(self):\n", " # Given\n", " vaersDescrs = [\n", " {\n", @@ -199,7 +200,7 @@ " ]\n", " \n", " # When\n", - " dataFrame = createDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '1')\n", + " dataFrame = createAndFilterDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '1')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -210,7 +211,7 @@ " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", "\n", - " def test_createDataFrameFromDescrsWithSecondDose(self):\n", + " def test_createAndFilterDataFrameFromDescrsWithSecondDose(self):\n", " # Given\n", " vaersDescrs = [\n", " {\n", @@ -231,7 +232,7 @@ " ]\n", " \n", " # When\n", - " dataFrame = createDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '2')\n", + " dataFrame = createAndFilterDataFrameFromDescrs(vaersDescrs, \"MODERNA\", '2')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -258,7 +259,7 @@ "class BatchCodeTableTest(unittest.TestCase):\n", "\n", " def test_createBatchCodeTable2(self):\n", - " dataFrame = createDataFrameFromDescrs(\n", + " dataFrame = createAndFilterDataFrameFromDescrs(\n", " [\n", " {\n", " 'VAERSDATA': self.createDataFrame(\n", @@ -301,7 +302,7 @@ " self._test_createBatchCodeTable(dataFrame);\n", "\n", " def test_createBatchCodeTable(self):\n", - " self._test_createBatchCodeTable(createDataFrameFromFiles(\"test/VAERS\", \"MODERNA\", '1'));\n", + " self._test_createBatchCodeTable(createAndFilterDataFrameFromFiles(\"test/VAERS\", \"MODERNA\", '1'));\n", "\n", " def _test_createBatchCodeTable(self, dataFrame):\n", " # When\n", @@ -340,7 +341,7 @@ "outputs": [], "source": [ "def saveBatchCodeTable(manufacturer, excelFile):\n", - " batchCodeTable = createBatchCodeTable(createDataFrameFromFiles(\"VAERS\", manufacturer, '1'))\n", + " batchCodeTable = createBatchCodeTable(createAndFilterDataFrameFromFiles(\"VAERS\", manufacturer, '1'))\n", " display(manufacturer, batchCodeTable)\n", " batchCodeTable.to_excel(excelFile)" ]