From 9f9ae08d3c3dc0d97ead1f4378376a182a6c9809 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Sat, 5 Feb 2022 17:39:46 +0100 Subject: [PATCH] refactoring --- HowBadIsMyBatch.ipynb | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index b0c79621539..ec135989ac0 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -261,35 +261,23 @@ " \n", " @staticmethod\n", " def getDoseTable(dataFrame):\n", - " doseTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n", - " dataFrame = dataFrame.groupby('VAX_DOSE_SERIES'),\n", - " aggFunctionsByColumn = {\n", - " 'DIED': ['sum', 'size'],\n", - " 'L_THREAT': 'sum',\n", - " 'DISABLE': 'sum'\n", - " },\n", - " columnNameMappingsDict = {\n", - " \"DIED_size\": \"Total reports\",\n", - " \"DIED_sum\": \"Deaths\",\n", - " \"L_THREAT_sum\": \"Life Threatening Illnesses\",\n", - " \"DISABLE_sum\": \"Disabilities\"\n", - " })\n", - " doseTable = doseTable[['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses']]\n", - " doseTable['Severe reports (%)'] = (doseTable['Deaths'] + doseTable['Disabilities'] + doseTable['Life Threatening Illnesses']) / doseTable['Total reports'] * 100\n", - " return doseTable\n", + " return DoseAnalysis._getDoseTable(dataFrame.groupby('VAX_DOSE_SERIES'))\n", "\n", " @staticmethod\n", " def getDoseByMonthTable(dataFrame):\n", " # https://stackoverflow.com/questions/61879166/pandas-groupby-month-and-year-date-as-datetime64ns-and-summarized-by-count\n", - " grouped = dataFrame.groupby(\n", + " return DoseAnalysis._getDoseTable(\n", + " dataFrame.groupby(\n", " [\n", " dataFrame['RECVDATE'].dt.year.rename('year'),\n", " dataFrame['RECVDATE'].dt.month.rename('month'),\n", " dataFrame['VAX_DOSE_SERIES']\n", - " ])\n", - " # FK-TODO: DRY with getDoseTable()\n", + " ]))\n", + "\n", + " @staticmethod\n", + " def _getDoseTable(dataFrame):\n", " doseTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n", - " dataFrame = grouped,\n", + " dataFrame = dataFrame,\n", " aggFunctionsByColumn = {\n", " 'DIED': ['sum', 'size'],\n", " 'L_THREAT': 'sum',\n",