From 95e38b2ba318f25e3511999139f708460a61c937 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Sat, 5 Feb 2022 15:01:27 +0100 Subject: [PATCH] refactoring --- HowBadIsMyBatch.ipynb | 66 +++++++++++++++++++++++-------------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index b587a747521..6a2efd5a818 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -131,6 +131,26 @@ " return self.dataFrame[\"VAX_DOSE_SERIES\"].str.contains(dose) if dose is not None else True\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "c62cfaff", + "metadata": {}, + "outputs": [], + "source": [ + "class AggregationHelper:\n", + "\n", + " @staticmethod\n", + " def aggregateFlattenColumnsRenameColumns(dataFrame, aggFunctionsByColumn, columnNameMappingsDict):\n", + " aggregatedDataFrame = dataFrame.agg(aggFunctionsByColumn)\n", + " AggregationHelper._flattenColumns(aggregatedDataFrame)\n", + " return aggregatedDataFrame.rename(columns = columnNameMappingsDict)\n", + "\n", + " @staticmethod\n", + " def _flattenColumns(dataFrame):\n", + " dataFrame.columns = [\"_\".join(a) for a in dataFrame.columns.to_flat_index()]\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -146,16 +166,14 @@ " self.dataFrame = dataFrame \n", "\n", " def createBatchCodeTable(self):\n", - " batchCodeTable = self.dataFrame.groupby('VAX_LOT').agg(\n", - " {\n", + " batchCodeTable = AggregationHelper.aggregateFlattenColumnsRenameColumns(\n", + " dataFrame = self.dataFrame.groupby('VAX_LOT'),\n", + " aggFunctionsByColumn = {\n", " 'DIED': ['sum', 'size'],\n", " 'L_THREAT': 'sum',\n", " 'DISABLE': 'sum'\n", - " })\n", - " self._flattenColumns(batchCodeTable)\n", - " batchCodeTable = batchCodeTable.rename(\n", - " columns =\n", - " {\n", + " },\n", + " columnNameMappingsDict = {\n", " \"DIED_size\": \"ADRs\",\n", " \"DIED_sum\": \"DEATHS\",\n", " \"L_THREAT_sum\": \"LIFE THREATENING ILLNESSES\",\n", @@ -165,18 +183,16 @@ "\n", " # create table from https://www.howbadismybatch.com/combined.html\n", " def createSevereEffectsBatchCodeTable(self):\n", - " batchCodeTable = self.dataFrame.groupby('VAX_LOT').agg(\n", - " {\n", + " batchCodeTable = AggregationHelper.aggregateFlattenColumnsRenameColumns(\n", + " dataFrame = self.dataFrame.groupby('VAX_LOT'),\n", + " aggFunctionsByColumn = {\n", " 'DIED': ['sum', 'size'],\n", " 'L_THREAT': 'sum',\n", " 'DISABLE': 'sum',\n", " 'HOSPITAL': 'sum',\n", " 'ER_VISIT': 'sum'\n", - " })\n", - " self._flattenColumns(batchCodeTable)\n", - " batchCodeTable = batchCodeTable.rename(\n", - " columns =\n", - " {\n", + " },\n", + " columnNameMappingsDict = {\n", " \"DIED_size\": \"ADRs\",\n", " \"DIED_sum\": \"DEATHS\",\n", " \"L_THREAT_sum\": \"LIFE THREATENING ILLNESSES\",\n", @@ -204,9 +220,6 @@ " manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])\n", " return manufacturerByBatchCodeTable.set_index('VAX_LOT')\n", "\n", - " def _flattenColumns(self, batchCodeTable):\n", - " batchCodeTable.columns = [\"_\".join(a) for a in batchCodeTable.columns.to_flat_index()]\n", - "\n", "\n", "class BatchCodeTableFactory:\n", "\n", @@ -233,28 +246,21 @@ " \n", " @staticmethod\n", " def getDoseTable(dataFrame):\n", - " doseTable = dataFrame.groupby('VAX_DOSE_SERIES').agg(\n", - " {\n", + " doseTable = AggregationHelper.aggregateFlattenColumnsRenameColumns(\n", + " dataFrame = dataFrame.groupby('VAX_DOSE_SERIES'),\n", + " aggFunctionsByColumn = {\n", " 'DIED': ['sum', 'size'],\n", " 'L_THREAT': 'sum',\n", " 'DISABLE': 'sum'\n", - " })\n", - " DoseAnalysis._flattenColumns(doseTable)\n", - " doseTable = doseTable.rename(\n", - " columns =\n", - " {\n", + " },\n", + " columnNameMappingsDict = {\n", " \"DIED_size\": \"Total reports\",\n", " \"DIED_sum\": \"Deaths\",\n", " \"L_THREAT_sum\": \"Life Threatening Illnesses\",\n", " \"DISABLE_sum\": \"Disabilities\"\n", " })[['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses']]\n", " doseTable['Severe reports (%)'] = (doseTable['Deaths'] + doseTable['Disabilities'] + doseTable['Life Threatening Illnesses']) / doseTable['Total reports'] * 100\n", - " return doseTable\n", - "\n", - " # FK-TODO: DRY with BatchCodeTableHelper\n", - " @staticmethod\n", - " def _flattenColumns(batchCodeTable):\n", - " batchCodeTable.columns = [\"_\".join(a) for a in batchCodeTable.columns.to_flat_index()]\n" + " return doseTable\n" ] }, {