refactoring

This commit is contained in:
frankknoll
2022-02-05 15:01:27 +01:00
parent 6fc1722930
commit 95e38b2ba3

View File

@@ -131,6 +131,26 @@
" return self.dataFrame[\"VAX_DOSE_SERIES\"].str.contains(dose) if dose is not None else True\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c62cfaff",
"metadata": {},
"outputs": [],
"source": [
"class AggregationHelper:\n",
"\n",
" @staticmethod\n",
" def aggregateFlattenColumnsRenameColumns(dataFrame, aggFunctionsByColumn, columnNameMappingsDict):\n",
" aggregatedDataFrame = dataFrame.agg(aggFunctionsByColumn)\n",
" AggregationHelper._flattenColumns(aggregatedDataFrame)\n",
" return aggregatedDataFrame.rename(columns = columnNameMappingsDict)\n",
"\n",
" @staticmethod\n",
" def _flattenColumns(dataFrame):\n",
" dataFrame.columns = [\"_\".join(a) for a in dataFrame.columns.to_flat_index()]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -146,16 +166,14 @@
" self.dataFrame = dataFrame \n",
"\n",
" def createBatchCodeTable(self):\n",
" batchCodeTable = self.dataFrame.groupby('VAX_LOT').agg(\n",
" {\n",
" batchCodeTable = AggregationHelper.aggregateFlattenColumnsRenameColumns(\n",
" dataFrame = self.dataFrame.groupby('VAX_LOT'),\n",
" aggFunctionsByColumn = {\n",
" 'DIED': ['sum', 'size'],\n",
" 'L_THREAT': 'sum',\n",
" 'DISABLE': 'sum'\n",
" })\n",
" self._flattenColumns(batchCodeTable)\n",
" batchCodeTable = batchCodeTable.rename(\n",
" columns =\n",
" {\n",
" },\n",
" columnNameMappingsDict = {\n",
" \"DIED_size\": \"ADRs\",\n",
" \"DIED_sum\": \"DEATHS\",\n",
" \"L_THREAT_sum\": \"LIFE THREATENING ILLNESSES\",\n",
@@ -165,18 +183,16 @@
"\n",
" # create table from https://www.howbadismybatch.com/combined.html\n",
" def createSevereEffectsBatchCodeTable(self):\n",
" batchCodeTable = self.dataFrame.groupby('VAX_LOT').agg(\n",
" {\n",
" batchCodeTable = AggregationHelper.aggregateFlattenColumnsRenameColumns(\n",
" dataFrame = self.dataFrame.groupby('VAX_LOT'),\n",
" aggFunctionsByColumn = {\n",
" 'DIED': ['sum', 'size'],\n",
" 'L_THREAT': 'sum',\n",
" 'DISABLE': 'sum',\n",
" 'HOSPITAL': 'sum',\n",
" 'ER_VISIT': 'sum'\n",
" })\n",
" self._flattenColumns(batchCodeTable)\n",
" batchCodeTable = batchCodeTable.rename(\n",
" columns =\n",
" {\n",
" },\n",
" columnNameMappingsDict = {\n",
" \"DIED_size\": \"ADRs\",\n",
" \"DIED_sum\": \"DEATHS\",\n",
" \"L_THREAT_sum\": \"LIFE THREATENING ILLNESSES\",\n",
@@ -204,9 +220,6 @@
" manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])\n",
" return manufacturerByBatchCodeTable.set_index('VAX_LOT')\n",
"\n",
" def _flattenColumns(self, batchCodeTable):\n",
" batchCodeTable.columns = [\"_\".join(a) for a in batchCodeTable.columns.to_flat_index()]\n",
"\n",
"\n",
"class BatchCodeTableFactory:\n",
"\n",
@@ -233,28 +246,21 @@
" \n",
" @staticmethod\n",
" def getDoseTable(dataFrame):\n",
" doseTable = dataFrame.groupby('VAX_DOSE_SERIES').agg(\n",
" {\n",
" doseTable = AggregationHelper.aggregateFlattenColumnsRenameColumns(\n",
" dataFrame = dataFrame.groupby('VAX_DOSE_SERIES'),\n",
" aggFunctionsByColumn = {\n",
" 'DIED': ['sum', 'size'],\n",
" 'L_THREAT': 'sum',\n",
" 'DISABLE': 'sum'\n",
" })\n",
" DoseAnalysis._flattenColumns(doseTable)\n",
" doseTable = doseTable.rename(\n",
" columns =\n",
" {\n",
" },\n",
" columnNameMappingsDict = {\n",
" \"DIED_size\": \"Total reports\",\n",
" \"DIED_sum\": \"Deaths\",\n",
" \"L_THREAT_sum\": \"Life Threatening Illnesses\",\n",
" \"DISABLE_sum\": \"Disabilities\"\n",
" })[['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses']]\n",
" doseTable['Severe reports (%)'] = (doseTable['Deaths'] + doseTable['Disabilities'] + doseTable['Life Threatening Illnesses']) / doseTable['Total reports'] * 100\n",
" return doseTable\n",
"\n",
" # FK-TODO: DRY with BatchCodeTableHelper\n",
" @staticmethod\n",
" def _flattenColumns(batchCodeTable):\n",
" batchCodeTable.columns = [\"_\".join(a) for a in batchCodeTable.columns.to_flat_index()]\n"
" return doseTable\n"
]
},
{