refactoring

This commit is contained in:
frankknoll
2022-02-05 15:07:40 +01:00
parent 95e38b2ba3
commit 8345b9b317

View File

@@ -14,6 +14,26 @@
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b136967b",
"metadata": {},
"outputs": [],
"source": [
"class DataFrameConverter:\n",
" \n",
" @staticmethod\n",
" def convertColumnsOfDataFrameToNumerics(dataFrame, columns):\n",
" for column in columns:\n",
" DataFrameConverter._convertColumnOfDataFrameToNumeric(dataFrame, column)\n",
"\n",
" @staticmethod\n",
" def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n",
" dataFrame[column] = np.where(dataFrame[column] == 'Y', 1, 0)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -45,7 +65,7 @@
" VAERSDATA = self._read_csv(\n",
" file = file,\n",
" usecols = ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n",
" VaersDescrReader._convertColumnsOfDataFrameToNumerics(\n",
" DataFrameConverter.convertColumnsOfDataFrameToNumerics(\n",
" VAERSDATA,\n",
" ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n",
" return VAERSDATA\n",
@@ -62,16 +82,7 @@
" index_col = 'VAERS_ID',\n",
" encoding = 'latin1',\n",
" low_memory = False,\n",
" **kwargs)\n",
"\n",
" @staticmethod\n",
" def _convertColumnsOfDataFrameToNumerics(dataFrame, columns):\n",
" for column in columns:\n",
" VaersDescrReader._convertColumnOfDataFrameToNumeric(dataFrame, column)\n",
"\n",
" @staticmethod\n",
" def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n",
" dataFrame[column] = np.where(dataFrame[column] == 'Y', 1, 0)\n"
" **kwargs)\n"
]
},
{
@@ -141,7 +152,7 @@
"class AggregationHelper:\n",
"\n",
" @staticmethod\n",
" def aggregateFlattenColumnsRenameColumns(dataFrame, aggFunctionsByColumn, columnNameMappingsDict):\n",
" def aggregateAndFlattenColumnsAndRenameColumns(dataFrame, aggFunctionsByColumn, columnNameMappingsDict):\n",
" aggregatedDataFrame = dataFrame.agg(aggFunctionsByColumn)\n",
" AggregationHelper._flattenColumns(aggregatedDataFrame)\n",
" return aggregatedDataFrame.rename(columns = columnNameMappingsDict)\n",
@@ -166,7 +177,7 @@
" self.dataFrame = dataFrame \n",
"\n",
" def createBatchCodeTable(self):\n",
" batchCodeTable = AggregationHelper.aggregateFlattenColumnsRenameColumns(\n",
" batchCodeTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n",
" dataFrame = self.dataFrame.groupby('VAX_LOT'),\n",
" aggFunctionsByColumn = {\n",
" 'DIED': ['sum', 'size'],\n",
@@ -183,7 +194,7 @@
"\n",
" # create table from https://www.howbadismybatch.com/combined.html\n",
" def createSevereEffectsBatchCodeTable(self):\n",
" batchCodeTable = AggregationHelper.aggregateFlattenColumnsRenameColumns(\n",
" batchCodeTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n",
" dataFrame = self.dataFrame.groupby('VAX_LOT'),\n",
" aggFunctionsByColumn = {\n",
" 'DIED': ['sum', 'size'],\n",
@@ -246,7 +257,7 @@
" \n",
" @staticmethod\n",
" def getDoseTable(dataFrame):\n",
" doseTable = AggregationHelper.aggregateFlattenColumnsRenameColumns(\n",
" doseTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n",
" dataFrame = dataFrame.groupby('VAX_DOSE_SERIES'),\n",
" aggFunctionsByColumn = {\n",
" 'DIED': ['sum', 'size'],\n",