refactoring

This commit is contained in:
frankknoll
2022-02-05 15:07:40 +01:00
parent 95e38b2ba3
commit 8345b9b317

View File

@@ -14,6 +14,26 @@
"pd.set_option('display.max_columns', None)" "pd.set_option('display.max_columns', None)"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "b136967b",
"metadata": {},
"outputs": [],
"source": [
"class DataFrameConverter:\n",
"    \"\"\"Static helpers that recode 'Y' flag columns of a DataFrame as 0/1 integers.\"\"\"\n",
"\n",
"    @staticmethod\n",
"    def convertColumnsOfDataFrameToNumerics(dataFrame, columns):\n",
"        \"\"\"Recode every column named in `columns` to 0/1, modifying `dataFrame` in place.\n",
"\n",
"        Args:\n",
"            dataFrame: the DataFrame whose columns are overwritten.\n",
"            columns: iterable of column names to recode.\n",
"\n",
"        Returns:\n",
"            None; the result is written back into `dataFrame`.\n",
"        \"\"\"\n",
"        for columnName in columns:\n",
"            DataFrameConverter._convertColumnOfDataFrameToNumeric(dataFrame, columnName)\n",
"\n",
"    @staticmethod\n",
"    def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n",
"        \"\"\"Overwrite `column` with 1 where its value equals 'Y' and 0 everywhere else.\"\"\"\n",
"        # Anything that is not exactly 'Y' (including missing values) becomes 0.\n",
"        isYes = dataFrame[column].eq('Y')\n",
"        dataFrame[column] = np.where(isYes, 1, 0)"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -45,7 +65,7 @@
" VAERSDATA = self._read_csv(\n", " VAERSDATA = self._read_csv(\n",
" file = file,\n", " file = file,\n",
" usecols = ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n", " usecols = ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n",
" VaersDescrReader._convertColumnsOfDataFrameToNumerics(\n", " DataFrameConverter.convertColumnsOfDataFrameToNumerics(\n",
" VAERSDATA,\n", " VAERSDATA,\n",
" ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n", " ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n",
" return VAERSDATA\n", " return VAERSDATA\n",
@@ -62,16 +82,7 @@
" index_col = 'VAERS_ID',\n", " index_col = 'VAERS_ID',\n",
" encoding = 'latin1',\n", " encoding = 'latin1',\n",
" low_memory = False,\n", " low_memory = False,\n",
" **kwargs)\n", " **kwargs)\n"
"\n",
" @staticmethod\n",
" def _convertColumnsOfDataFrameToNumerics(dataFrame, columns):\n",
" for column in columns:\n",
" VaersDescrReader._convertColumnOfDataFrameToNumeric(dataFrame, column)\n",
"\n",
" @staticmethod\n",
" def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n",
" dataFrame[column] = np.where(dataFrame[column] == 'Y', 1, 0)\n"
] ]
}, },
{ {
@@ -141,7 +152,7 @@
"class AggregationHelper:\n", "class AggregationHelper:\n",
"\n", "\n",
" @staticmethod\n", " @staticmethod\n",
" def aggregateFlattenColumnsRenameColumns(dataFrame, aggFunctionsByColumn, columnNameMappingsDict):\n", " def aggregateAndFlattenColumnsAndRenameColumns(dataFrame, aggFunctionsByColumn, columnNameMappingsDict):\n",
" aggregatedDataFrame = dataFrame.agg(aggFunctionsByColumn)\n", " aggregatedDataFrame = dataFrame.agg(aggFunctionsByColumn)\n",
" AggregationHelper._flattenColumns(aggregatedDataFrame)\n", " AggregationHelper._flattenColumns(aggregatedDataFrame)\n",
" return aggregatedDataFrame.rename(columns = columnNameMappingsDict)\n", " return aggregatedDataFrame.rename(columns = columnNameMappingsDict)\n",
@@ -166,7 +177,7 @@
" self.dataFrame = dataFrame \n", " self.dataFrame = dataFrame \n",
"\n", "\n",
" def createBatchCodeTable(self):\n", " def createBatchCodeTable(self):\n",
" batchCodeTable = AggregationHelper.aggregateFlattenColumnsRenameColumns(\n", " batchCodeTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n",
" dataFrame = self.dataFrame.groupby('VAX_LOT'),\n", " dataFrame = self.dataFrame.groupby('VAX_LOT'),\n",
" aggFunctionsByColumn = {\n", " aggFunctionsByColumn = {\n",
" 'DIED': ['sum', 'size'],\n", " 'DIED': ['sum', 'size'],\n",
@@ -183,7 +194,7 @@
"\n", "\n",
" # create table from https://www.howbadismybatch.com/combined.html\n", " # create table from https://www.howbadismybatch.com/combined.html\n",
" def createSevereEffectsBatchCodeTable(self):\n", " def createSevereEffectsBatchCodeTable(self):\n",
" batchCodeTable = AggregationHelper.aggregateFlattenColumnsRenameColumns(\n", " batchCodeTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n",
" dataFrame = self.dataFrame.groupby('VAX_LOT'),\n", " dataFrame = self.dataFrame.groupby('VAX_LOT'),\n",
" aggFunctionsByColumn = {\n", " aggFunctionsByColumn = {\n",
" 'DIED': ['sum', 'size'],\n", " 'DIED': ['sum', 'size'],\n",
@@ -246,7 +257,7 @@
" \n", " \n",
" @staticmethod\n", " @staticmethod\n",
" def getDoseTable(dataFrame):\n", " def getDoseTable(dataFrame):\n",
" doseTable = AggregationHelper.aggregateFlattenColumnsRenameColumns(\n", " doseTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n",
" dataFrame = dataFrame.groupby('VAX_DOSE_SERIES'),\n", " dataFrame = dataFrame.groupby('VAX_DOSE_SERIES'),\n",
" aggFunctionsByColumn = {\n", " aggFunctionsByColumn = {\n",
" 'DIED': ['sum', 'size'],\n", " 'DIED': ['sum', 'size'],\n",