refactoring

This commit is contained in:
frankknoll
2022-02-06 17:09:54 +01:00
parent 6e46087bc1
commit 34b36f18e5

View File

@@ -14,30 +14,6 @@
"pd.set_option('display.max_columns', None)" "pd.set_option('display.max_columns', None)"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "b136967b",
"metadata": {},
"outputs": [],
"source": [
"class DataFrameNormalizer:\n",
" \n",
" @staticmethod\n",
" def convertVAX_LOTColumnToUpperCase(dataFrame):\n",
" dataFrame['VAX_LOT'] = dataFrame['VAX_LOT'].str.upper()\n",
"\n",
" @staticmethod\n",
" def convertColumnsOfDataFrameToNumerics(dataFrame, columns):\n",
" for column in columns:\n",
" DataFrameNormalizer._convertColumnOfDataFrameToNumeric(dataFrame, column)\n",
"\n",
" @staticmethod\n",
" def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n",
" dataFrame[column] = np.where(dataFrame[column] == 'Y', 1, 0)\n",
" "
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -66,23 +42,17 @@
" }\n", " }\n",
"\n", "\n",
" def _readVAERSDATA(self, file):\n", " def _readVAERSDATA(self, file):\n",
" VAERSDATA = self._read_csv(\n", " return self._read_csv(\n",
" file = file,\n", " file = file,\n",
" usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", " usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
" parse_dates = ['RECVDATE'],\n", " parse_dates = ['RECVDATE'],\n",
" date_parser = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\"))\n", " date_parser = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\"))\n",
" DataFrameNormalizer.convertColumnsOfDataFrameToNumerics(\n",
" VAERSDATA,\n",
" ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n",
" return VAERSDATA\n",
"\n", "\n",
" def _readVAERSVAX(self, file):\n", " def _readVAERSVAX(self, file):\n",
" VAERSVAX = self._read_csv(\n", " return self._read_csv(\n",
" file = file,\n", " file = file,\n",
" usecols = ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n", " usecols = ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n",
" dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n", " dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n",
" DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX)\n",
" return VAERSVAX\n",
"\n", "\n",
" def _read_csv(self, file, **kwargs):\n", " def _read_csv(self, file, **kwargs):\n",
" return pd.read_csv(\n", " return pd.read_csv(\n",
@@ -120,6 +90,37 @@
" return pd.concat(dataFrames)\n" " return pd.concat(dataFrames)\n"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "6b639196",
"metadata": {},
"outputs": [],
"source": [
"class DataFrameNormalizer:\n",
" \n",
" @staticmethod\n",
" def normalize(dataFrame):\n",
" DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(dataFrame)\n",
" DataFrameNormalizer.convertColumnsOfDataFrameToNumerics(\n",
" dataFrame,\n",
" ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n",
"\n",
" @staticmethod\n",
" def convertVAX_LOTColumnToUpperCase(dataFrame):\n",
" dataFrame['VAX_LOT'] = dataFrame['VAX_LOT'].str.upper()\n",
"\n",
" @staticmethod\n",
" def convertColumnsOfDataFrameToNumerics(dataFrame, columns):\n",
" for column in columns:\n",
" DataFrameNormalizer._convertColumnOfDataFrameToNumeric(dataFrame, column)\n",
"\n",
" @staticmethod\n",
" def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n",
" dataFrame[column] = np.where(dataFrame[column] == 'Y', 1, 0)\n",
" "
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -179,7 +180,7 @@
"class BatchCodeTableHelper:\n", "class BatchCodeTableHelper:\n",
" \n", " \n",
" def __init__(self, dataFrame : pd.DataFrame):\n", " def __init__(self, dataFrame : pd.DataFrame):\n",
" self.dataFrame = dataFrame \n", " self.dataFrame = dataFrame\n",
"\n", "\n",
" def createBatchCodeTable(self):\n", " def createBatchCodeTable(self):\n",
" batchCodeTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n", " batchCodeTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n",
@@ -244,8 +245,8 @@
" @staticmethod\n", " @staticmethod\n",
" def createBatchCodeTable(dataFrame : pd.DataFrame, manufacturer, dose):\n", " def createBatchCodeTable(dataFrame : pd.DataFrame, manufacturer, dose):\n",
" dataFrame = DataFrameFilter().filterByCovid19(dataFrame)\n", " dataFrame = DataFrameFilter().filterByCovid19(dataFrame)\n",
" filteredDataFrame = DataFrameFilter().filterBy(dataFrame, manufacturer = manufacturer, dose = dose)\n", " dataFrame = DataFrameFilter().filterBy(dataFrame, manufacturer = manufacturer, dose = dose)\n",
" return BatchCodeTableHelper(filteredDataFrame).createBatchCodeTable()\n", " return BatchCodeTableHelper(dataFrame).createBatchCodeTable()\n",
"\n", "\n",
" # create table from https://www.howbadismybatch.com/combined.html\n", " # create table from https://www.howbadismybatch.com/combined.html\n",
" @staticmethod\n", " @staticmethod\n",
@@ -630,6 +631,7 @@
" def test_createBatchCodeTable(self):\n", " def test_createBatchCodeTable(self):\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n",
" VaersDescrReader(dataDir = \"test/VAERS\").readAllVaersDescrs())\n", " VaersDescrReader(dataDir = \"test/VAERS\").readAllVaersDescrs())\n",
" DataFrameNormalizer.normalize(dataFrame)\n",
" self._test_createBatchCodeTable(dataFrame, \"MODERNA\", '1')\n", " self._test_createBatchCodeTable(dataFrame, \"MODERNA\", '1')\n",
"\n", "\n",
" def _test_createBatchCodeTable(self, dataFrame, manufacturer, dose):\n", " def _test_createBatchCodeTable(self, dataFrame, manufacturer, dose):\n",
@@ -751,6 +753,7 @@
"def saveBatchCodeTable(manufacturer, excelFile):\n", "def saveBatchCodeTable(manufacturer, excelFile):\n",
" vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n", " vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
" DataFrameNormalizer.normalize(dataFrame)\n",
" batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(dataFrame, manufacturer = manufacturer, dose = '1')\n", " batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(dataFrame, manufacturer = manufacturer, dose = '1')\n",
" display(batchCodeTable)\n", " display(batchCodeTable)\n",
" batchCodeTable.to_excel(excelFile)" " batchCodeTable.to_excel(excelFile)"
@@ -831,6 +834,7 @@
"def saveSevereEffectsBatchCodeTable(excelFile):\n", "def saveSevereEffectsBatchCodeTable(excelFile):\n",
" vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n", " vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
" DataFrameNormalizer.normalize(dataFrame)\n",
" severeEffectsBatchCodeTable = BatchCodeTableFactory.createSevereEffectsBatchCodeTable(dataFrame, dose = '1')\n", " severeEffectsBatchCodeTable = BatchCodeTableFactory.createSevereEffectsBatchCodeTable(dataFrame, dose = '1')\n",
" display(severeEffectsBatchCodeTable)\n", " display(severeEffectsBatchCodeTable)\n",
" severeEffectsBatchCodeTable.to_excel(excelFile)" " severeEffectsBatchCodeTable.to_excel(excelFile)"
@@ -866,11 +870,13 @@
"def getDoseTable():\n", "def getDoseTable():\n",
" vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n", " vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
" DataFrameNormalizer.normalize(dataFrame)\n",
" return DoseAnalysis.getDoseTable(dataFrame)\n", " return DoseAnalysis.getDoseTable(dataFrame)\n",
"\n", "\n",
"def getDoseByMonthTable():\n", "def getDoseByMonthTable():\n",
" vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n", " vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
" DataFrameNormalizer.normalize(dataFrame)\n",
" return DoseAnalysis.getDoseByMonthTable(dataFrame)" " return DoseAnalysis.getDoseByMonthTable(dataFrame)"
] ]
}, },