refactoring

This commit is contained in:
frankknoll
2022-02-01 09:34:35 +01:00
parent 20915c14b8
commit 6a9b8fca89

View File

@@ -40,6 +40,7 @@
" 'VAERSDATA':\n", " 'VAERSDATA':\n",
" self._read_csv(\n", " self._read_csv(\n",
" folder + year + \"VAERSDATA.csv\",\n", " folder + year + \"VAERSDATA.csv\",\n",
" # FK-TODO: use Column enum\n",
" ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT']),\n", " ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT']),\n",
" 'VAERSVAX':\n", " 'VAERSVAX':\n",
" self._read_csv(\n", " self._read_csv(\n",
@@ -122,51 +123,59 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def createBatchCodeTable(df : pd.DataFrame):\n", "import pandas as pd\n",
" def filterDataFrame(df, col):\n",
" return df[df[col] == 'Y'][['VAX_LOT']]\n",
"\n", "\n",
" batchCodeTableDict = {\n", "class BatchCodeTableFactory:\n",
" 'ADRs': df[['VAX_LOT']].value_counts(),\n", " \n",
" 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n", " @staticmethod\n",
" 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n", " def createBatchCodeTable(df : pd.DataFrame):\n",
" 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts()\n", " def filterDataFrame(df, col):\n",
" }\n", " return df[df[col] == 'Y'][['VAX_LOT']]\n",
" return pd.concat(batchCodeTableDict, axis = 'columns').replace(to_replace = np.nan, value = 0)\n",
"\n", "\n",
"def createManufacturerByBatchCodeTable(df):\n", " batchCodeTableDict = {\n",
" manufacturerByBatchCodeTable = df[['VAX_LOT', 'VAX_MANU']]\n", " 'ADRs': df[['VAX_LOT']].value_counts(),\n",
" manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])\n", " 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n",
" return manufacturerByBatchCodeTable.set_index('VAX_LOT')\n", " 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n",
" 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts()\n",
" }\n",
" return pd.concat(batchCodeTableDict, axis = 'columns').replace(to_replace = np.nan, value = 0)\n",
"\n", "\n",
"def createCompanyByBatchCodeTable(df):\n", " # create table from https://www.howbadismybatch.com/combined.html\n",
" return createManufacturerByBatchCodeTable(df).rename(columns = {\"VAX_MANU\": \"COMPANY\"})\n", " # FK-TODO: DRY with createBatchCodeTable()\n",
" @staticmethod\n",
" def createSevereEffectsBatchCodeTable(df : pd.DataFrame):\n",
" def filterDataFrame(df, col):\n",
" return df[df[col] == 'Y']['VAX_LOT']\n",
"\n", "\n",
"# create table from https://www.howbadismybatch.com/combined.html\n", " batchCodeTableDict = {\n",
"# FK-TODO: DRY with createBatchCodeTable()\n", " 'ADRs': df['VAX_LOT'].value_counts(),\n",
"def createSevereEffectsBatchCodeTable(df):\n", " 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n",
" def filterDataFrame(df, col):\n", " 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n",
" return df[df[col] == 'Y']['VAX_LOT']\n", " 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts(),\n",
" 'HOSPITALISATIONS': filterDataFrame(df, 'HOSPITAL').value_counts(),\n",
" 'EMERGENCY ROOM OR DOCTOR VISITS': filterDataFrame(df, 'ER_VISIT').value_counts()\n",
" }\n",
" batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n",
" batchCodeTable.index.name = 'VAX_LOT'\n",
" # add Company column:\n",
" batchCodeTable = pd.merge(\n",
" batchCodeTable,\n",
" BatchCodeTableFactory._createCompanyByBatchCodeTable(df),\n",
" how = 'left',\n",
" left_index = True,\n",
" right_index = True,\n",
" validate = 'one_to_one')\n",
" return batchCodeTable.replace(to_replace = np.nan, value = 0)\n",
"\n", "\n",
" batchCodeTableDict = {\n", " @staticmethod\n",
" 'ADRs': df['VAX_LOT'].value_counts(),\n", " def _createManufacturerByBatchCodeTable(df):\n",
" 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n", " manufacturerByBatchCodeTable = df[['VAX_LOT', 'VAX_MANU']]\n",
" 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n", " manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])\n",
" 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts(),\n", " return manufacturerByBatchCodeTable.set_index('VAX_LOT')\n",
" 'HOSPITALISATIONS': filterDataFrame(df, 'HOSPITAL').value_counts(),\n", "\n",
" 'EMERGENCY ROOM OR DOCTOR VISITS': filterDataFrame(df, 'ER_VISIT').value_counts()\n", " @staticmethod\n",
" }\n", " def _createCompanyByBatchCodeTable(df):\n",
" batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n", " return BatchCodeTableFactory._createManufacturerByBatchCodeTable(df).rename(columns = {\"VAX_MANU\": \"COMPANY\"})\n"
" batchCodeTable.index.name = 'VAX_LOT'\n",
" # add Company column:\n",
" batchCodeTable = pd.merge(\n",
" batchCodeTable,\n",
" createCompanyByBatchCodeTable(df),\n",
" how = 'left',\n",
" left_index = True,\n",
" right_index = True,\n",
" validate = 'one_to_one')\n",
" return batchCodeTable.replace(to_replace = np.nan, value = 0)\n"
] ]
}, },
{ {
@@ -415,7 +424,7 @@
"\n", "\n",
" def _test_createBatchCodeTable(self, dataFrame):\n", " def _test_createBatchCodeTable(self, dataFrame):\n",
" # When\n", " # When\n",
" batchCodeTable = createBatchCodeTable(dataFrame)\n", " batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(dataFrame)\n",
"\n", "\n",
" # Then\n", " # Then\n",
" batchCodeTableExpected = pd.DataFrame(\n", " batchCodeTableExpected = pd.DataFrame(\n",
@@ -469,7 +478,7 @@
" dataFrame = DataFrameFilter(dataFrame).filterForSevereEffects(dose = '1')\n", " dataFrame = DataFrameFilter(dataFrame).filterForSevereEffects(dose = '1')\n",
"\n", "\n",
" # When\n", " # When\n",
" batchCodeTable = createSevereEffectsBatchCodeTable(dataFrame)\n", " batchCodeTable = BatchCodeTableFactory.createSevereEffectsBatchCodeTable(dataFrame)\n",
"\n", "\n",
" # Then\n", " # Then\n",
" batchCodeTableExpected = pd.DataFrame(\n", " batchCodeTableExpected = pd.DataFrame(\n",
@@ -510,7 +519,7 @@
" vaersDescrs = VaersDescrReader(\"VAERS\").readAllVaersDescrs()\n", " vaersDescrs = VaersDescrReader(\"VAERS\").readAllVaersDescrs()\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
" dataFrameFilter = DataFrameFilter(dataFrame)\n", " dataFrameFilter = DataFrameFilter(dataFrame)\n",
" batchCodeTable = createBatchCodeTable(dataFrameFilter.filterBy(manufacturer = manufacturer, dose = '1'))\n", " batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(dataFrameFilter.filterBy(manufacturer = manufacturer, dose = '1'))\n",
" display(manufacturer, batchCodeTable)\n", " display(manufacturer, batchCodeTable)\n",
" batchCodeTable.to_excel(excelFile)" " batchCodeTable.to_excel(excelFile)"
] ]
@@ -538,7 +547,7 @@
" vaersDescrs = VaersDescrReader(\"VAERS\").readAllVaersDescrs()\n", " vaersDescrs = VaersDescrReader(\"VAERS\").readAllVaersDescrs()\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
" dataFrameFilter = DataFrameFilter(dataFrame)\n", " dataFrameFilter = DataFrameFilter(dataFrame)\n",
" severeEffectsBatchCodeTable = createSevereEffectsBatchCodeTable(dataFrameFilter.filterForSevereEffects(dose = '1'))\n", " severeEffectsBatchCodeTable = BatchCodeTableFactory.createSevereEffectsBatchCodeTable(dataFrameFilter.filterForSevereEffects(dose = '1'))\n",
" display('severeEffectsBatchCodeTable', severeEffectsBatchCodeTable)\n", " display('severeEffectsBatchCodeTable', severeEffectsBatchCodeTable)\n",
" severeEffectsBatchCodeTable.to_excel(excelFile)" " severeEffectsBatchCodeTable.to_excel(excelFile)"
] ]