refactoring
This commit is contained in:
@@ -40,6 +40,7 @@
|
||||
" 'VAERSDATA':\n",
|
||||
" self._read_csv(\n",
|
||||
" folder + year + \"VAERSDATA.csv\",\n",
|
||||
" # FK-TODO: use Column enum\n",
|
||||
" ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT']),\n",
|
||||
" 'VAERSVAX':\n",
|
||||
" self._read_csv(\n",
|
||||
@@ -122,51 +123,59 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def createBatchCodeTable(df : pd.DataFrame):\n",
|
||||
" def filterDataFrame(df, col):\n",
|
||||
" return df[df[col] == 'Y'][['VAX_LOT']]\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
" batchCodeTableDict = {\n",
|
||||
" 'ADRs': df[['VAX_LOT']].value_counts(),\n",
|
||||
" 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n",
|
||||
" 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n",
|
||||
" 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts()\n",
|
||||
" }\n",
|
||||
" return pd.concat(batchCodeTableDict, axis = 'columns').replace(to_replace = np.nan, value = 0)\n",
|
||||
"class BatchCodeTableFactory:\n",
|
||||
" \n",
|
||||
" @staticmethod\n",
|
||||
" def createBatchCodeTable(df : pd.DataFrame):\n",
|
||||
" def filterDataFrame(df, col):\n",
|
||||
" return df[df[col] == 'Y'][['VAX_LOT']]\n",
|
||||
"\n",
|
||||
"def createManufacturerByBatchCodeTable(df):\n",
|
||||
" manufacturerByBatchCodeTable = df[['VAX_LOT', 'VAX_MANU']]\n",
|
||||
" manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])\n",
|
||||
" return manufacturerByBatchCodeTable.set_index('VAX_LOT')\n",
|
||||
" batchCodeTableDict = {\n",
|
||||
" 'ADRs': df[['VAX_LOT']].value_counts(),\n",
|
||||
" 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n",
|
||||
" 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n",
|
||||
" 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts()\n",
|
||||
" }\n",
|
||||
" return pd.concat(batchCodeTableDict, axis = 'columns').replace(to_replace = np.nan, value = 0)\n",
|
||||
"\n",
|
||||
"def createCompanyByBatchCodeTable(df):\n",
|
||||
" return createManufacturerByBatchCodeTable(df).rename(columns = {\"VAX_MANU\": \"COMPANY\"})\n",
|
||||
" # create table from https://www.howbadismybatch.com/combined.html\n",
|
||||
" # FK-TODO: DRY with createBatchCodeTable()\n",
|
||||
" @staticmethod\n",
|
||||
" def createSevereEffectsBatchCodeTable(df : pd.DataFrame):\n",
|
||||
" def filterDataFrame(df, col):\n",
|
||||
" return df[df[col] == 'Y']['VAX_LOT']\n",
|
||||
"\n",
|
||||
"# create table from https://www.howbadismybatch.com/combined.html\n",
|
||||
"# FK-TODO: DRY with createBatchCodeTable()\n",
|
||||
"def createSevereEffectsBatchCodeTable(df):\n",
|
||||
" def filterDataFrame(df, col):\n",
|
||||
" return df[df[col] == 'Y']['VAX_LOT']\n",
|
||||
" batchCodeTableDict = {\n",
|
||||
" 'ADRs': df['VAX_LOT'].value_counts(),\n",
|
||||
" 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n",
|
||||
" 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n",
|
||||
" 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts(),\n",
|
||||
" 'HOSPITALISATIONS': filterDataFrame(df, 'HOSPITAL').value_counts(),\n",
|
||||
" 'EMERGENCY ROOM OR DOCTOR VISITS': filterDataFrame(df, 'ER_VISIT').value_counts()\n",
|
||||
" }\n",
|
||||
" batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n",
|
||||
" batchCodeTable.index.name = 'VAX_LOT'\n",
|
||||
" # add Company column:\n",
|
||||
" batchCodeTable = pd.merge(\n",
|
||||
" batchCodeTable,\n",
|
||||
" BatchCodeTableFactory._createCompanyByBatchCodeTable(df),\n",
|
||||
" how = 'left',\n",
|
||||
" left_index = True,\n",
|
||||
" right_index = True,\n",
|
||||
" validate = 'one_to_one')\n",
|
||||
" return batchCodeTable.replace(to_replace = np.nan, value = 0)\n",
|
||||
"\n",
|
||||
" batchCodeTableDict = {\n",
|
||||
" 'ADRs': df['VAX_LOT'].value_counts(),\n",
|
||||
" 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n",
|
||||
" 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n",
|
||||
" 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts(),\n",
|
||||
" 'HOSPITALISATIONS': filterDataFrame(df, 'HOSPITAL').value_counts(),\n",
|
||||
" 'EMERGENCY ROOM OR DOCTOR VISITS': filterDataFrame(df, 'ER_VISIT').value_counts()\n",
|
||||
" }\n",
|
||||
" batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n",
|
||||
" batchCodeTable.index.name = 'VAX_LOT'\n",
|
||||
" # add Company column:\n",
|
||||
" batchCodeTable = pd.merge(\n",
|
||||
" batchCodeTable,\n",
|
||||
" createCompanyByBatchCodeTable(df),\n",
|
||||
" how = 'left',\n",
|
||||
" left_index = True,\n",
|
||||
" right_index = True,\n",
|
||||
" validate = 'one_to_one')\n",
|
||||
" return batchCodeTable.replace(to_replace = np.nan, value = 0)\n"
|
||||
" @staticmethod\n",
|
||||
" def _createManufacturerByBatchCodeTable(df):\n",
|
||||
" manufacturerByBatchCodeTable = df[['VAX_LOT', 'VAX_MANU']]\n",
|
||||
" manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])\n",
|
||||
" return manufacturerByBatchCodeTable.set_index('VAX_LOT')\n",
|
||||
"\n",
|
||||
" @staticmethod\n",
|
||||
" def _createCompanyByBatchCodeTable(df):\n",
|
||||
" return BatchCodeTableFactory._createManufacturerByBatchCodeTable(df).rename(columns = {\"VAX_MANU\": \"COMPANY\"})\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -415,7 +424,7 @@
|
||||
"\n",
|
||||
" def _test_createBatchCodeTable(self, dataFrame):\n",
|
||||
" # When\n",
|
||||
" batchCodeTable = createBatchCodeTable(dataFrame)\n",
|
||||
" batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(dataFrame)\n",
|
||||
"\n",
|
||||
" # Then\n",
|
||||
" batchCodeTableExpected = pd.DataFrame(\n",
|
||||
@@ -469,7 +478,7 @@
|
||||
" dataFrame = DataFrameFilter(dataFrame).filterForSevereEffects(dose = '1')\n",
|
||||
"\n",
|
||||
" # When\n",
|
||||
" batchCodeTable = createSevereEffectsBatchCodeTable(dataFrame)\n",
|
||||
" batchCodeTable = BatchCodeTableFactory.createSevereEffectsBatchCodeTable(dataFrame)\n",
|
||||
"\n",
|
||||
" # Then\n",
|
||||
" batchCodeTableExpected = pd.DataFrame(\n",
|
||||
@@ -510,7 +519,7 @@
|
||||
" vaersDescrs = VaersDescrReader(\"VAERS\").readAllVaersDescrs()\n",
|
||||
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
|
||||
" dataFrameFilter = DataFrameFilter(dataFrame)\n",
|
||||
" batchCodeTable = createBatchCodeTable(dataFrameFilter.filterBy(manufacturer = manufacturer, dose = '1'))\n",
|
||||
" batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(dataFrameFilter.filterBy(manufacturer = manufacturer, dose = '1'))\n",
|
||||
" display(manufacturer, batchCodeTable)\n",
|
||||
" batchCodeTable.to_excel(excelFile)"
|
||||
]
|
||||
@@ -538,7 +547,7 @@
|
||||
" vaersDescrs = VaersDescrReader(\"VAERS\").readAllVaersDescrs()\n",
|
||||
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
|
||||
" dataFrameFilter = DataFrameFilter(dataFrame)\n",
|
||||
" severeEffectsBatchCodeTable = createSevereEffectsBatchCodeTable(dataFrameFilter.filterForSevereEffects(dose = '1'))\n",
|
||||
" severeEffectsBatchCodeTable = BatchCodeTableFactory.createSevereEffectsBatchCodeTable(dataFrameFilter.filterForSevereEffects(dose = '1'))\n",
|
||||
" display('severeEffectsBatchCodeTable', severeEffectsBatchCodeTable)\n",
|
||||
" severeEffectsBatchCodeTable.to_excel(excelFile)"
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user