refactoring

This commit is contained in:
frankknoll
2022-01-31 12:28:40 +01:00
parent 7b6d619881
commit bf1269b578

View File

@@ -91,8 +91,7 @@
"# FK-TODO: DRY with createAndFilterDataFrameFromFiles()\n",
"def createDataFrameSevereEffectsFromFiles(dataDir, dose):\n",
" return createDataFrameSevereEffectsFromDescrs(\n",
" # FK-TODO: reactivate: [readVaersDescr(dataDir, \"2021\"), readVaersDescr(dataDir, \"2022\")],\n",
" [readVaersDescr(dataDir, \"2022\")],\n",
" [readVaersDescr(dataDir, \"2021\"), readVaersDescr(dataDir, \"2022\")],\n",
" dose)"
]
},
@@ -115,15 +114,20 @@
" }\n",
" return pd.concat(batchCodeTableDict, axis = 'columns').replace(to_replace = np.nan, value = 0)\n",
"\n",
"def getManufacturerOfBatchCode(df, batchCode):\n",
" return df[df['VAX_LOT'] == batchCode].iloc[0]['VAX_MANU']\n",
"def createManufacturerByBatchCodeTable(df):\n",
" manufacturerByBatchCodeTable = df[['VAX_LOT', 'VAX_MANU']]\n",
" manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])\n",
" return manufacturerByBatchCodeTable.set_index('VAX_LOT')\n",
"\n",
"def createCompanyByBatchCodeTable(df):\n",
" return createManufacturerByBatchCodeTable(df).rename(columns = {\"VAX_MANU\": \"COMPANY\"})\n",
"\n",
"def createSevereEffectsBatchCodeTable(df):\n",
" def filterDataFrame(df, col):\n",
" return df[df[col] == 'Y'][['VAX_LOT']]\n",
" return df[df[col] == 'Y']['VAX_LOT']\n",
"\n",
" batchCodeTableDict = {\n",
" 'ADRs': df[['VAX_LOT']].value_counts(),\n",
" 'ADRs': df['VAX_LOT'].value_counts(),\n",
" 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n",
" 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n",
" 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts(),\n",
@@ -131,9 +135,14 @@
" 'EMERGENCY ROOM OR DOCTOR VISITS': filterDataFrame(df, 'ER_VISIT').value_counts()\n",
" }\n",
" batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n",
" batchCodeTable['COMPANY'] = batchCodeTable.apply(\n",
" lambda row: getManufacturerOfBatchCode(df, row.name[0]),\n",
" axis = 'columns')\n",
" # add Company column:\n",
" batchCodeTable = pd.merge(\n",
" batchCodeTable,\n",
" createCompanyByBatchCodeTable(df),\n",
" how = 'left',\n",
" left_index = True,\n",
" right_index = True,\n",
" validate = 'one_to_one')\n",
" return batchCodeTable.replace(to_replace = np.nan, value = 0)\n"
]
},
@@ -434,14 +443,14 @@
" batchCodeTableExpected = pd.DataFrame(\n",
" data = {\n",
" 'ADRs': [1, 1],\n",
" 'DEATHS': [0, 1],\n",
" 'DISABILITIES': [1, 0],\n",
" 'LIFE THREATENING ILLNESSES': [0, 1],\n",
" 'HOSPITALISATIONS': [0, 1],\n",
" 'DEATHS': [1, 0],\n",
" 'DISABILITIES': [0, 1],\n",
" 'LIFE THREATENING ILLNESSES': [1, 0],\n",
" 'HOSPITALISATIONS': [1, 0],\n",
" 'EMERGENCY ROOM OR DOCTOR VISITS': [1, 1],\n",
" 'COMPANY': ['PFIZER\\BIONTECH', 'MODERNA']\n",
" 'COMPANY': ['MODERNA', 'PFIZER\\BIONTECH']\n",
" },\n",
" index = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',)))\n",
" index = pd.Index(['037K20A', '025L20A']))\n",
" assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)\n",
"\n",
" def createDataFrame(self, index, columns, data, dtypes = {}):\n",