refactoring
This commit is contained in:
@@ -91,8 +91,7 @@
|
||||
"# FK-TODO: DRY with createAndFilterDataFrameFromFiles()\n",
|
||||
"def createDataFrameSevereEffectsFromFiles(dataDir, dose):\n",
|
||||
" return createDataFrameSevereEffectsFromDescrs(\n",
|
||||
" # FK-TODO: reactivate: [readVaersDescr(dataDir, \"2021\"), readVaersDescr(dataDir, \"2022\")],\n",
|
||||
" [readVaersDescr(dataDir, \"2022\")],\n",
|
||||
" [readVaersDescr(dataDir, \"2021\"), readVaersDescr(dataDir, \"2022\")],\n",
|
||||
" dose)"
|
||||
]
|
||||
},
|
||||
@@ -115,15 +114,20 @@
|
||||
" }\n",
|
||||
" return pd.concat(batchCodeTableDict, axis = 'columns').replace(to_replace = np.nan, value = 0)\n",
|
||||
"\n",
|
||||
"def getManufacturerOfBatchCode(df, batchCode):\n",
|
||||
" return df[df['VAX_LOT'] == batchCode].iloc[0]['VAX_MANU']\n",
|
||||
"def createManufacturerByBatchCodeTable(df):\n",
|
||||
" manufacturerByBatchCodeTable = df[['VAX_LOT', 'VAX_MANU']]\n",
|
||||
" manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])\n",
|
||||
" return manufacturerByBatchCodeTable.set_index('VAX_LOT')\n",
|
||||
"\n",
|
||||
"def createCompanyByBatchCodeTable(df):\n",
|
||||
" return createManufacturerByBatchCodeTable(df).rename(columns = {\"VAX_MANU\": \"COMPANY\"})\n",
|
||||
"\n",
|
||||
"def createSevereEffectsBatchCodeTable(df):\n",
|
||||
" def filterDataFrame(df, col):\n",
|
||||
" return df[df[col] == 'Y'][['VAX_LOT']]\n",
|
||||
" return df[df[col] == 'Y']['VAX_LOT']\n",
|
||||
"\n",
|
||||
" batchCodeTableDict = {\n",
|
||||
" 'ADRs': df[['VAX_LOT']].value_counts(),\n",
|
||||
" 'ADRs': df['VAX_LOT'].value_counts(),\n",
|
||||
" 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n",
|
||||
" 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n",
|
||||
" 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts(),\n",
|
||||
@@ -131,9 +135,14 @@
|
||||
" 'EMERGENCY ROOM OR DOCTOR VISITS': filterDataFrame(df, 'ER_VISIT').value_counts()\n",
|
||||
" }\n",
|
||||
" batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n",
|
||||
" batchCodeTable['COMPANY'] = batchCodeTable.apply(\n",
|
||||
" lambda row: getManufacturerOfBatchCode(df, row.name[0]),\n",
|
||||
" axis = 'columns')\n",
|
||||
" # add Company column:\n",
|
||||
" batchCodeTable = pd.merge(\n",
|
||||
" batchCodeTable,\n",
|
||||
" createCompanyByBatchCodeTable(df),\n",
|
||||
" how = 'left',\n",
|
||||
" left_index = True,\n",
|
||||
" right_index = True,\n",
|
||||
" validate = 'one_to_one')\n",
|
||||
" return batchCodeTable.replace(to_replace = np.nan, value = 0)\n"
|
||||
]
|
||||
},
|
||||
@@ -434,14 +443,14 @@
|
||||
" batchCodeTableExpected = pd.DataFrame(\n",
|
||||
" data = {\n",
|
||||
" 'ADRs': [1, 1],\n",
|
||||
" 'DEATHS': [0, 1],\n",
|
||||
" 'DISABILITIES': [1, 0],\n",
|
||||
" 'LIFE THREATENING ILLNESSES': [0, 1],\n",
|
||||
" 'HOSPITALISATIONS': [0, 1],\n",
|
||||
" 'DEATHS': [1, 0],\n",
|
||||
" 'DISABILITIES': [0, 1],\n",
|
||||
" 'LIFE THREATENING ILLNESSES': [1, 0],\n",
|
||||
" 'HOSPITALISATIONS': [1, 0],\n",
|
||||
" 'EMERGENCY ROOM OR DOCTOR VISITS': [1, 1],\n",
|
||||
" 'COMPANY': ['PFIZER\\BIONTECH', 'MODERNA']\n",
|
||||
" 'COMPANY': ['MODERNA', 'PFIZER\\BIONTECH']\n",
|
||||
" },\n",
|
||||
" index = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',)))\n",
|
||||
" index = pd.Index(['037K20A', '025L20A']))\n",
|
||||
" assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)\n",
|
||||
"\n",
|
||||
" def createDataFrame(self, index, columns, data, dtypes = {}):\n",
|
||||
|
||||
Reference in New Issue
Block a user