refactoring

This commit is contained in:
frankknoll
2022-01-24 14:35:12 +01:00
parent 85663354d1
commit 17875bf89a

View File

@@ -13,104 +13,6 @@
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ad0e6044",
"metadata": {},
"outputs": [],
"source": [
"df_patients_22 = pd.read_csv(\"VAERS/2021VAERSData/2021VAERSDATA.csv\", index_col='VAERS_ID', encoding='latin1', low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4472e902",
"metadata": {},
"outputs": [],
"source": [
"df_patients_22.index\n",
"df_patients_22"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb54ec47",
"metadata": {},
"outputs": [],
"source": [
"df_vax_22 = pd.read_csv(\"VAERS/2021VAERSData/2021VAERSVAX.csv\", index_col='VAERS_ID', encoding='latin1')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8942466",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"df_vax_22"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d31c5225",
"metadata": {},
"outputs": [],
"source": [
"df_COVID19_22 = df_vax_22[df_vax_22[\"VAX_TYPE\"] == \"COVID19\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be2b57c9",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"df_COVID19_22"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c432ab2",
"metadata": {},
"outputs": [],
"source": [
"df_patients_COVID19 = pd.merge(df_patients_22, df_COVID19_22, left_index=True, right_index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fef882ff",
"metadata": {},
"outputs": [],
"source": [
"df_patients_22.shape, df_COVID19_22.shape, df_patients_COVID19.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6ce15322",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# pd.set_option('display.max_rows', 100)\n",
"df_patients_COVID19"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -118,75 +20,26 @@
"metadata": {},
"outputs": [],
"source": [
"df_PFIZER_BIONTECH = df_patients_COVID19[df_patients_COVID19[\"VAX_MANU\"] == \"MODERNA\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "86e0e4f2",
"metadata": {},
"outputs": [],
"source": [
"df_PFIZER_BIONTECH.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0ad4aab2",
"metadata": {},
"outputs": [],
"source": [
"df_PFIZER_BIONTECH"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "349da946",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"df_DIED = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['DIED']=='Y'][['VAX_LOT']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2f660707",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"df_DISABLE = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['DISABLE']=='Y'][['VAX_LOT']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f537710",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"df_L_THREAT = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['L_THREAT']=='Y'][['VAX_LOT']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f675fcfe",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"df_ADR = df_PFIZER_BIONTECH[['VAX_LOT']]"
"def createDataFrame(manufacturer, years=(2021, 2022), dataDir='VAERS'):\n",
"    \"\"\"Load VAERS reports and keep the COVID19 rows of one manufacturer.\n",
"\n",
"    For every year in `years` the files\n",
"    `<dataDir>/<year>VAERSData/<year>VAERSDATA.csv` (patients) and\n",
"    `<dataDir>/<year>VAERSData/<year>VAERSVAX.csv` (vaccinations) are read\n",
"    with VAERS_ID as index. The yearly frames are concatenated and patients\n",
"    are merged with vaccinations on that index (pandas default: inner join).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    manufacturer : str\n",
"        Value compared against the VAX_MANU column, e.g. \"MODERNA\".\n",
"    years : sequence of int, optional\n",
"        Years whose files are loaded. Defaults to 2021 and 2022.\n",
"    dataDir : str, optional\n",
"        Directory containing the per-year folders. Defaults to 'VAERS'.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Merged rows where VAX_TYPE == \"COVID19\" and VAX_MANU == manufacturer.\n",
"    \"\"\"\n",
"    def read_csv(file):\n",
"        # low_memory=False makes pandas infer dtypes over the whole file\n",
"        # instead of chunk by chunk, which avoids mixed-type columns.\n",
"        return pd.read_csv(file, index_col='VAERS_ID', encoding='latin1', low_memory=False)\n",
"\n",
"    def readAllYears(kind):\n",
"        # kind is the file name suffix: 'DATA' for patients, 'VAX' for vaccinations.\n",
"        return pd.concat(\n",
"            [read_csv(f'{dataDir}/{year}VAERSData/{year}VAERS{kind}.csv') for year in years])\n",
"\n",
"    df_patients_vax = pd.merge(readAllYears('DATA'), readAllYears('VAX'), left_index=True, right_index=True)\n",
"    isCovid19 = df_patients_vax[\"VAX_TYPE\"] == \"COVID19\"\n",
"    isManufacturer = df_patients_vax[\"VAX_MANU\"] == manufacturer\n",
"    return df_patients_vax[isCovid19 & isManufacturer]"
]
},
{
@@ -196,14 +49,38 @@
"metadata": {},
"outputs": [],
"source": [
"df = pd.concat(\n",
" {\n",
" 'ADRs': df_ADR.value_counts(),\n",
" 'DEATHS': df_DIED.value_counts(),\n",
" 'DISABILITIES': df_DISABLE.value_counts(),\n",
" 'LIFE THREATENING ILLNESSES': df_L_THREAT.value_counts()\n",
" },\n",
" axis=1)"
"def createPivotTable(df):\n",
"    \"\"\"Count rows per VAX_LOT, overall and per serious-outcome flag.\n",
"\n",
"    Returns a DataFrame with one row per vaccine lot and the columns\n",
"    'ADRs' (all rows of the lot), 'DEATHS' (DIED == 'Y'),\n",
"    'DISABILITIES' (DISABLE == 'Y') and 'LIFE THREATENING ILLNESSES'\n",
"    (L_THREAT == 'Y'). A lot that has no row for a flag gets NaN in that\n",
"    column, because the per-flag counts are aligned on the lot index.\n",
"    \"\"\"\n",
"    flagColumnByLabel = {\n",
"        'DEATHS': 'DIED',\n",
"        'DISABILITIES': 'DISABLE',\n",
"        'LIFE THREATENING ILLNESSES': 'L_THREAT',\n",
"    }\n",
"\n",
"    def lotsWhereFlagged(flagColumn):\n",
"        isFlagged = df[flagColumn] == 'Y'\n",
"        return df.loc[isFlagged, ['VAX_LOT']]\n",
"\n",
"    countsByLabel = {'ADRs': df[['VAX_LOT']].value_counts()}\n",
"    for label, flagColumn in flagColumnByLabel.items():\n",
"        countsByLabel[label] = lotsWhereFlagged(flagColumn).value_counts()\n",
"\n",
"    return pd.concat(countsByLabel, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "86e0e4f2",
"metadata": {},
"outputs": [],
"source": [
"df_moderna = createDataFrame(\"MODERNA\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab170c16",
"metadata": {},
"outputs": [],
"source": [
"df_moderna"
]
},
{
@@ -213,7 +90,17 @@
"metadata": {},
"outputs": [],
"source": [
"df"
"pivotTable = createPivotTable(df_moderna)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fdf2ab4b",
"metadata": {},
"outputs": [],
"source": [
"pivotTable"
]
}
],