refactoring

This commit is contained in:
frankknoll
2022-01-24 14:35:12 +01:00
parent 85663354d1
commit 17875bf89a

View File

@@ -13,104 +13,6 @@
"pd.set_option('display.max_columns', None)" "pd.set_option('display.max_columns', None)"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "ad0e6044",
"metadata": {},
"outputs": [],
"source": [
"df_patients_22 = pd.read_csv(\"VAERS/2021VAERSData/2021VAERSDATA.csv\", index_col='VAERS_ID', encoding='latin1', low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4472e902",
"metadata": {},
"outputs": [],
"source": [
"df_patients_22.index\n",
"df_patients_22"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb54ec47",
"metadata": {},
"outputs": [],
"source": [
"df_vax_22 = pd.read_csv(\"VAERS/2021VAERSData/2021VAERSVAX.csv\", index_col='VAERS_ID', encoding='latin1')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8942466",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"df_vax_22"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d31c5225",
"metadata": {},
"outputs": [],
"source": [
"df_COVID19_22 = df_vax_22[df_vax_22[\"VAX_TYPE\"] == \"COVID19\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be2b57c9",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"df_COVID19_22"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c432ab2",
"metadata": {},
"outputs": [],
"source": [
"df_patients_COVID19 = pd.merge(df_patients_22, df_COVID19_22, left_index=True, right_index=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fef882ff",
"metadata": {},
"outputs": [],
"source": [
"df_patients_22.shape, df_COVID19_22.shape, df_patients_COVID19.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6ce15322",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# pd.set_option('display.max_rows', 100)\n",
"df_patients_COVID19"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -118,75 +20,26 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"df_PFIZER_BIONTECH = df_patients_COVID19[df_patients_COVID19[\"VAX_MANU\"] == \"MODERNA\"]" "def createDataFrame(manufacturer):\n",
] " def read_csv(file):\n",
}, " return pd.read_csv(file, index_col='VAERS_ID', encoding='latin1', low_memory=False)\n",
{ " \n",
"cell_type": "code", " def createPatients():\n",
"execution_count": null, " return pd.concat(\n",
"id": "86e0e4f2", " [\n",
"metadata": {}, " read_csv(\"VAERS/2021VAERSData/2021VAERSDATA.csv\"),\n",
"outputs": [], " read_csv(\"VAERS/2022VAERSData/2022VAERSDATA.csv\")\n",
"source": [ " ])\n",
"df_PFIZER_BIONTECH.info()" "\n",
] " def createVax(): \n",
}, " return pd.concat(\n",
{ " [\n",
"cell_type": "code", " read_csv(\"VAERS/2021VAERSData/2021VAERSVAX.csv\"),\n",
"execution_count": null, " read_csv(\"VAERS/2022VAERSData/2022VAERSVAX.csv\")\n",
"id": "0ad4aab2", " ])\n",
"metadata": {}, "\n",
"outputs": [], " df_patients_vax = pd.merge(createPatients(), createVax(), left_index=True, right_index=True)\n",
"source": [ " return df_patients_vax[(df_patients_vax[\"VAX_TYPE\"] == \"COVID19\") & (df_patients_vax[\"VAX_MANU\"] == manufacturer)]"
"df_PFIZER_BIONTECH"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "349da946",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"df_DIED = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['DIED']=='Y'][['VAX_LOT']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2f660707",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"df_DISABLE = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['DISABLE']=='Y'][['VAX_LOT']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f537710",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"df_L_THREAT = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['L_THREAT']=='Y'][['VAX_LOT']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f675fcfe",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"df_ADR = df_PFIZER_BIONTECH[['VAX_LOT']]"
] ]
}, },
{ {
@@ -196,14 +49,38 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"df = pd.concat(\n", "def createPivotTable(df):\n",
" {\n", " def filter(df, col):\n",
" 'ADRs': df_ADR.value_counts(),\n", " return df[df[col]=='Y'][['VAX_LOT']]\n",
" 'DEATHS': df_DIED.value_counts(),\n", "\n",
" 'DISABILITIES': df_DISABLE.value_counts(),\n", " return pd.concat(\n",
" 'LIFE THREATENING ILLNESSES': df_L_THREAT.value_counts()\n", " {\n",
" },\n", " 'ADRs': df[['VAX_LOT']].value_counts(),\n",
" axis=1)" " 'DEATHS': filter(df, 'DIED').value_counts(),\n",
" 'DISABILITIES': filter(df, 'DISABLE').value_counts(),\n",
" 'LIFE THREATENING ILLNESSES': filter(df, 'L_THREAT').value_counts()\n",
" },\n",
" axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "86e0e4f2",
"metadata": {},
"outputs": [],
"source": [
"df_moderna = createDataFrame(\"MODERNA\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab170c16",
"metadata": {},
"outputs": [],
"source": [
"df_moderna"
] ]
}, },
{ {
@@ -213,7 +90,17 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"df" "pivotTable = createPivotTable(df_moderna)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fdf2ab4b",
"metadata": {},
"outputs": [],
"source": [
"pivotTable"
] ]
} }
], ],