merging multiple series into a single DataFrame

This commit is contained in:
frankknoll
2022-01-24 12:16:58 +01:00
parent 555b902668
commit 85663354d1
2 changed files with 28 additions and 95 deletions

View File

@@ -121,16 +121,6 @@
"df_PFIZER_BIONTECH = df_patients_COVID19[df_patients_COVID19[\"VAX_MANU\"] == \"MODERNA\"]" "df_PFIZER_BIONTECH = df_patients_COVID19[df_patients_COVID19[\"VAX_MANU\"] == \"MODERNA\"]"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "e41885a8",
"metadata": {},
"outputs": [],
"source": [
"df_PFIZER_BIONTECH"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -144,21 +134,11 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "4b3cb943", "id": "0ad4aab2",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# table = pd.pivot_table(df_PFIZER_BIONTECH, values='DIED', columns=['DIED'], aggfunc=np.sum)" "df_PFIZER_BIONTECH"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "284bf448",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_rows', None)"
] ]
}, },
{ {
@@ -170,24 +150,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"df_DIED = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['DIED']=='Y'][['VAX_LOT']]\n", "df_DIED = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['DIED']=='Y'][['VAX_LOT']]"
"df_DIED"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c5ff2fd",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# df = df.groupby(['VAX_LOT']).count()\n",
"#df2 = df.sort_values(by=['DIED'], ascending=False)\n",
"#pd.set_option('display.max_rows', None)\n",
"#print(df2)\n",
"df_DIED.value_counts()"
] ]
}, },
{ {
@@ -199,18 +162,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"df_DISABLE = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['DISABLE']=='Y'][['VAX_LOT']]\n", "df_DISABLE = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['DISABLE']=='Y'][['VAX_LOT']]"
"df_DISABLE"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d782392",
"metadata": {},
"outputs": [],
"source": [
"df_DISABLE.value_counts()"
] ]
}, },
{ {
@@ -222,20 +174,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"df_L_THREAT = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['L_THREAT']=='Y'][['VAX_LOT']]\n", "df_L_THREAT = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['L_THREAT']=='Y'][['VAX_LOT']]"
"df_L_THREAT"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1a629007",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"df_L_THREAT.value_counts()"
] ]
}, },
{ {
@@ -247,41 +186,35 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"df_ADR = df_PFIZER_BIONTECH[['VAX_LOT']]\n", "df_ADR = df_PFIZER_BIONTECH[['VAX_LOT']]"
"df_ADR"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "7b3005db", "id": "99945ca8",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"df_ADR_valueCounts = df_ADR.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "07c93e74",
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"df_ADR_valueCounts"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1244c494",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": [
"df = pd.concat(\n",
" {\n",
" 'ADRs': df_ADR.value_counts(),\n",
" 'DEATHS': df_DIED.value_counts(),\n",
" 'DISABILITIES': df_DISABLE.value_counts(),\n",
" 'LIFE THREATENING ILLNESSES': df_L_THREAT.value_counts()\n",
" },\n",
" axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9191d12",
"metadata": {},
"outputs": [],
"source": [
"df"
]
} }
], ],
"metadata": { "metadata": {

View File

@@ -3,7 +3,7 @@
jupyter notebook jupyter notebook
FK-TODO: FK-TODO:
- VAX_LOT-Spalte normalisieren, d.h. toUpperCase(), Format des jeweiligen Herstellers berücksichtigen und "verschmutzte" Einträge Säubern, denn sie stellen alle dieselbe Charge dar: - VAX_LOT-Spalte normalisieren, d.h. toUpperCase(), Format des jeweiligen Herstellers berücksichtigen und "verschmutzte" Einträge säubern, denn sie stellen alle dieselbe Charge dar:
039k20a 039k20a
MOD039K20A MOD039K20A
#039K20A #039K20A