From 85663354d1c24cf4f14f746c0917426c5f80b100 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Mon, 24 Jan 2022 12:16:58 +0100 Subject: [PATCH] merging multiple series into a single DataFrame --- HowBadIsMyBatch.ipynb | 121 ++++++++++-------------------------------- README.md | 2 +- 2 files changed, 28 insertions(+), 95 deletions(-) diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index c24b568f82e..b4c77cfbf05 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -121,16 +121,6 @@ "df_PFIZER_BIONTECH = df_patients_COVID19[df_patients_COVID19[\"VAX_MANU\"] == \"MODERNA\"]" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "e41885a8", - "metadata": {}, - "outputs": [], - "source": [ - "df_PFIZER_BIONTECH" - ] - }, { "cell_type": "code", "execution_count": null, @@ -144,21 +134,11 @@ { "cell_type": "code", "execution_count": null, - "id": "4b3cb943", + "id": "0ad4aab2", "metadata": {}, "outputs": [], "source": [ - "# table = pd.pivot_table(df_PFIZER_BIONTECH, values='DIED', columns=['DIED'], aggfunc=np.sum)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "284bf448", - "metadata": {}, - "outputs": [], - "source": [ - "pd.set_option('display.max_rows', None)" + "df_PFIZER_BIONTECH" ] }, { @@ -170,24 +150,7 @@ }, "outputs": [], "source": [ - "df_DIED = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['DIED']=='Y'][['VAX_LOT']]\n", - "df_DIED" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9c5ff2fd", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "# df = df.groupby(['VAX_LOT']).count()\n", - "#df2 = df.sort_values(by=['DIED'], ascending=False)\n", - "#pd.set_option('display.max_rows', None)\n", - "#print(df2)\n", - "df_DIED.value_counts()" + "df_DIED = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['DIED']=='Y'][['VAX_LOT']]" ] }, { @@ -199,18 +162,7 @@ }, "outputs": [], "source": [ - "df_DISABLE = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['DISABLE']=='Y'][['VAX_LOT']]\n", - "df_DISABLE" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3d782392", - "metadata": {}, - "outputs": [], - "source": [ - "df_DISABLE.value_counts()" + "df_DISABLE = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['DISABLE']=='Y'][['VAX_LOT']]" ] }, { @@ -222,20 +174,7 @@ }, "outputs": [], "source": [ - "df_L_THREAT = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['L_THREAT']=='Y'][['VAX_LOT']]\n", - "df_L_THREAT" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1a629007", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "df_L_THREAT.value_counts()" + "df_L_THREAT = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['L_THREAT']=='Y'][['VAX_LOT']]" ] }, { @@ -247,41 +186,35 @@ }, "outputs": [], "source": [ - "df_ADR = df_PFIZER_BIONTECH[['VAX_LOT']]\n", - "df_ADR" + "df_ADR = df_PFIZER_BIONTECH[['VAX_LOT']]" ] }, { "cell_type": "code", "execution_count": null, - "id": "7b3005db", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "df_ADR_valueCounts = df_ADR.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "07c93e74", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "df_ADR_valueCounts" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1244c494", + "id": "99945ca8", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "df = pd.concat(\n", + " {\n", + " 'ADRs': df_ADR.value_counts(),\n", + " 'DEATHS': df_DIED.value_counts(),\n", + " 'DISABILITIES': df_DISABLE.value_counts(),\n", + " 'LIFE THREATENING ILLNESSES': df_L_THREAT.value_counts()\n", + " },\n", + " axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9191d12", + "metadata": {}, + "outputs": [], + "source": [ + "df" + ] } ], "metadata": { diff --git a/README.md b/README.md index 70db6b98e8a..ddbea0ca3a4 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ jupyter notebook FK-TODO: -- VAX_LOT-Spalte normalisieren, d.h. toUpperCase(), Format des jeweiligen Herstellers berücksichtigen und "verschmutzte" Einträge Säubern, denn sie stellen alle dieselbe Charge dar: +- VAX_LOT-Spalte normalisieren, d.h. toUpperCase(), Format des jeweiligen Herstellers berücksichtigen und "verschmutzte" Einträge säubern, denn sie stellen alle dieselbe Charge dar: 039k20a MOD039K20A #039K20A