From 17875bf89af9b28e1946983580366641a35a63ce Mon Sep 17 00:00:00 2001 From: frankknoll Date: Mon, 24 Jan 2022 14:35:12 +0100 Subject: [PATCH] refactoring --- HowBadIsMyBatch.ipynb | 239 +++++++++++------------------------------- 1 file changed, 63 insertions(+), 176 deletions(-) diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index b4c77cfbf05..92f219163a8 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -13,104 +13,6 @@ "pd.set_option('display.max_columns', None)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "ad0e6044", - "metadata": {}, - "outputs": [], - "source": [ - "df_patients_22 = pd.read_csv(\"VAERS/2021VAERSData/2021VAERSDATA.csv\", index_col='VAERS_ID', encoding='latin1', low_memory=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4472e902", - "metadata": {}, - "outputs": [], - "source": [ - "df_patients_22.index\n", - "df_patients_22" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fb54ec47", - "metadata": {}, - "outputs": [], - "source": [ - "df_vax_22 = pd.read_csv(\"VAERS/2021VAERSData/2021VAERSVAX.csv\", index_col='VAERS_ID', encoding='latin1')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d8942466", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "df_vax_22" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d31c5225", - "metadata": {}, - "outputs": [], - "source": [ - "df_COVID19_22 = df_vax_22[df_vax_22[\"VAX_TYPE\"] == \"COVID19\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "be2b57c9", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "df_COVID19_22" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8c432ab2", - "metadata": {}, - "outputs": [], - "source": [ - "df_patients_COVID19 = pd.merge(df_patients_22, df_COVID19_22, left_index=True, right_index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fef882ff", - "metadata": {}, - "outputs": [], - "source": [ - "df_patients_22.shape, df_COVID19_22.shape, df_patients_COVID19.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ce15322", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "# pd.set_option('display.max_rows', 100)\n", - "df_patients_COVID19" - ] - }, { "cell_type": "code", "execution_count": null, @@ -118,75 +20,26 @@ "metadata": {}, "outputs": [], "source": [ - "df_PFIZER_BIONTECH = df_patients_COVID19[df_patients_COVID19[\"VAX_MANU\"] == \"MODERNA\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "86e0e4f2", - "metadata": {}, - "outputs": [], - "source": [ - "df_PFIZER_BIONTECH.info()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0ad4aab2", - "metadata": {}, - "outputs": [], - "source": [ - "df_PFIZER_BIONTECH" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "349da946", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "df_DIED = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['DIED']=='Y'][['VAX_LOT']]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2f660707", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "df_DISABLE = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['DISABLE']=='Y'][['VAX_LOT']]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9f537710", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "df_L_THREAT = df_PFIZER_BIONTECH[df_PFIZER_BIONTECH['L_THREAT']=='Y'][['VAX_LOT']]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f675fcfe", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "df_ADR = df_PFIZER_BIONTECH[['VAX_LOT']]" + "def createDataFrame(manufacturer):\n", + " def read_csv(file):\n", + " return pd.read_csv(file, index_col='VAERS_ID', encoding='latin1', low_memory=False)\n", + " \n", + " def createPatients():\n", + " return pd.concat(\n", + " [\n", + " read_csv(\"VAERS/2021VAERSData/2021VAERSDATA.csv\"),\n", + " read_csv(\"VAERS/2022VAERSData/2022VAERSDATA.csv\")\n", + " ])\n", + "\n", + " def createVax(): \n", + " return pd.concat(\n", + " [\n", + " read_csv(\"VAERS/2021VAERSData/2021VAERSVAX.csv\"),\n", + " read_csv(\"VAERS/2022VAERSData/2022VAERSVAX.csv\")\n", + " ])\n", + "\n", + " df_patients_vax = pd.merge(createPatients(), createVax(), left_index=True, right_index=True)\n", + " return df_patients_vax[(df_patients_vax[\"VAX_TYPE\"] == \"COVID19\") & (df_patients_vax[\"VAX_MANU\"] == manufacturer)]" ] }, { @@ -196,14 +49,38 @@ "metadata": {}, "outputs": [], "source": [ - "df = pd.concat(\n", - " {\n", - " 'ADRs': df_ADR.value_counts(),\n", - " 'DEATHS': df_DIED.value_counts(),\n", - " 'DISABILITIES': df_DISABLE.value_counts(),\n", - " 'LIFE THREATENING ILLNESSES': df_L_THREAT.value_counts()\n", - " },\n", - " axis=1)" + "def createPivotTable(df):\n", + " def filter(df, col):\n", + " return df[df[col]=='Y'][['VAX_LOT']]\n", + "\n", + " return pd.concat(\n", + " {\n", + " 'ADRs': df[['VAX_LOT']].value_counts(),\n", + " 'DEATHS': filter(df, 'DIED').value_counts(),\n", + " 'DISABILITIES': filter(df, 'DISABLE').value_counts(),\n", + " 'LIFE THREATENING ILLNESSES': filter(df, 'L_THREAT').value_counts()\n", + " },\n", + " axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86e0e4f2", + "metadata": {}, + "outputs": [], + "source": [ + "df_moderna = createDataFrame(\"MODERNA\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab170c16", + "metadata": {}, + "outputs": [], + "source": [ + "df_moderna" ] }, { @@ -213,7 +90,17 @@ "metadata": {}, "outputs": [], "source": [ - "df" + "pivotTable = createPivotTable(df_moderna)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdf2ab4b", + "metadata": {}, + "outputs": [], + "source": [ + "pivotTable" ] } ],