def createDataFrame(baseDir, manufacturer, years=("2021", "2022")):
    """Load VAERS reports for the given years and keep one manufacturer's COVID19 rows.

    For every year, ``<baseDir>/VAERS/<year>VAERSData/<year>VAERSDATA.csv`` and
    ``<baseDir>/VAERS/<year>VAERSData/<year>VAERSVAX.csv`` are read and joined
    on their ``VAERS_ID`` index (inner join, the ``pd.merge`` default).

    Parameters
    ----------
    baseDir : str
        Directory that contains the ``VAERS`` folder.
    manufacturer : str
        Value that ``VAX_MANU`` must equal exactly, e.g. ``"MODERNA"``.
    years : iterable of str or int, or a single str/int, optional
        Years to load. Defaults to ``("2021", "2022")``, which is what this
        function loaded before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``VAERS_ID`` with the columns ``DIED``, ``L_THREAT``,
        ``DISABLE``, ``VAX_TYPE``, ``VAX_MANU`` and ``VAX_LOT``, restricted to
        rows where ``VAX_TYPE == "COVID19"`` and ``VAX_MANU == manufacturer``.

    Raises
    ------
    ValueError
        If ``years`` is empty.
    """
    def readCsv(file, usecols):
        return pd.read_csv(file, index_col='VAERS_ID', encoding='latin1', low_memory=False, usecols=usecols)

    def createDataFrameForYear(year):
        folder = baseDir + "/VAERS/" + year + "VAERSData/"
        return pd.merge(
            readCsv(folder + year + "VAERSDATA.csv", ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE']),
            readCsv(folder + year + "VAERSVAX.csv", ['VAERS_ID', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT']),
            left_index=True,
            right_index=True)

    # A lone year such as "2021" or 2021 must not be iterated character by character.
    if isinstance(years, (str, int)):
        years = [years]
    yearLabels = [str(year) for year in years]
    if not yearLabels:
        raise ValueError("years must contain at least one year")

    df = pd.concat([createDataFrameForYear(year) for year in yearLabels])
    return df[(df["VAX_TYPE"] == "COVID19") & (df["VAX_MANU"] == manufacturer)]


def createPivotTable(df):
    """Count adverse-event reports per vaccine lot, in total and per outcome.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``VAX_LOT``, ``DIED``, ``DISABLE`` and ``L_THREAT``;
        an outcome counts for a row when its column equals ``'Y'``.

    Returns
    -------
    pandas.DataFrame
        One row per ``VAX_LOT`` value with the columns ``ADRs``, ``DEATHS``,
        ``DISABILITIES`` and ``LIFE THREATENING ILLNESSES``. A lot that has no
        rows for an outcome gets 0 in that column instead of NaN.
    """
    # Named so that it does not shadow the builtin ``filter``.
    def lotsWhereFlagged(col):
        return df[df[col] == 'Y'][['VAX_LOT']]

    pivotTableDict = {
        'ADRs': df[['VAX_LOT']].value_counts(),
        'DEATHS': lotsWhereFlagged('DIED').value_counts(),
        'DISABILITIES': lotsWhereFlagged('DISABLE').value_counts(),
        'LIFE THREATENING ILLNESSES': lotsWhereFlagged('L_THREAT').value_counts()
    }
    # Aligning the four count series on the lot index leaves NaN where a lot
    # is absent from one of them; such a gap means "no reports", i.e. zero.
    return pd.concat(pivotTableDict, axis=1).fillna(0)
class HowBadIsMyBatchTest(unittest.TestCase):
    """Checks the per-lot pivot table against a hand-written expectation."""

    def test_createPivotTable(self):
        """Build the MODERNA pivot table from the "test" base directory and compare it."""
        # Actual result, computed from the data found under the "test" directory.
        actualTable = createPivotTable(createDataFrame("test", "MODERNA"))

        # Expected result: one row per lot, one column per outcome.
        expectedLots = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names=('VAX_LOT',))
        expectedCounts = {
            'ADRs': [2, 1],
            'DEATHS': [0, 1],
            'DISABILITIES': [2, 0],
            'LIFE THREATENING ILLNESSES': [0.0, 0.0]
        }
        expectedTable = pd.DataFrame(expectedCounts, index=expectedLots)

        # Render both frames in the notebook so a failure can be inspected visually.
        display("actual:", actualTable)
        display("expected:", expectedTable)

        # Only values, labels and index are compared; column dtypes are not.
        assert_frame_equal(actualTable, expectedTable, check_dtype=False)