{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9de5907f-18f5-4cb1-903e-26028ff1fa03",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "pd.set_option('display.max_rows', 100)\n",
    "pd.set_option('display.max_columns', None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "233bc590",
   "metadata": {},
   "outputs": [],
   "source": [
    "def createDataFrame(dataDir, manufacturer, years=('2021', '2022')):\n",
    "    \"\"\"Load the VAERS reports of the given years for one COVID19 vaccine manufacturer.\n",
    "\n",
    "    For every year the files {dataDir}/{year}VAERSData/{year}VAERSDATA.csv and\n",
    "    {dataDir}/{year}VAERSData/{year}VAERSVAX.csv are read and joined on VAERS_ID.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    dataDir : str\n",
    "        Folder that contains one {year}VAERSData sub-folder per year.\n",
    "    manufacturer : str\n",
    "        Value the VAX_MANU column has to equal.\n",
    "    years : iterable, optional\n",
    "        Years to load; each entry is converted with str(). Defaults to 2021 and 2022.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        Rows with VAX_TYPE == 'COVID19' and VAX_MANU == manufacturer, indexed by\n",
    "        VAERS_ID, with the columns DIED, L_THREAT, DISABLE, VAX_TYPE, VAX_MANU and VAX_LOT.\n",
    "    \"\"\"\n",
    "    def readCsv(file, usecols):\n",
    "        return pd.read_csv(file, index_col='VAERS_ID', encoding='latin1', low_memory=False, usecols=usecols)\n",
    "\n",
    "    def createDataFrameForYear(year):\n",
    "        folder = dataDir + '/' + year + 'VAERSData/'\n",
    "        # Index-on-index merge (inner join by default): each vaccination row is paired\n",
    "        # with the report that has the same VAERS_ID.\n",
    "        return pd.merge(\n",
    "            readCsv(folder + year + 'VAERSDATA.csv', ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE']),\n",
    "            readCsv(folder + year + 'VAERSVAX.csv', ['VAERS_ID', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT']),\n",
    "            left_index=True,\n",
    "            right_index=True)\n",
    "\n",
    "    df = pd.concat([createDataFrameForYear(str(year)) for year in years])\n",
    "    return df[(df['VAX_TYPE'] == 'COVID19') & (df['VAX_MANU'] == manufacturer)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99945ca8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def createBatchCodeTable(df):\n",
    "    \"\"\"Count, per VAX_LOT value, all rows and the rows flagged 'Y' in DIED, DISABLE and L_THREAT.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Frame with the columns VAX_LOT, DIED, DISABLE and L_THREAT.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        One row per batch code with the columns 'ADRs', 'DEATHS', 'DISABILITIES'\n",
    "        and 'LIFE THREATENING ILLNESSES'; missing combinations are reported as 0.\n",
    "    \"\"\"\n",
    "    def countLotsFlagged(col):\n",
    "        # Keep only the rows whose flag column equals 'Y', then count them per batch code.\n",
    "        return df[df[col] == 'Y'][['VAX_LOT']].value_counts()\n",
    "\n",
    "    batchCodeTableDict = {\n",
    "        'ADRs': df[['VAX_LOT']].value_counts(),\n",
    "        'DEATHS': countLotsFlagged('DIED'),\n",
    "        'DISABILITIES': countLotsFlagged('DISABLE'),\n",
    "        'LIFE THREATENING ILLNESSES': countLotsFlagged('L_THREAT')\n",
    "    }\n",
    "    # A batch code that is absent from one of the counts shows up as NaN after the concat.\n",
    "    return pd.concat(batchCodeTableDict, axis=1).replace(to_replace=np.nan, value=0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "86e0e4f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "def saveBatchCodeTable(manufacturer, excelFile):\n",
    "    \"\"\"Build the batch code table of one manufacturer from the 'VAERS' folder and write it to Excel.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    manufacturer : str\n",
    "        Manufacturer name passed on to createDataFrame.\n",
    "    excelFile : str, path-like or ExcelWriter\n",
    "        Destination handed to DataFrame.to_excel. For a path, missing parent folders are created first.\n",
    "    \"\"\"\n",
    "    batchCodeTable = createBatchCodeTable(createDataFrame('VAERS', manufacturer))\n",
    "    if isinstance(excelFile, (str, os.PathLike)):\n",
    "        # to_excel does not create folders, so a missing target folder would make the write fail.\n",
    "        os.makedirs(os.path.dirname(os.path.abspath(excelFile)), exist_ok=True)\n",
    "    batchCodeTable.to_excel(excelFile)\n"
   ]
}, { "cell_type": "code", "execution_count": null, "id": "ab170c16", "metadata": {}, "outputs": [], "source": [ "saveBatchCodeTable(\"MODERNA\", \"results/moderna.xlsx\")\n", "saveBatchCodeTable(\"PFIZER\\BIONTECH\", \"results/pfizer.xlsx\")\n", "saveBatchCodeTable(\"JANSSEN\", \"results/janssen.xlsx\")" ] }, { "cell_type": "code", "execution_count": null, "id": "9f506ac8", "metadata": {}, "outputs": [], "source": [ "import unittest" ] }, { "cell_type": "code", "execution_count": null, "id": "e14465d7", "metadata": {}, "outputs": [], "source": [ "from pandas.testing import assert_frame_equal\n", "\n", "\n", "class HowBadIsMyBatchTest(unittest.TestCase):\n", "\n", " def test_createBatchCodeTable(self):\n", " # Given\n", " dataFrame = createDataFrame(\"test/VAERS\", \"MODERNA\")\n", " display(\"dataFrame:\", dataFrame)\n", "\n", " # When\n", " batchCodeTable = createBatchCodeTable(dataFrame)\n", " display(\"batchCodeTable:\", batchCodeTable)\n", "\n", " # Then\n", " batchCodeTableExpected = pd.DataFrame(\n", " {\n", " 'ADRs': [2, 1],\n", " 'DEATHS': [0, 1],\n", " 'DISABILITIES': [2, 0],\n", " 'LIFE THREATENING ILLNESSES': [0, 0]\n", " },\n", " index = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',)))\n", " display(\"batchCodeTableExpected:\", batchCodeTableExpected)\n", " assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "ef8f99c4", "metadata": {}, "outputs": [], "source": [ "unittest.main(argv=[''], verbosity=2, exit=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 5 }