{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "9de5907f-18f5-4cb1-903e-26028ff1fa03", "metadata": {}, "outputs": [], "source": [
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"pd.set_option('display.max_rows', 100)\n",
"pd.set_option('display.max_columns', None)"
] }, { "cell_type": "code", "execution_count": 2, "id": "233bc590", "metadata": {}, "outputs": [], "source": [
"def createDataFrame(dataDir, manufacturer, years = (\"2021\", \"2022\")):\n",
"    \"\"\"Load the VAERS reports of the given years for one COVID-19 vaccine manufacturer.\n",
"\n",
"    For every year the files <dataDir>/<year>VAERSData/<year>VAERSDATA.csv and\n",
"    <dataDir>/<year>VAERSData/<year>VAERSVAX.csv are read and joined on VAERS_ID.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    dataDir : str\n",
"        Folder holding one <year>VAERSData sub-folder per year.\n",
"    manufacturer : str\n",
"        Value that VAX_MANU has to equal, e.g. 'MODERNA'.\n",
"    years : iterable of str or int, optional\n",
"        Years to load. Defaults to 2021 and 2022.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Rows with VAX_TYPE == 'COVID19' and VAX_MANU == manufacturer, indexed by\n",
"        VAERS_ID, with the columns DIED, L_THREAT, DISABLE, VAX_TYPE, VAX_MANU and VAX_LOT.\n",
"    \"\"\"\n",
"    def read_csv(file, usecols):\n",
"        return pd.read_csv(file, index_col = 'VAERS_ID', encoding = 'latin1', low_memory = False, usecols = usecols)\n",
"\n",
"    def createDataFrameForYear(year):\n",
"        year = str(year)\n",
"        folder = dataDir + \"/\" + year + \"VAERSData/\"\n",
"        # pd.merge defaults to an inner join, so only reports present in both files are kept.\n",
"        return pd.merge(\n",
"            read_csv(folder + year + \"VAERSDATA.csv\", ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE']),\n",
"            read_csv(folder + year + \"VAERSVAX.csv\", ['VAERS_ID', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT']),\n",
"            left_index = True,\n",
"            right_index = True)\n",
"\n",
"    df = pd.concat([createDataFrameForYear(year) for year in years])\n",
"    return df[(df[\"VAX_TYPE\"] == \"COVID19\") & (df[\"VAX_MANU\"] == manufacturer)]"
] }, { "cell_type": "code", "execution_count": 3, "id": "99945ca8", "metadata": {}, "outputs": [], "source": [
"def createBatchCodeTable(df):\n",
"    \"\"\"Count the reports per batch code (VAX_LOT), in total and per severe outcome.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Needs the columns VAX_LOT, DIED, DISABLE and L_THREAT; an outcome column\n",
"        counts when its value is 'Y'.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        One row per VAX_LOT value with the columns 'ADRs', 'DEATHS', 'DISABILITIES'\n",
"        and 'LIFE THREATENING ILLNESSES'. The index is the one produced by\n",
"        DataFrame.value_counts on the VAX_LOT column.\n",
"    \"\"\"\n",
"    def countLotsWhere(col):\n",
"        return df[df[col] == 'Y'][['VAX_LOT']].value_counts()\n",
"\n",
"    batchCodeTableDict = {\n",
"        'ADRs': df[['VAX_LOT']].value_counts(),\n",
"        'DEATHS': countLotsWhere('DIED'),\n",
"        'DISABILITIES': countLotsWhere('DISABLE'),\n",
"        'LIFE THREATENING ILLNESSES': countLotsWhere('L_THREAT')\n",
"    }\n",
"    # A lot without any report of an outcome is absent from that count, so concat leaves NaN there.\n",
"    return pd.concat(batchCodeTableDict, axis = 1).fillna(0)\n"
] }, { "cell_type": "code", "execution_count": 4, "id": "86e0e4f2", "metadata": {}, "outputs": [], "source": [
"def saveBatchCodeTable(manufacturer, excelFile, dataDir = \"VAERS\"):\n",
"    \"\"\"Build the batch code table of one manufacturer, display it and write it to an Excel file.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    manufacturer : str\n",
"        Passed to createDataFrame as the VAX_MANU value to keep.\n",
"    excelFile : str or os.PathLike (or anything else DataFrame.to_excel accepts)\n",
"        Destination of the table.\n",
"    dataDir : str, optional\n",
"        Folder passed to createDataFrame. Defaults to 'VAERS'.\n",
"    \"\"\"\n",
"    batchCodeTable = createBatchCodeTable(createDataFrame(dataDir, manufacturer))\n",
"    display(manufacturer, batchCodeTable)\n",
"    if isinstance(excelFile, (str, os.PathLike)):\n",
"        # Create the target folder first so that a checkout without it does not fail on writing.\n",
"        os.makedirs(os.path.dirname(os.path.abspath(excelFile)), exist_ok = True)\n",
"    batchCodeTable.to_excel(excelFile)"
] }, { "cell_type": "code", "execution_count": 5, "id": "ab170c16", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'MODERNA'" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
026L20A417742.035.028.0
039K20A416993.039.037.0
011J20A366137.033.028.0
013L20A319168.047.032.0
012L20A306872.030.030.0
...............
029L2VA10.00.00.0
029L2oa10.00.00.0
029L30A10.00.00.0
029L420A10.00.00.0
Ø38B21A10.00.00.0
\n", "

16284 rows × 4 columns

\n", "
" ], "text/plain": [ " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", "VAX_LOT \n", "026L20A 4177 42.0 35.0 28.0\n", "039K20A 4169 93.0 39.0 37.0\n", "011J20A 3661 37.0 33.0 28.0\n", "013L20A 3191 68.0 47.0 32.0\n", "012L20A 3068 72.0 30.0 30.0\n", "... ... ... ... ...\n", "029L2VA 1 0.0 0.0 0.0\n", "029L2oa 1 0.0 0.0 0.0\n", "029L30A 1 0.0 0.0 0.0\n", "029L420A 1 0.0 0.0 0.0\n", "Ø38B21A 1 0.0 0.0 0.0\n", "\n", "[16284 rows x 4 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "'PFIZER\\\\BIONTECH'" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
EK9231339248.056.035.0
ER2613334565.062.057.0
EN62012928148.069.055.0
EN53182811114.062.057.0
ER8732270550.051.068.0
...............
EN6203 UPC10.00.00.0
EN6203,10.00.00.0
EN6203, EL324710.00.00.0
EN6203, EN620410.00.00.0
ÉÑ619810.00.00.0
\n", "

12447 rows × 4 columns

\n", "
" ], "text/plain": [ " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", "VAX_LOT \n", "EK9231 3392 48.0 56.0 35.0\n", "ER2613 3345 65.0 62.0 57.0\n", "EN6201 2928 148.0 69.0 55.0\n", "EN5318 2811 114.0 62.0 57.0\n", "ER8732 2705 50.0 51.0 68.0\n", "... ... ... ... ...\n", "EN6203 UPC 1 0.0 0.0 0.0\n", "EN6203, 1 0.0 0.0 0.0\n", "EN6203, EL3247 1 0.0 0.0 0.0\n", "EN6203, EN6204 1 0.0 0.0 0.0\n", "ÉÑ6198 1 0.0 0.0 0.0\n", "\n", "[12447 rows x 4 columns]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "'JANSSEN'" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
Unknown7312264.019.031.0
043A21A234737.031.049.0
042A21A217044.041.043.0
1805018175635.036.049.0
202A21A172625.021.025.0
...............
180E01810.00.00.0
180D06810.00.00.0
180C06810.00.00.0
180B98210.00.00.0
zz10.00.00.0
\n", "

2456 rows × 4 columns

\n", "
" ], "text/plain": [ " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", "VAX_LOT \n", "Unknown 7312 264.0 19.0 31.0\n", "043A21A 2347 37.0 31.0 49.0\n", "042A21A 2170 44.0 41.0 43.0\n", "1805018 1756 35.0 36.0 49.0\n", "202A21A 1726 25.0 21.0 25.0\n", "... ... ... ... ...\n", "180E018 1 0.0 0.0 0.0\n", "180D068 1 0.0 0.0 0.0\n", "180C068 1 0.0 0.0 0.0\n", "180B982 1 0.0 0.0 0.0\n", "zz 1 0.0 0.0 0.0\n", "\n", "[2456 rows x 4 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
"saveBatchCodeTable(\"MODERNA\", \"results/moderna.xlsx\")\n",
"# The Pfizer manufacturer name contains a literal backslash, which has to be written escaped in a Python string.\n",
"saveBatchCodeTable(\"PFIZER\\\\BIONTECH\", \"results/pfizer.xlsx\")\n",
"saveBatchCodeTable(\"JANSSEN\", \"results/janssen.xlsx\")"
] }, { "cell_type": "code", "execution_count": 6, "id": "9f506ac8", "metadata": {}, "outputs": [], "source": [
"import unittest"
] }, { "cell_type": "code", "execution_count": 7, "id": "e14465d7", "metadata": {}, "outputs": [], "source": [
"from pandas.testing import assert_frame_equal\n",
"\n",
"\n",
"class HowBadIsMyBatchTest(unittest.TestCase):\n",
"    \"\"\"Checks createBatchCodeTable against the fixture files below test/VAERS.\"\"\"\n",
"\n",
"    def test_createBatchCodeTable(self):\n",
"        \"\"\"The MODERNA fixture rows must be counted per VAX_LOT and per outcome.\"\"\"\n",
"        # Given\n",
"        dataFrame = createDataFrame(\"test/VAERS\", \"MODERNA\")\n",
"        display(\"dataFrame:\", dataFrame)\n",
"\n",
"        # When\n",
"        batchCodeTable = createBatchCodeTable(dataFrame)\n",
"        display(\"batchCodeTable:\", batchCodeTable)\n",
"\n",
"        # Then\n",
"        # The expected index is a one-level MultiIndex because the table is built from DataFrame.value_counts.\n",
"        batchCodeTableExpected = pd.DataFrame(\n",
"            {\n",
"                'ADRs': [2, 1],\n",
"                'DEATHS': [0, 1],\n",
"                'DISABILITIES': [2, 0],\n",
"                'LIFE THREATENING ILLNESSES': [0, 0]\n",
"            },\n",
"            index = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',)))\n",
"        display(\"batchCodeTableExpected:\", batchCodeTableExpected)\n",
"        # check_dtype = False: the computed outcome columns are floats, the expected ones are ints.\n",
"        assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)\n"
] }, { "cell_type": "code", "execution_count": 8, "id": "ef8f99c4", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "test_createBatchCodeTable 
(__main__.HowBadIsMyBatchTest) ... " ] }, { "data": { "text/plain": [ "'dataFrame:'" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DIEDL_THREATDISABLEVAX_TYPEVAX_MANUVAX_LOT
VAERS_ID
916600YNaNNaNCOVID19MODERNA037K20A
916601NaNNaNYCOVID19MODERNA025L20A
1996874NaNNaNYCOVID19MODERNA025L20A
\n", "
" ], "text/plain": [ " DIED L_THREAT DISABLE VAX_TYPE VAX_MANU VAX_LOT\n", "VAERS_ID \n", "916600 Y NaN NaN COVID19 MODERNA 037K20A\n", "916601 NaN NaN Y COVID19 MODERNA 025L20A\n", "1996874 NaN NaN Y COVID19 MODERNA 025L20A" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "'batchCodeTable:'" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
025L20A20.02.00.0
037K20A11.00.00.0
\n", "
" ], "text/plain": [ " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", "VAX_LOT \n", "025L20A 2 0.0 2.0 0.0\n", "037K20A 1 1.0 0.0 0.0" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "'batchCodeTableExpected:'" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
025L20A2020
037K20A1100
\n", "
" ], "text/plain": [ " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", "VAX_LOT \n", "025L20A 2 0 2 0\n", "037K20A 1 1 0 0" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "ok\n", "\n", "----------------------------------------------------------------------\n", "Ran 1 test in 0.220s\n", "\n", "OK\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Run the tests defined in this notebook. exit = False stops unittest.main from calling sys.exit() afterwards;\n", "# argv = [''] presumably keeps unittest from parsing the kernel's own command line arguments (confirm).\n", "unittest.main(argv = [''], verbosity = 2, exit = False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 5 }