{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "9de5907f-18f5-4cb1-903e-26028ff1fa03",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# Notebook-wide display settings: show up to 100 rows before a frame is\n",
"# truncated, and never truncate the list of columns.\n",
"pd.options.display.max_rows = 100\n",
"pd.options.display.max_columns = None"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "233bc590",
"metadata": {},
"outputs": [],
"source": [
"def createDataFrame(dataDir, manufacturer, years = (\"2021\", \"2022\")):\n",
"    \"\"\"Load the COVID-19 VAERS reports of one vaccine manufacturer.\n",
"\n",
"    For each year, <dataDir>/<year>VAERSData/<year>VAERSDATA.csv and\n",
"    <dataDir>/<year>VAERSData/<year>VAERSVAX.csv are read and inner-joined on\n",
"    VAERS_ID; the per-year frames are then concatenated.\n",
"\n",
"    Parameters:\n",
"        dataDir: directory that contains the <year>VAERSData folders.\n",
"        manufacturer: VAX_MANU value to keep, e.g. \"MODERNA\".\n",
"        years: years to load, as strings or ints; a single year may also be\n",
"            passed on its own. Defaults to 2021 and 2022.\n",
"\n",
"    Returns:\n",
"        DataFrame indexed by VAERS_ID with the columns DIED, L_THREAT, DISABLE,\n",
"        VAX_TYPE, VAX_MANU and VAX_LOT, restricted to rows whose VAX_TYPE is\n",
"        \"COVID19\" and whose VAX_MANU equals manufacturer.\n",
"\n",
"    Raises:\n",
"        ValueError: if years is empty (raised by pd.concat).\n",
"    \"\"\"\n",
"    def read_csv(file, usecols):\n",
"        return pd.read_csv(file, index_col = 'VAERS_ID', encoding = 'latin1', low_memory = False, usecols = usecols)\n",
"\n",
"    def createDataFrameForYear(year):\n",
"        year = str(year)\n",
"        folder = dataDir + \"/\" + year + \"VAERSData/\"\n",
"        # Default (inner) join on the VAERS_ID index: if a VAERS_ID repeats in\n",
"        # the VAERSVAX file, each of those rows is kept.\n",
"        return pd.merge(\n",
"            read_csv(folder + year + \"VAERSDATA.csv\", ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE']),\n",
"            read_csv(folder + year + \"VAERSVAX.csv\", ['VAERS_ID', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT']),\n",
"            left_index = True,\n",
"            right_index = True)\n",
"\n",
"    # A bare year would otherwise be iterated character by character.\n",
"    if isinstance(years, (str, int)):\n",
"        years = [years]\n",
"\n",
"    df = pd.concat([createDataFrameForYear(year) for year in years])\n",
"    return df[(df[\"VAX_TYPE\"] == \"COVID19\") & (df[\"VAX_MANU\"] == manufacturer)]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "99945ca8",
"metadata": {},
"outputs": [],
"source": [
"def createBatchCodeTable(df):\n",
"    \"\"\"Count adverse reaction reports per batch code (VAX_LOT).\n",
"\n",
"    Parameters:\n",
"        df: DataFrame with the columns VAX_LOT, DIED, DISABLE and L_THREAT,\n",
"            where 'Y' marks a report with that outcome.\n",
"\n",
"    Returns:\n",
"        DataFrame with one row per batch code and the integer columns\n",
"        'ADRs' (all reports), 'DEATHS', 'DISABILITIES' and\n",
"        'LIFE THREATENING ILLNESSES' (reports flagged 'Y'). A batch code\n",
"        without any report of an outcome gets 0 in that column.\n",
"    \"\"\"\n",
"    def countBatchCodes(rows):\n",
"        # DataFrame.value_counts (not Series.value_counts) keeps the one-level\n",
"        # MultiIndex named VAX_LOT that test_createBatchCodeTable expects.\n",
"        return rows[['VAX_LOT']].value_counts()\n",
"\n",
"    def countFlagged(col):\n",
"        return countBatchCodes(df[df[col] == 'Y'])\n",
"\n",
"    batchCodeTableDict = {\n",
"        'ADRs': countBatchCodes(df),\n",
"        'DEATHS': countFlagged('DIED'),\n",
"        'DISABILITIES': countFlagged('DISABLE'),\n",
"        'LIFE THREATENING ILLNESSES': countFlagged('L_THREAT')\n",
"    }\n",
"    # Aligning the four Series leaves NaN where a batch code has no report of\n",
"    # an outcome, which upcasts that column to float; fill the gaps with 0 and\n",
"    # restore integer counts.\n",
"    return pd.concat(batchCodeTableDict, axis = 1).fillna(0).astype('int64')\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "86e0e4f2",
"metadata": {},
"outputs": [],
"source": [
"def saveBatchCodeTable(manufacturer, excelFile, dataDir = \"VAERS\"):\n",
"    \"\"\"Build, display and save the batch code table of one manufacturer.\n",
"\n",
"    Parameters:\n",
"        manufacturer: VAX_MANU value passed on to createDataFrame.\n",
"        excelFile: path of the Excel file to write; its parent directory is\n",
"            created if it does not exist yet.\n",
"        dataDir: directory passed on to createDataFrame. Defaults to \"VAERS\".\n",
"    \"\"\"\n",
"    import os\n",
"\n",
"    batchCodeTable = createBatchCodeTable(createDataFrame(dataDir, manufacturer))\n",
"    display(manufacturer, batchCodeTable)\n",
"\n",
"    # to_excel does not create missing directories, so a fresh checkout\n",
"    # without e.g. results/ would fail only after all the loading work.\n",
"    targetDir = os.path.dirname(excelFile)\n",
"    if targetDir:\n",
"        os.makedirs(targetDir, exist_ok = True)\n",
"    batchCodeTable.to_excel(excelFile)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ab170c16",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'MODERNA'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ADRs | \n",
" DEATHS | \n",
" DISABILITIES | \n",
" LIFE THREATENING ILLNESSES | \n",
"
\n",
" \n",
" | VAX_LOT | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 026L20A | \n",
" 4177 | \n",
" 42.0 | \n",
" 35.0 | \n",
" 28.0 | \n",
"
\n",
" \n",
" | 039K20A | \n",
" 4169 | \n",
" 93.0 | \n",
" 39.0 | \n",
" 37.0 | \n",
"
\n",
" \n",
" | 011J20A | \n",
" 3661 | \n",
" 37.0 | \n",
" 33.0 | \n",
" 28.0 | \n",
"
\n",
" \n",
" | 013L20A | \n",
" 3191 | \n",
" 68.0 | \n",
" 47.0 | \n",
" 32.0 | \n",
"
\n",
" \n",
" | 012L20A | \n",
" 3068 | \n",
" 72.0 | \n",
" 30.0 | \n",
" 30.0 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 029L2VA | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 029L2oa | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 029L30A | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 029L420A | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | Ø38B21A | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
16284 rows × 4 columns
\n",
"
"
],
"text/plain": [
" ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n",
"VAX_LOT \n",
"026L20A 4177 42.0 35.0 28.0\n",
"039K20A 4169 93.0 39.0 37.0\n",
"011J20A 3661 37.0 33.0 28.0\n",
"013L20A 3191 68.0 47.0 32.0\n",
"012L20A 3068 72.0 30.0 30.0\n",
"... ... ... ... ...\n",
"029L2VA 1 0.0 0.0 0.0\n",
"029L2oa 1 0.0 0.0 0.0\n",
"029L30A 1 0.0 0.0 0.0\n",
"029L420A 1 0.0 0.0 0.0\n",
"Ø38B21A 1 0.0 0.0 0.0\n",
"\n",
"[16284 rows x 4 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'PFIZER\\\\BIONTECH'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ADRs | \n",
" DEATHS | \n",
" DISABILITIES | \n",
" LIFE THREATENING ILLNESSES | \n",
"
\n",
" \n",
" | VAX_LOT | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | EK9231 | \n",
" 3392 | \n",
" 48.0 | \n",
" 56.0 | \n",
" 35.0 | \n",
"
\n",
" \n",
" | ER2613 | \n",
" 3345 | \n",
" 65.0 | \n",
" 62.0 | \n",
" 57.0 | \n",
"
\n",
" \n",
" | EN6201 | \n",
" 2928 | \n",
" 148.0 | \n",
" 69.0 | \n",
" 55.0 | \n",
"
\n",
" \n",
" | EN5318 | \n",
" 2811 | \n",
" 114.0 | \n",
" 62.0 | \n",
" 57.0 | \n",
"
\n",
" \n",
" | ER8732 | \n",
" 2705 | \n",
" 50.0 | \n",
" 51.0 | \n",
" 68.0 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | EN6203 UPC | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | EN6203, | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | EN6203, EL3247 | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | EN6203, EN6204 | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | ÉÑ6198 | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
12447 rows × 4 columns
\n",
"
"
],
"text/plain": [
" ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n",
"VAX_LOT \n",
"EK9231 3392 48.0 56.0 35.0\n",
"ER2613 3345 65.0 62.0 57.0\n",
"EN6201 2928 148.0 69.0 55.0\n",
"EN5318 2811 114.0 62.0 57.0\n",
"ER8732 2705 50.0 51.0 68.0\n",
"... ... ... ... ...\n",
"EN6203 UPC 1 0.0 0.0 0.0\n",
"EN6203, 1 0.0 0.0 0.0\n",
"EN6203, EL3247 1 0.0 0.0 0.0\n",
"EN6203, EN6204 1 0.0 0.0 0.0\n",
"ÉÑ6198 1 0.0 0.0 0.0\n",
"\n",
"[12447 rows x 4 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'JANSSEN'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ADRs | \n",
" DEATHS | \n",
" DISABILITIES | \n",
" LIFE THREATENING ILLNESSES | \n",
"
\n",
" \n",
" | VAX_LOT | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | Unknown | \n",
" 7312 | \n",
" 264.0 | \n",
" 19.0 | \n",
" 31.0 | \n",
"
\n",
" \n",
" | 043A21A | \n",
" 2347 | \n",
" 37.0 | \n",
" 31.0 | \n",
" 49.0 | \n",
"
\n",
" \n",
" | 042A21A | \n",
" 2170 | \n",
" 44.0 | \n",
" 41.0 | \n",
" 43.0 | \n",
"
\n",
" \n",
" | 1805018 | \n",
" 1756 | \n",
" 35.0 | \n",
" 36.0 | \n",
" 49.0 | \n",
"
\n",
" \n",
" | 202A21A | \n",
" 1726 | \n",
" 25.0 | \n",
" 21.0 | \n",
" 25.0 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 180E018 | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 180D068 | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 180C068 | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 180B982 | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | zz | \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
2456 rows × 4 columns
\n",
"
"
],
"text/plain": [
" ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n",
"VAX_LOT \n",
"Unknown 7312 264.0 19.0 31.0\n",
"043A21A 2347 37.0 31.0 49.0\n",
"042A21A 2170 44.0 41.0 43.0\n",
"1805018 1756 35.0 36.0 49.0\n",
"202A21A 1726 25.0 21.0 25.0\n",
"... ... ... ... ...\n",
"180E018 1 0.0 0.0 0.0\n",
"180D068 1 0.0 0.0 0.0\n",
"180C068 1 0.0 0.0 0.0\n",
"180B982 1 0.0 0.0 0.0\n",
"zz 1 0.0 0.0 0.0\n",
"\n",
"[2456 rows x 4 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"saveBatchCodeTable(\"MODERNA\", \"results/moderna.xlsx\")\n",
"# The backslash belongs to the VAX_MANU value; it is written doubled because\n",
"# \\B is not a valid string escape sequence.\n",
"saveBatchCodeTable(\"PFIZER\\\\BIONTECH\", \"results/pfizer.xlsx\")\n",
"saveBatchCodeTable(\"JANSSEN\", \"results/janssen.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "9f506ac8",
"metadata": {},
"outputs": [],
"source": [
"import unittest"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "e14465d7",
"metadata": {},
"outputs": [],
"source": [
"from pandas.testing import assert_frame_equal\n",
"\n",
"\n",
"class HowBadIsMyBatchTest(unittest.TestCase):\n",
"    \"\"\"Checks createBatchCodeTable against the fixture files under test/VAERS.\"\"\"\n",
"\n",
"    def test_createBatchCodeTable(self):\n",
"        \"\"\"The MODERNA fixture rows are counted per batch code and outcome.\"\"\"\n",
"        # Given\n",
"        modernaReports = createDataFrame(\"test/VAERS\", \"MODERNA\")\n",
"        display(\"dataFrame:\", modernaReports)\n",
"\n",
"        # When\n",
"        actualTable = createBatchCodeTable(modernaReports)\n",
"        display(\"batchCodeTable:\", actualTable)\n",
"\n",
"        # Then\n",
"        expectedIndex = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',))\n",
"        expectedCounts = {\n",
"            'ADRs': [2, 1],\n",
"            'DEATHS': [0, 1],\n",
"            'DISABILITIES': [2, 0],\n",
"            'LIFE THREATENING ILLNESSES': [0, 0]\n",
"        }\n",
"        expectedTable = pd.DataFrame(expectedCounts, index = expectedIndex)\n",
"        display(\"batchCodeTableExpected:\", expectedTable)\n",
"        # Only the values and labels are compared, not the column dtypes.\n",
"        assert_frame_equal(actualTable, expectedTable, check_dtype = False)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ef8f99c4",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"test_createBatchCodeTable (__main__.HowBadIsMyBatchTest) ... "
]
},
{
"data": {
"text/plain": [
"'dataFrame:'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" DIED | \n",
" L_THREAT | \n",
" DISABLE | \n",
" VAX_TYPE | \n",
" VAX_MANU | \n",
" VAX_LOT | \n",
"
\n",
" \n",
" | VAERS_ID | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 916600 | \n",
" Y | \n",
" NaN | \n",
" NaN | \n",
" COVID19 | \n",
" MODERNA | \n",
" 037K20A | \n",
"
\n",
" \n",
" | 916601 | \n",
" NaN | \n",
" NaN | \n",
" Y | \n",
" COVID19 | \n",
" MODERNA | \n",
" 025L20A | \n",
"
\n",
" \n",
" | 1996874 | \n",
" NaN | \n",
" NaN | \n",
" Y | \n",
" COVID19 | \n",
" MODERNA | \n",
" 025L20A | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" DIED L_THREAT DISABLE VAX_TYPE VAX_MANU VAX_LOT\n",
"VAERS_ID \n",
"916600 Y NaN NaN COVID19 MODERNA 037K20A\n",
"916601 NaN NaN Y COVID19 MODERNA 025L20A\n",
"1996874 NaN NaN Y COVID19 MODERNA 025L20A"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'batchCodeTable:'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ADRs | \n",
" DEATHS | \n",
" DISABILITIES | \n",
" LIFE THREATENING ILLNESSES | \n",
"
\n",
" \n",
" | VAX_LOT | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 025L20A | \n",
" 2 | \n",
" 0.0 | \n",
" 2.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 037K20A | \n",
" 1 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n",
"VAX_LOT \n",
"025L20A 2 0.0 2.0 0.0\n",
"037K20A 1 1.0 0.0 0.0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'batchCodeTableExpected:'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ADRs | \n",
" DEATHS | \n",
" DISABILITIES | \n",
" LIFE THREATENING ILLNESSES | \n",
"
\n",
" \n",
" | VAX_LOT | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 025L20A | \n",
" 2 | \n",
" 0 | \n",
" 2 | \n",
" 0 | \n",
"
\n",
" \n",
" | 037K20A | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n",
"VAX_LOT \n",
"025L20A 2 0 2 0\n",
"037K20A 1 1 0 0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"ok\n",
"\n",
"----------------------------------------------------------------------\n",
"Ran 1 test in 0.220s\n",
"\n",
"OK\n"
]
},
{
"data": {
"text/plain": [
""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# argv = [''] keeps unittest from parsing the kernel's own command line;\n",
"# exit = False keeps it from calling sys.exit() once the run has finished.\n",
"unittest.main(\n",
"    argv = [''],\n",
"    exit = False,\n",
"    verbosity = 2)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}