diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index ec8fc371664..93d589f2636 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "9de5907f-18f5-4cb1-903e-26028ff1fa03", "metadata": {}, "outputs": [], @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "a271254b", "metadata": {}, "outputs": [], @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "7b5d6df0", "metadata": {}, "outputs": [], @@ -88,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "3ebcba86", "metadata": {}, "outputs": [], @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "99945ca8", "metadata": {}, "outputs": [], @@ -129,14 +129,11 @@ " \n", " @staticmethod\n", " def createBatchCodeTable(df : pd.DataFrame):\n", - " def filterDataFrame(df, col):\n", - " return df[df[col] == 'Y']['VAX_LOT']\n", - "\n", " batchCodeTableDict = {\n", " 'ADRs': df['VAX_LOT'].value_counts(),\n", - " 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n", - " 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n", - " 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts()\n", + " 'DEATHS': BatchCodeTableFactory._filterDataFrame(df, 'DIED').value_counts(),\n", + " 'DISABILITIES': BatchCodeTableFactory._filterDataFrame(df, 'DISABLE').value_counts(),\n", + " 'LIFE THREATENING ILLNESSES': BatchCodeTableFactory._filterDataFrame(df, 'L_THREAT').value_counts()\n", " }\n", " batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n", " batchCodeTable.index.name = 'VAX_LOT'\n", @@ -146,16 +143,13 @@ " # FK-TODO: DRY with createBatchCodeTable()\n", " @staticmethod\n", " def createSevereEffectsBatchCodeTable(df : pd.DataFrame):\n", - " def filterDataFrame(df, col):\n", - " return df[df[col] == 'Y']['VAX_LOT']\n", - "\n", " batchCodeTableDict = {\n", " 'ADRs': df['VAX_LOT'].value_counts(),\n", - " 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n", - " 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n", - " 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts(),\n", - " 'HOSPITALISATIONS': filterDataFrame(df, 'HOSPITAL').value_counts(),\n", - " 'EMERGENCY ROOM OR DOCTOR VISITS': filterDataFrame(df, 'ER_VISIT').value_counts()\n", + " 'DEATHS': BatchCodeTableFactory._filterDataFrame(df, 'DIED').value_counts(),\n", + " 'DISABILITIES': BatchCodeTableFactory._filterDataFrame(df, 'DISABLE').value_counts(),\n", + " 'LIFE THREATENING ILLNESSES': BatchCodeTableFactory._filterDataFrame(df, 'L_THREAT').value_counts(),\n", + " 'HOSPITALISATIONS': BatchCodeTableFactory._filterDataFrame(df, 'HOSPITAL').value_counts(),\n", + " 'EMERGENCY ROOM OR DOCTOR VISITS': BatchCodeTableFactory._filterDataFrame(df, 'ER_VISIT').value_counts()\n", " }\n", " batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n", " batchCodeTable.index.name = 'VAX_LOT'\n", @@ -170,6 +164,10 @@ " return batchCodeTable.replace(to_replace = np.nan, value = 0)\n", "\n", " @staticmethod\n", + " def _filterDataFrame(df, col):\n", + " return df[df[col] == 'Y']['VAX_LOT']\n", + "\n", + " @staticmethod\n", " def _createManufacturerByBatchCodeTable(df):\n", " manufacturerByBatchCodeTable = df[['VAX_LOT', 'VAX_MANU']]\n", " manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])\n", @@ -182,7 +180,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "3dacedfd", "metadata": {}, "outputs": [], @@ -192,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "e59a1825", "metadata": {}, "outputs": [], @@ -366,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "e14465d7", "metadata": {}, "outputs": [], @@ -445,7 +443,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "ded70c87", "metadata": {}, "outputs": [], @@ -502,46 +500,17 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "5a8bff1b", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "test_createBatchCodeTable (__main__.BatchCodeTableTest) ... ok\n", - "test_createBatchCodeTable2 (__main__.BatchCodeTableTest) ... ok\n", - "test_createAndFilterDataFrameFromDescrs (__main__.CreateAndFilterDataFrameTest) ... ok\n", - "test_createAndFilterDataFrameFromDescrsWithFirstDose (__main__.CreateAndFilterDataFrameTest) ... ok\n", - "test_createAndFilterDataFrameFromDescrsWithSecondDose (__main__.CreateAndFilterDataFrameTest) ... ok\n", - "test_createDataFrameFromForSevereEffects (__main__.CreateAndFilterDataFrameTest) ... ok\n", - "test_createSevereEffectsBatchCodeTable (__main__.SevereEffectsBatchCodeTableTest) ... ok\n", - "\n", - "----------------------------------------------------------------------\n", - "Ran 7 tests in 0.143s\n", - "\n", - "OK\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "unittest.main(argv = [''], verbosity = 2, exit = False)" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "86e0e4f2", "metadata": {}, "outputs": [], @@ -557,452 +526,10 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "ab170c16", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'MODERNA'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
039K20A333067.022.029.0
026L20A332725.024.021.0
011J20A307233.027.026.0
025L20A239142.016.026.0
012L20A235257.028.022.0
...............
0481az11a10.00.00.0
fc002B21A10.00.00.0
0021321N10.00.00.0
elg26310.00.00.0
0840Z1A10.00.01.0
\n", - "

10248 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", - "VAX_LOT \n", - "039K20A 3330 67.0 22.0 29.0\n", - "026L20A 3327 25.0 24.0 21.0\n", - "011J20A 3072 33.0 27.0 26.0\n", - "025L20A 2391 42.0 16.0 26.0\n", - "012L20A 2352 57.0 28.0 22.0\n", - "... ... ... ... ...\n", - "0481az11a 1 0.0 0.0 0.0\n", - "fc002B21A 1 0.0 0.0 0.0\n", - "0021321N 1 0.0 0.0 0.0\n", - "elg263 1 0.0 0.0 0.0\n", - "0840Z1A 1 0.0 0.0 1.0\n", - "\n", - "[10248 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'PFIZER\\\\BIONTECH'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
EK5730174118.025.016.0
EK9231163137.019.021.0
EH9899157014.036.018.0
EN6205148725.037.031.0
EN6208147533.027.025.0
...............
EWD015310.00.00.0
EW016010.00.01.0
EP 620310.00.00.0
En 6208. A10.00.00.0
FJ116110.00.00.0
\n", - "

6765 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", - "VAX_LOT \n", - "EK5730 1741 18.0 25.0 16.0\n", - "EK9231 1631 37.0 19.0 21.0\n", - "EH9899 1570 14.0 36.0 18.0\n", - "EN6205 1487 25.0 37.0 31.0\n", - "EN6208 1475 33.0 27.0 25.0\n", - "... ... ... ... ...\n", - "EWD0153 1 0.0 0.0 0.0\n", - "EW0160 1 0.0 0.0 1.0\n", - "EP 6203 1 0.0 0.0 0.0\n", - "En 6208. A 1 0.0 0.0 0.0\n", - "FJ1161 1 0.0 0.0 0.0\n", - "\n", - "[6765 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'JANSSEN'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
043A21A169228.029.037.0
042A21A146143.033.035.0
202A21A115922.016.021.0
201A21A112915.027.024.0
1805018112932.030.039.0
...............
MVP-COVID-19 1810.00.01.0
203921910.00.00.0
180798210.00.00.0
043az1n10.00.00.0
213s21a10.00.00.0
\n", - "

1797 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", - "VAX_LOT \n", - "043A21A 1692 28.0 29.0 37.0\n", - "042A21A 1461 43.0 33.0 35.0\n", - "202A21A 1159 22.0 16.0 21.0\n", - "201A21A 1129 15.0 27.0 24.0\n", - "1805018 1129 32.0 30.0 39.0\n", - "... ... ... ... ...\n", - "MVP-COVID-19 18 1 0.0 0.0 1.0\n", - "2039219 1 0.0 0.0 0.0\n", - "1807982 1 0.0 0.0 0.0\n", - "043az1n 1 0.0 0.0 0.0\n", - "213s21a 1 0.0 0.0 0.0\n", - "\n", - "[1797 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "saveBatchCodeTable(\"MODERNA\", \"results/moderna.xlsx\")\n", "saveBatchCodeTable(\"PFIZER\\BIONTECH\", \"results/pfizer.xlsx\")\n", @@ -1011,7 +538,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "bc56831d", "metadata": {}, "outputs": [], @@ -1027,225 +554,10 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "ace3fed9", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'severeEffectsBatchCodeTable'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSESHOSPITALISATIONSEMERGENCY ROOM OR DOCTOR VISITSCOMPANY
VAX_LOT
026L20A333525.024.021.0125.00.0MODERNA
039K20A333567.022.029.0134.00.0MODERNA
011J20A307834.027.026.0106.00.0MODERNA
025L20A239742.016.026.083.00.0MODERNA
012L20A235757.029.022.0135.01.0MODERNA
........................
EN6207-10.00.00.00.00.0PFIZER\\BIONTECH
039820A10.00.00.00.00.0MODERNA
Blue-21810.00.00.00.00.0MODERNA
180897310.00.00.00.00.0UNKNOWN MANUFACTURER
FJ116110.00.00.00.00.0PFIZER\\BIONTECH
\n", - "

18428 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES \\\n", - "VAX_LOT \n", - "026L20A 3335 25.0 24.0 21.0 \n", - "039K20A 3335 67.0 22.0 29.0 \n", - "011J20A 3078 34.0 27.0 26.0 \n", - "025L20A 2397 42.0 16.0 26.0 \n", - "012L20A 2357 57.0 29.0 22.0 \n", - "... ... ... ... ... \n", - "EN6207- 1 0.0 0.0 0.0 \n", - "039820A 1 0.0 0.0 0.0 \n", - "Blue-218 1 0.0 0.0 0.0 \n", - "1808973 1 0.0 0.0 0.0 \n", - "FJ1161 1 0.0 0.0 0.0 \n", - "\n", - " HOSPITALISATIONS EMERGENCY ROOM OR DOCTOR VISITS \\\n", - "VAX_LOT \n", - "026L20A 125.0 0.0 \n", - "039K20A 134.0 0.0 \n", - "011J20A 106.0 0.0 \n", - "025L20A 83.0 0.0 \n", - "012L20A 135.0 1.0 \n", - "... ... ... \n", - "EN6207- 0.0 0.0 \n", - "039820A 0.0 0.0 \n", - "Blue-218 0.0 0.0 \n", - "1808973 0.0 0.0 \n", - "FJ1161 0.0 0.0 \n", - "\n", - " COMPANY \n", - "VAX_LOT \n", - "026L20A MODERNA \n", - "039K20A MODERNA \n", - "011J20A MODERNA \n", - "025L20A MODERNA \n", - "012L20A MODERNA \n", - "... ... \n", - "EN6207- PFIZER\\BIONTECH \n", - "039820A MODERNA \n", - "Blue-218 MODERNA \n", - "1808973 UNKNOWN MANUFACTURER \n", - "FJ1161 PFIZER\\BIONTECH \n", - "\n", - "[18428 rows x 7 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "saveSevereEffectsBatchCodeTable('results/severeEffects.xlsx')" ]