From 573c6038b250695c784bf80f4b1bf1cb3b704441 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Tue, 1 Feb 2022 09:39:47 +0100 Subject: [PATCH] refactoring --- HowBadIsMyBatch.ipynb | 730 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 709 insertions(+), 21 deletions(-) diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index a56e8a30030..ec8fc371664 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "9de5907f-18f5-4cb1-903e-26028ff1fa03", "metadata": {}, "outputs": [], @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "a271254b", "metadata": {}, "outputs": [], @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "7b5d6df0", "metadata": {}, "outputs": [], @@ -88,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "3ebcba86", "metadata": {}, "outputs": [], @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "99945ca8", "metadata": {}, "outputs": [], @@ -130,15 +130,17 @@ " @staticmethod\n", " def createBatchCodeTable(df : pd.DataFrame):\n", " def filterDataFrame(df, col):\n", - " return df[df[col] == 'Y'][['VAX_LOT']]\n", + " return df[df[col] == 'Y']['VAX_LOT']\n", "\n", " batchCodeTableDict = {\n", - " 'ADRs': df[['VAX_LOT']].value_counts(),\n", + " 'ADRs': df['VAX_LOT'].value_counts(),\n", " 'DEATHS': filterDataFrame(df, 'DIED').value_counts(),\n", " 'DISABILITIES': filterDataFrame(df, 'DISABLE').value_counts(),\n", " 'LIFE THREATENING ILLNESSES': filterDataFrame(df, 'L_THREAT').value_counts()\n", " }\n", - " return pd.concat(batchCodeTableDict, axis = 'columns').replace(to_replace = np.nan, value = 0)\n", + " batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns')\n", + " batchCodeTable.index.name = 'VAX_LOT'\n", + " return batchCodeTable.replace(to_replace = np.nan, value = 0)\n", "\n", " # create table from https://www.howbadismybatch.com/combined.html\n", " # FK-TODO: DRY with createBatchCodeTable()\n", @@ -180,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "3dacedfd", "metadata": {}, "outputs": [], @@ -190,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "e59a1825", "metadata": {}, "outputs": [], @@ -364,7 +366,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "e14465d7", "metadata": {}, "outputs": [], @@ -434,7 +436,7 @@ " 'DISABILITIES': [2, 0],\n", " 'LIFE THREATENING ILLNESSES': [0, 0]\n", " },\n", - " index = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',)))\n", + " index = pd.Index(['025L20A', '037K20A'], name = 'VAX_LOT'))\n", " assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)\n", "\n", " def createDataFrame(self, index, columns, data, dtypes = {}):\n", @@ -443,7 +445,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "ded70c87", "metadata": {}, "outputs": [], @@ -500,17 +502,46 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "5a8bff1b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "test_createBatchCodeTable (__main__.BatchCodeTableTest) ... ok\n", + "test_createBatchCodeTable2 (__main__.BatchCodeTableTest) ... ok\n", + "test_createAndFilterDataFrameFromDescrs (__main__.CreateAndFilterDataFrameTest) ... ok\n", + "test_createAndFilterDataFrameFromDescrsWithFirstDose (__main__.CreateAndFilterDataFrameTest) ... ok\n", + "test_createAndFilterDataFrameFromDescrsWithSecondDose (__main__.CreateAndFilterDataFrameTest) ... ok\n", + "test_createDataFrameFromForSevereEffects (__main__.CreateAndFilterDataFrameTest) ... ok\n", + "test_createSevereEffectsBatchCodeTable (__main__.SevereEffectsBatchCodeTableTest) ... ok\n", + "\n", + "----------------------------------------------------------------------\n", + "Ran 7 tests in 0.143s\n", + "\n", + "OK\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "unittest.main(argv = [''], verbosity = 2, exit = False)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "86e0e4f2", "metadata": {}, "outputs": [], @@ -526,10 +557,452 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "ab170c16", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'MODERNA'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
039K20A333067.022.029.0
026L20A332725.024.021.0
011J20A307233.027.026.0
025L20A239142.016.026.0
012L20A235257.028.022.0
...............
0481az11a10.00.00.0
fc002B21A10.00.00.0
0021321N10.00.00.0
elg26310.00.00.0
0840Z1A10.00.01.0
\n", + "

10248 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", + "VAX_LOT \n", + "039K20A 3330 67.0 22.0 29.0\n", + "026L20A 3327 25.0 24.0 21.0\n", + "011J20A 3072 33.0 27.0 26.0\n", + "025L20A 2391 42.0 16.0 26.0\n", + "012L20A 2352 57.0 28.0 22.0\n", + "... ... ... ... ...\n", + "0481az11a 1 0.0 0.0 0.0\n", + "fc002B21A 1 0.0 0.0 0.0\n", + "0021321N 1 0.0 0.0 0.0\n", + "elg263 1 0.0 0.0 0.0\n", + "0840Z1A 1 0.0 0.0 1.0\n", + "\n", + "[10248 rows x 4 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'PFIZER\\\\BIONTECH'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
EK5730174118.025.016.0
EK9231163137.019.021.0
EH9899157014.036.018.0
EN6205148725.037.031.0
EN6208147533.027.025.0
...............
EWD015310.00.00.0
EW016010.00.01.0
EP 620310.00.00.0
En 6208. A10.00.00.0
FJ116110.00.00.0
\n", + "

6765 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", + "VAX_LOT \n", + "EK5730 1741 18.0 25.0 16.0\n", + "EK9231 1631 37.0 19.0 21.0\n", + "EH9899 1570 14.0 36.0 18.0\n", + "EN6205 1487 25.0 37.0 31.0\n", + "EN6208 1475 33.0 27.0 25.0\n", + "... ... ... ... ...\n", + "EWD0153 1 0.0 0.0 0.0\n", + "EW0160 1 0.0 0.0 1.0\n", + "EP 6203 1 0.0 0.0 0.0\n", + "En 6208. A 1 0.0 0.0 0.0\n", + "FJ1161 1 0.0 0.0 0.0\n", + "\n", + "[6765 rows x 4 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'JANSSEN'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
043A21A169228.029.037.0
042A21A146143.033.035.0
202A21A115922.016.021.0
201A21A112915.027.024.0
1805018112932.030.039.0
...............
MVP-COVID-19 1810.00.01.0
203921910.00.00.0
180798210.00.00.0
043az1n10.00.00.0
213s21a10.00.00.0
\n", + "

1797 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", + "VAX_LOT \n", + "043A21A 1692 28.0 29.0 37.0\n", + "042A21A 1461 43.0 33.0 35.0\n", + "202A21A 1159 22.0 16.0 21.0\n", + "201A21A 1129 15.0 27.0 24.0\n", + "1805018 1129 32.0 30.0 39.0\n", + "... ... ... ... ...\n", + "MVP-COVID-19 18 1 0.0 0.0 1.0\n", + "2039219 1 0.0 0.0 0.0\n", + "1807982 1 0.0 0.0 0.0\n", + "043az1n 1 0.0 0.0 0.0\n", + "213s21a 1 0.0 0.0 0.0\n", + "\n", + "[1797 rows x 4 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "saveBatchCodeTable(\"MODERNA\", \"results/moderna.xlsx\")\n", "saveBatchCodeTable(\"PFIZER\\BIONTECH\", \"results/pfizer.xlsx\")\n", @@ -538,7 +1011,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "bc56831d", "metadata": {}, "outputs": [], @@ -554,10 +1027,225 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "id": "ace3fed9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'severeEffectsBatchCodeTable'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSESHOSPITALISATIONSEMERGENCY ROOM OR DOCTOR VISITSCOMPANY
VAX_LOT
026L20A333525.024.021.0125.00.0MODERNA
039K20A333567.022.029.0134.00.0MODERNA
011J20A307834.027.026.0106.00.0MODERNA
025L20A239742.016.026.083.00.0MODERNA
012L20A235757.029.022.0135.01.0MODERNA
........................
EN6207-10.00.00.00.00.0PFIZER\\BIONTECH
039820A10.00.00.00.00.0MODERNA
Blue-21810.00.00.00.00.0MODERNA
180897310.00.00.00.00.0UNKNOWN MANUFACTURER
FJ116110.00.00.00.00.0PFIZER\\BIONTECH
\n", + "

18428 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES \\\n", + "VAX_LOT \n", + "026L20A 3335 25.0 24.0 21.0 \n", + "039K20A 3335 67.0 22.0 29.0 \n", + "011J20A 3078 34.0 27.0 26.0 \n", + "025L20A 2397 42.0 16.0 26.0 \n", + "012L20A 2357 57.0 29.0 22.0 \n", + "... ... ... ... ... \n", + "EN6207- 1 0.0 0.0 0.0 \n", + "039820A 1 0.0 0.0 0.0 \n", + "Blue-218 1 0.0 0.0 0.0 \n", + "1808973 1 0.0 0.0 0.0 \n", + "FJ1161 1 0.0 0.0 0.0 \n", + "\n", + " HOSPITALISATIONS EMERGENCY ROOM OR DOCTOR VISITS \\\n", + "VAX_LOT \n", + "026L20A 125.0 0.0 \n", + "039K20A 134.0 0.0 \n", + "011J20A 106.0 0.0 \n", + "025L20A 83.0 0.0 \n", + "012L20A 135.0 1.0 \n", + "... ... ... \n", + "EN6207- 0.0 0.0 \n", + "039820A 0.0 0.0 \n", + "Blue-218 0.0 0.0 \n", + "1808973 0.0 0.0 \n", + "FJ1161 0.0 0.0 \n", + "\n", + " COMPANY \n", + "VAX_LOT \n", + "026L20A MODERNA \n", + "039K20A MODERNA \n", + "011J20A MODERNA \n", + "025L20A MODERNA \n", + "012L20A MODERNA \n", + "... ... \n", + "EN6207- PFIZER\\BIONTECH \n", + "039820A MODERNA \n", + "Blue-218 MODERNA \n", + "1808973 UNKNOWN MANUFACTURER \n", + "FJ1161 PFIZER\\BIONTECH \n", + "\n", + "[18428 rows x 7 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "saveSevereEffectsBatchCodeTable('results/severeEffects.xlsx')" ]