diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index fcc48c6c3f4..5cc0ef50871 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "9de5907f-18f5-4cb1-903e-26028ff1fa03", "metadata": {}, "outputs": [], @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "a271254b", "metadata": {}, "outputs": [], @@ -60,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "7b5d6df0", "metadata": {}, "outputs": [], @@ -87,24 +87,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "3ebcba86", "metadata": {}, "outputs": [], "source": [ - "def filterDataFrame(df, manufacturer = None, dose = None):\n", - " isCovid19 = df[\"VAX_TYPE\"] == \"COVID19\"\n", - " isManufacturer = df[\"VAX_MANU\"] == manufacturer if manufacturer is not None else True\n", - " isDose = df[\"VAX_DOSE_SERIES\"].str.contains(dose) if dose is not None else True\n", - " return df[isCovid19 & isManufacturer & isDose]\n", + "import pandas as pd\n", "\n", - "def filterDataFrameForSevereEffects(df, dose):\n", - " return filterDataFrame(df, dose = dose)\n" + "class DataFrameFilter:\n", + " \n", + " @staticmethod\n", + " def filterDataFrame(df, manufacturer = None, dose = None):\n", + " isCovid19 = df[\"VAX_TYPE\"] == \"COVID19\"\n", + " isManufacturer = df[\"VAX_MANU\"] == manufacturer if manufacturer is not None else True\n", + " isDose = df[\"VAX_DOSE_SERIES\"].str.contains(dose) if dose is not None else True\n", + " return df[isCovid19 & isManufacturer & isDose]\n", + "\n", + " @staticmethod\n", + " def filterDataFrameForSevereEffects(df, dose):\n", + " return DataFrameFilter.filterDataFrame(df, dose = dose)\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "99945ca8", "metadata": {}, "outputs": [], @@ -158,7 +164,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "3dacedfd", "metadata": {}, "outputs": [], @@ -168,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "e59a1825", "metadata": {}, "outputs": [], @@ -218,7 +224,7 @@ " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " \n", " # When\n", - " dataFrame = filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", + " dataFrame = DataFrameFilter.filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -257,7 +263,7 @@ " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " \n", " # When\n", - " dataFrame = filterDataFrameForSevereEffects(dataFrame, dose = '1')\n", + " dataFrame = DataFrameFilter.filterDataFrameForSevereEffects(dataFrame, dose = '1')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -292,7 +298,7 @@ " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " \n", " # When\n", - " dataFrame = filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", + " dataFrame = DataFrameFilter.filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -325,7 +331,7 @@ " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " \n", " # When\n", - " dataFrame = filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '2')\n", + " dataFrame = DataFrameFilter.filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '2')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -342,7 +348,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "e14465d7", "metadata": {}, "outputs": [], @@ -389,12 +395,12 @@ " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " }\n", " ])\n", - " dataFrame = filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", + " dataFrame = DataFrameFilter.filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", " self._test_createBatchCodeTable(dataFrame);\n", "\n", " def test_createBatchCodeTable(self):\n", " self._test_createBatchCodeTable(\n", - " filterDataFrame(\n", + " DataFrameFilter.filterDataFrame(\n", " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", " VaersDescrReader(\"test/VAERS\").readAllVaersDescrs()),\n", " manufacturer = \"MODERNA\",\n", @@ -421,7 +427,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "ded70c87", "metadata": {}, "outputs": [], @@ -453,7 +459,7 @@ " }\n", " ]\n", " )\n", - " dataFrame = filterDataFrameForSevereEffects(dataFrame, dose = '1')\n", + " dataFrame = DataFrameFilter.filterDataFrameForSevereEffects(dataFrame, dose = '1')\n", "\n", " # When\n", " batchCodeTable = createSevereEffectsBatchCodeTable(dataFrame)\n", @@ -478,24 +484,53 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "5a8bff1b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "test_createBatchCodeTable (__main__.BatchCodeTableTest) ... ok\n", + "test_createBatchCodeTable2 (__main__.BatchCodeTableTest) ... ok\n", + "test_createAndFilterDataFrameFromDescrs (__main__.CreateAndFilterDataFrameTest) ... ok\n", + "test_createAndFilterDataFrameFromDescrsWithFirstDose (__main__.CreateAndFilterDataFrameTest) ... ok\n", + "test_createAndFilterDataFrameFromDescrsWithSecondDose (__main__.CreateAndFilterDataFrameTest) ... ok\n", + "test_createDataFrameFromForSevereEffects (__main__.CreateAndFilterDataFrameTest) ... ok\n", + "test_createSevereEffectsBatchCodeTable (__main__.SevereEffectsBatchCodeTableTest) ... ok\n", + "\n", + "----------------------------------------------------------------------\n", + "Ran 7 tests in 0.156s\n", + "\n", + "OK\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "unittest.main(argv = [''], verbosity = 2, exit = False)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "86e0e4f2", "metadata": {}, "outputs": [], "source": [ "def saveBatchCodeTable(manufacturer, excelFile):\n", " batchCodeTable = createBatchCodeTable(\n", - " filterDataFrame(\n", + " DataFrameFilter.filterDataFrame(\n", " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", " VaersDescrReader(\"VAERS\").readAllVaersDescrs()),\n", " manufacturer = manufacturer,\n", @@ -506,10 +541,452 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "ab170c16", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'MODERNA'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
039K20A333067.022.029.0
026L20A332725.024.021.0
011J20A307233.027.026.0
025L20A239142.016.026.0
012L20A235257.028.022.0
...............
028-20A10.00.00.0
028(L or Z?)20A10.00.00.0
028 L20A10.00.00.0
028 L 20A10.00.00.0
xx3A21A10.00.00.0
\n", + "

10248 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", + "VAX_LOT \n", + "039K20A 3330 67.0 22.0 29.0\n", + "026L20A 3327 25.0 24.0 21.0\n", + "011J20A 3072 33.0 27.0 26.0\n", + "025L20A 2391 42.0 16.0 26.0\n", + "012L20A 2352 57.0 28.0 22.0\n", + "... ... ... ... ...\n", + "028-20A 1 0.0 0.0 0.0\n", + "028(L or Z?)20A 1 0.0 0.0 0.0\n", + "028 L20A 1 0.0 0.0 0.0\n", + "028 L 20A 1 0.0 0.0 0.0\n", + "xx3A21A 1 0.0 0.0 0.0\n", + "\n", + "[10248 rows x 4 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'PFIZER\\\\BIONTECH'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
EK5730174118.025.016.0
EK9231163137.019.021.0
EH9899157014.036.018.0
EN6205148725.037.031.0
EN6208147533.027.025.0
...............
EN626610.00.00.0
EN62620710.00.00.0
EN626210.00.00.0
EN625110.00.00.0
zw015110.01.00.0
\n", + "

6765 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", + "VAX_LOT \n", + "EK5730 1741 18.0 25.0 16.0\n", + "EK9231 1631 37.0 19.0 21.0\n", + "EH9899 1570 14.0 36.0 18.0\n", + "EN6205 1487 25.0 37.0 31.0\n", + "EN6208 1475 33.0 27.0 25.0\n", + "... ... ... ... ...\n", + "EN6266 1 0.0 0.0 0.0\n", + "EN626207 1 0.0 0.0 0.0\n", + "EN6262 1 0.0 0.0 0.0\n", + "EN6251 1 0.0 0.0 0.0\n", + "zw0151 1 0.0 1.0 0.0\n", + "\n", + "[6765 rows x 4 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'JANSSEN'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
043A21A169228.029.037.0
042A21A146143.033.035.0
202A21A115922.016.021.0
1805018112932.030.039.0
201A21A112915.027.024.0
...............
180899810.00.00.0
180899610.00.00.0
180899210.00.00.0
180898le10.00.00.0
z03az1a10.00.00.0
\n", + "

1797 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", + "VAX_LOT \n", + "043A21A 1692 28.0 29.0 37.0\n", + "042A21A 1461 43.0 33.0 35.0\n", + "202A21A 1159 22.0 16.0 21.0\n", + "1805018 1129 32.0 30.0 39.0\n", + "201A21A 1129 15.0 27.0 24.0\n", + "... ... ... ... ...\n", + "1808998 1 0.0 0.0 0.0\n", + "1808996 1 0.0 0.0 0.0\n", + "1808992 1 0.0 0.0 0.0\n", + "180898le 1 0.0 0.0 0.0\n", + "z03az1a 1 0.0 0.0 0.0\n", + "\n", + "[1797 rows x 4 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "saveBatchCodeTable(\"MODERNA\", \"results/moderna.xlsx\")\n", "saveBatchCodeTable(\"PFIZER\\BIONTECH\", \"results/pfizer.xlsx\")\n", @@ -518,14 +995,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "bc56831d", "metadata": {}, "outputs": [], "source": [ "def saveSevereEffectsBatchCodeTable(excelFile):\n", " severeEffectsBatchCodeTable = createSevereEffectsBatchCodeTable(\n", - " filterDataFrameForSevereEffects(\n", + " DataFrameFilter.filterDataFrameForSevereEffects(\n", " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", " VaersDescrReader(\"VAERS\").readAllVaersDescrs()),\n", " dose = '1'))\n", @@ -535,10 +1012,225 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "ace3fed9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'severeEffectsBatchCodeTable'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSESHOSPITALISATIONSEMERGENCY ROOM OR DOCTOR VISITSCOMPANY
VAX_LOT
026L20A333525.024.021.0125.00.0MODERNA
039K20A333567.022.029.0134.00.0MODERNA
011J20A307834.027.026.0106.00.0MODERNA
025L20A239742.016.026.083.00.0MODERNA
012L20A235757.029.022.0135.01.0MODERNA
........................
EN6207-10.00.00.00.00.0PFIZER\\BIONTECH
039820A10.00.00.00.00.0MODERNA
Blue-21810.00.00.00.00.0MODERNA
180897310.00.00.00.00.0UNKNOWN MANUFACTURER
FJ116110.00.00.00.00.0PFIZER\\BIONTECH
\n", + "

18428 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES \\\n", + "VAX_LOT \n", + "026L20A 3335 25.0 24.0 21.0 \n", + "039K20A 3335 67.0 22.0 29.0 \n", + "011J20A 3078 34.0 27.0 26.0 \n", + "025L20A 2397 42.0 16.0 26.0 \n", + "012L20A 2357 57.0 29.0 22.0 \n", + "... ... ... ... ... \n", + "EN6207- 1 0.0 0.0 0.0 \n", + "039820A 1 0.0 0.0 0.0 \n", + "Blue-218 1 0.0 0.0 0.0 \n", + "1808973 1 0.0 0.0 0.0 \n", + "FJ1161 1 0.0 0.0 0.0 \n", + "\n", + " HOSPITALISATIONS EMERGENCY ROOM OR DOCTOR VISITS \\\n", + "VAX_LOT \n", + "026L20A 125.0 0.0 \n", + "039K20A 134.0 0.0 \n", + "011J20A 106.0 0.0 \n", + "025L20A 83.0 0.0 \n", + "012L20A 135.0 1.0 \n", + "... ... ... \n", + "EN6207- 0.0 0.0 \n", + "039820A 0.0 0.0 \n", + "Blue-218 0.0 0.0 \n", + "1808973 0.0 0.0 \n", + "FJ1161 0.0 0.0 \n", + "\n", + " COMPANY \n", + "VAX_LOT \n", + "026L20A MODERNA \n", + "039K20A MODERNA \n", + "011J20A MODERNA \n", + "025L20A MODERNA \n", + "012L20A MODERNA \n", + "... ... \n", + "EN6207- PFIZER\\BIONTECH \n", + "039820A MODERNA \n", + "Blue-218 MODERNA \n", + "1808973 UNKNOWN MANUFACTURER \n", + "FJ1161 PFIZER\\BIONTECH \n", + "\n", + "[18428 rows x 7 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "saveSevereEffectsBatchCodeTable('results/severeEffects.xlsx')" ]