diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index 5cc0ef50871..55e7506ceb6 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "9de5907f-18f5-4cb1-903e-26028ff1fa03", "metadata": {}, "outputs": [], @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "a271254b", "metadata": {}, "outputs": [], @@ -60,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "7b5d6df0", "metadata": {}, "outputs": [], @@ -87,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "3ebcba86", "metadata": {}, "outputs": [], @@ -96,21 +96,23 @@ "\n", "class DataFrameFilter:\n", " \n", - " @staticmethod\n", - " def filterDataFrame(df, manufacturer = None, dose = None):\n", - " isCovid19 = df[\"VAX_TYPE\"] == \"COVID19\"\n", - " isManufacturer = df[\"VAX_MANU\"] == manufacturer if manufacturer is not None else True\n", - " isDose = df[\"VAX_DOSE_SERIES\"].str.contains(dose) if dose is not None else True\n", - " return df[isCovid19 & isManufacturer & isDose]\n", + " def __init__(self, dataFrame):\n", + " self.dataFrame = dataFrame \n", "\n", - " @staticmethod\n", - " def filterDataFrameForSevereEffects(df, dose):\n", - " return DataFrameFilter.filterDataFrame(df, dose = dose)\n" + " def filterBy(self, manufacturer = None, dose = None):\n", + " # FK-TODO: extract private instance methods\n", + " isCovid19 = self.dataFrame[\"VAX_TYPE\"] == \"COVID19\"\n", + " isManufacturer = self.dataFrame[\"VAX_MANU\"] == manufacturer if manufacturer is not None else True\n", + " isDose = self.dataFrame[\"VAX_DOSE_SERIES\"].str.contains(dose) if dose is not None else True\n", + " return self.dataFrame[isCovid19 & isManufacturer & isDose]\n", + "\n", + " def filterForSevereEffects(self, dose):\n", + " return self.filterBy(dose = dose)\n" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "99945ca8", "metadata": {}, "outputs": [], @@ -164,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "3dacedfd", "metadata": {}, "outputs": [], @@ -174,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "e59a1825", "metadata": {}, "outputs": [], @@ -224,7 +226,7 @@ " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " \n", " # When\n", - " dataFrame = DataFrameFilter.filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", + " dataFrame = DataFrameFilter(dataFrame).filterBy(manufacturer = \"MODERNA\", dose = '1')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -263,7 +265,7 @@ " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " \n", " # When\n", - " dataFrame = DataFrameFilter.filterDataFrameForSevereEffects(dataFrame, dose = '1')\n", + " dataFrame = DataFrameFilter(dataFrame).filterForSevereEffects(dose = '1')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -298,7 +300,7 @@ " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " \n", " # When\n", - " dataFrame = DataFrameFilter.filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", + " dataFrame = DataFrameFilter(dataFrame).filterBy(manufacturer = \"MODERNA\", dose = '1')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -331,7 +333,7 @@ " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " \n", " # When\n", - " dataFrame = DataFrameFilter.filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '2')\n", + " dataFrame = DataFrameFilter(dataFrame).filterBy(manufacturer = \"MODERNA\", dose = '2')\n", " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", @@ -348,7 +350,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "e14465d7", "metadata": {}, "outputs": [], @@ -395,14 +397,14 @@ " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " }\n", " ])\n", - " dataFrame = DataFrameFilter.filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n", + " dataFrame = DataFrameFilter(dataFrame).filterBy(manufacturer = \"MODERNA\", dose = '1')\n", " self._test_createBatchCodeTable(dataFrame);\n", "\n", " def test_createBatchCodeTable(self):\n", + " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", + " VaersDescrReader(\"test/VAERS\").readAllVaersDescrs())\n", " self._test_createBatchCodeTable(\n", - " DataFrameFilter.filterDataFrame(\n", - " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", - " VaersDescrReader(\"test/VAERS\").readAllVaersDescrs()),\n", + " DataFrameFilter(dataFrame).filterBy(\n", " manufacturer = \"MODERNA\",\n", " dose = '1'))\n", "\n", @@ -427,7 +429,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "ded70c87", "metadata": {}, "outputs": [], @@ -459,7 +461,7 @@ " }\n", " ]\n", " )\n", - " dataFrame = DataFrameFilter.filterDataFrameForSevereEffects(dataFrame, dose = '1')\n", + " dataFrame = DataFrameFilter(dataFrame).filterForSevereEffects(dose = '1')\n", "\n", " # When\n", " batchCodeTable = createSevereEffectsBatchCodeTable(dataFrame)\n", @@ -484,509 +486,36 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "5a8bff1b", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "test_createBatchCodeTable (__main__.BatchCodeTableTest) ... ok\n", - "test_createBatchCodeTable2 (__main__.BatchCodeTableTest) ... ok\n", - "test_createAndFilterDataFrameFromDescrs (__main__.CreateAndFilterDataFrameTest) ... ok\n", - "test_createAndFilterDataFrameFromDescrsWithFirstDose (__main__.CreateAndFilterDataFrameTest) ... ok\n", - "test_createAndFilterDataFrameFromDescrsWithSecondDose (__main__.CreateAndFilterDataFrameTest) ... ok\n", - "test_createDataFrameFromForSevereEffects (__main__.CreateAndFilterDataFrameTest) ... ok\n", - "test_createSevereEffectsBatchCodeTable (__main__.SevereEffectsBatchCodeTableTest) ... ok\n", - "\n", - "----------------------------------------------------------------------\n", - "Ran 7 tests in 0.156s\n", - "\n", - "OK\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "unittest.main(argv = [''], verbosity = 2, exit = False)" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "86e0e4f2", "metadata": {}, "outputs": [], "source": [ "def saveBatchCodeTable(manufacturer, excelFile):\n", - " batchCodeTable = createBatchCodeTable(\n", - " DataFrameFilter.filterDataFrame(\n", - " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", - " VaersDescrReader(\"VAERS\").readAllVaersDescrs()),\n", - " manufacturer = manufacturer,\n", - " dose = '1'))\n", + " vaersDescrs = VaersDescrReader(\"VAERS\").readAllVaersDescrs()\n", + " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", + " dataFrameFilter = DataFrameFilter(dataFrame)\n", + " batchCodeTable = createBatchCodeTable(dataFrameFilter.filterBy(manufacturer = manufacturer, dose = '1'))\n", " display(manufacturer, batchCodeTable)\n", " batchCodeTable.to_excel(excelFile)" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "ab170c16", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'MODERNA'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
039K20A333067.022.029.0
026L20A332725.024.021.0
011J20A307233.027.026.0
025L20A239142.016.026.0
012L20A235257.028.022.0
...............
028-20A10.00.00.0
028(L or Z?)20A10.00.00.0
028 L20A10.00.00.0
028 L 20A10.00.00.0
xx3A21A10.00.00.0
\n", - "

10248 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", - "VAX_LOT \n", - "039K20A 3330 67.0 22.0 29.0\n", - "026L20A 3327 25.0 24.0 21.0\n", - "011J20A 3072 33.0 27.0 26.0\n", - "025L20A 2391 42.0 16.0 26.0\n", - "012L20A 2352 57.0 28.0 22.0\n", - "... ... ... ... ...\n", - "028-20A 1 0.0 0.0 0.0\n", - "028(L or Z?)20A 1 0.0 0.0 0.0\n", - "028 L20A 1 0.0 0.0 0.0\n", - "028 L 20A 1 0.0 0.0 0.0\n", - "xx3A21A 1 0.0 0.0 0.0\n", - "\n", - "[10248 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'PFIZER\\\\BIONTECH'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
EK5730174118.025.016.0
EK9231163137.019.021.0
EH9899157014.036.018.0
EN6205148725.037.031.0
EN6208147533.027.025.0
...............
EN626610.00.00.0
EN62620710.00.00.0
EN626210.00.00.0
EN625110.00.00.0
zw015110.01.00.0
\n", - "

6765 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", - "VAX_LOT \n", - "EK5730 1741 18.0 25.0 16.0\n", - "EK9231 1631 37.0 19.0 21.0\n", - "EH9899 1570 14.0 36.0 18.0\n", - "EN6205 1487 25.0 37.0 31.0\n", - "EN6208 1475 33.0 27.0 25.0\n", - "... ... ... ... ...\n", - "EN6266 1 0.0 0.0 0.0\n", - "EN626207 1 0.0 0.0 0.0\n", - "EN6262 1 0.0 0.0 0.0\n", - "EN6251 1 0.0 0.0 0.0\n", - "zw0151 1 0.0 1.0 0.0\n", - "\n", - "[6765 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'JANSSEN'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
043A21A169228.029.037.0
042A21A146143.033.035.0
202A21A115922.016.021.0
1805018112932.030.039.0
201A21A112915.027.024.0
...............
180899810.00.00.0
180899610.00.00.0
180899210.00.00.0
180898le10.00.00.0
z03az1a10.00.00.0
\n", - "

1797 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", - "VAX_LOT \n", - "043A21A 1692 28.0 29.0 37.0\n", - "042A21A 1461 43.0 33.0 35.0\n", - "202A21A 1159 22.0 16.0 21.0\n", - "1805018 1129 32.0 30.0 39.0\n", - "201A21A 1129 15.0 27.0 24.0\n", - "... ... ... ... ...\n", - "1808998 1 0.0 0.0 0.0\n", - "1808996 1 0.0 0.0 0.0\n", - "1808992 1 0.0 0.0 0.0\n", - "180898le 1 0.0 0.0 0.0\n", - "z03az1a 1 0.0 0.0 0.0\n", - "\n", - "[1797 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "saveBatchCodeTable(\"MODERNA\", \"results/moderna.xlsx\")\n", "saveBatchCodeTable(\"PFIZER\\BIONTECH\", \"results/pfizer.xlsx\")\n", @@ -995,242 +524,26 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "bc56831d", "metadata": {}, "outputs": [], "source": [ "def saveSevereEffectsBatchCodeTable(excelFile):\n", - " severeEffectsBatchCodeTable = createSevereEffectsBatchCodeTable(\n", - " DataFrameFilter.filterDataFrameForSevereEffects(\n", - " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", - " VaersDescrReader(\"VAERS\").readAllVaersDescrs()),\n", - " dose = '1'))\n", + " vaersDescrs = VaersDescrReader(\"VAERS\").readAllVaersDescrs()\n", + " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", + " dataFrameFilter = DataFrameFilter(dataFrame)\n", + " severeEffectsBatchCodeTable = createSevereEffectsBatchCodeTable(dataFrameFilter.filterForSevereEffects(dose = '1'))\n", " display('severeEffectsBatchCodeTable', severeEffectsBatchCodeTable)\n", " severeEffectsBatchCodeTable.to_excel(excelFile)" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "ace3fed9", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'severeEffectsBatchCodeTable'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSESHOSPITALISATIONSEMERGENCY ROOM OR DOCTOR VISITSCOMPANY
VAX_LOT
026L20A333525.024.021.0125.00.0MODERNA
039K20A333567.022.029.0134.00.0MODERNA
011J20A307834.027.026.0106.00.0MODERNA
025L20A239742.016.026.083.00.0MODERNA
012L20A235757.029.022.0135.01.0MODERNA
........................
EN6207-10.00.00.00.00.0PFIZER\\BIONTECH
039820A10.00.00.00.00.0MODERNA
Blue-21810.00.00.00.00.0MODERNA
180897310.00.00.00.00.0UNKNOWN MANUFACTURER
FJ116110.00.00.00.00.0PFIZER\\BIONTECH
\n", - "

18428 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES \\\n", - "VAX_LOT \n", - "026L20A 3335 25.0 24.0 21.0 \n", - "039K20A 3335 67.0 22.0 29.0 \n", - "011J20A 3078 34.0 27.0 26.0 \n", - "025L20A 2397 42.0 16.0 26.0 \n", - "012L20A 2357 57.0 29.0 22.0 \n", - "... ... ... ... ... \n", - "EN6207- 1 0.0 0.0 0.0 \n", - "039820A 1 0.0 0.0 0.0 \n", - "Blue-218 1 0.0 0.0 0.0 \n", - "1808973 1 0.0 0.0 0.0 \n", - "FJ1161 1 0.0 0.0 0.0 \n", - "\n", - " HOSPITALISATIONS EMERGENCY ROOM OR DOCTOR VISITS \\\n", - "VAX_LOT \n", - "026L20A 125.0 0.0 \n", - "039K20A 134.0 0.0 \n", - "011J20A 106.0 0.0 \n", - "025L20A 83.0 0.0 \n", - "012L20A 135.0 1.0 \n", - "... ... ... \n", - "EN6207- 0.0 0.0 \n", - "039820A 0.0 0.0 \n", - "Blue-218 0.0 0.0 \n", - "1808973 0.0 0.0 \n", - "FJ1161 0.0 0.0 \n", - "\n", - " COMPANY \n", - "VAX_LOT \n", - "026L20A MODERNA \n", - "039K20A MODERNA \n", - "011J20A MODERNA \n", - "025L20A MODERNA \n", - "012L20A MODERNA \n", - "... ... \n", - "EN6207- PFIZER\\BIONTECH \n", - "039820A MODERNA \n", - "Blue-218 MODERNA \n", - "1808973 UNKNOWN MANUFACTURER \n", - "FJ1161 PFIZER\\BIONTECH \n", - "\n", - "[18428 rows x 7 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "saveSevereEffectsBatchCodeTable('results/severeEffects.xlsx')" ]