diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index 44a905ae679..1d16515fe9e 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "9de5907f-18f5-4cb1-903e-26028ff1fa03", "metadata": {}, "outputs": [], @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "233bc590", "metadata": {}, "outputs": [], @@ -39,12 +39,30 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, + "id": "dea776cd", + "metadata": {}, + "outputs": [], + "source": [ + "def createDataFrame2(vaersDescrs, manufacturer):\n", + " def vaersDescr2Vaers(vaersDescr):\n", + " return pd.merge(vaersDescr['VAERSDATA'], vaersDescr['VAERSVAX'], left_index = True, right_index = True)\n", + "\n", + " def vaersDescrs2Vaers():\n", + " return map(vaersDescr2Vaers, vaersDescrs);\n", + "\n", + " df = pd.concat(vaersDescrs2Vaers())\n", + " return df[(df[\"VAX_TYPE\"] == \"COVID19\") & (df[\"VAX_MANU\"] == manufacturer)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "99945ca8", "metadata": {}, "outputs": [], "source": [ - "def createBatchCodeTable(df):\n", + "def createBatchCodeTable(df : pd.DataFrame):\n", " def filter(df, col):\n", " return df[df[col] == 'Y'][['VAX_LOT']]\n", "\n", @@ -59,7 +77,94 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, + "id": "3dacedfd", + "metadata": {}, + "outputs": [], + "source": [ + "import unittest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e14465d7", + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.testing import assert_frame_equal\n", + "\n", + "class BatchCodeTableTest(unittest.TestCase):\n", + "\n", + " def test_createBatchCodeTable2(self):\n", + " # Given\n", + " vaersData2021 = pd.DataFrame(columns = ['DIED', 'L_THREAT', 'DISABLE'], index = ['0916600', '0916601'])\n", + " vaersData2021.loc['0916600'] = pd.Series({'DIED': 'Y', 'L_THREAT': np.NaN, 'DISABLE': np.NaN})\n", + " vaersData2021.loc['0916601'] = pd.Series({'DIED': np.NaN, 'L_THREAT': np.NaN, 'DISABLE': 'Y'})\n", + "\n", + " vaersVax2021 = pd.DataFrame(columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT'], index = ['0916600', '0916601'])\n", + " vaersVax2021.loc['0916600'] = pd.Series({'VAX_TYPE': 'COVID19', 'VAX_MANU': 'MODERNA', 'VAX_LOT': '037K20A'})\n", + " vaersVax2021.loc['0916601'] = pd.Series({'VAX_TYPE': 'COVID19', 'VAX_MANU': 'MODERNA', 'VAX_LOT': '025L20A'})\n", + "\n", + " vaersData2022 = pd.DataFrame(columns = ['DIED', 'L_THREAT', 'DISABLE'], index = ['1996873', '1996874'])\n", + " vaersData2022.loc['1996873'] = pd.Series({'DIED': np.NaN, 'L_THREAT': np.NaN, 'DISABLE': np.NaN})\n", + " vaersData2022.loc['1996874'] = pd.Series({'DIED': np.NaN, 'L_THREAT': np.NaN, 'DISABLE': 'Y'})\n", + "\n", + " vaersVax2022 = pd.DataFrame(columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT'], index = ['1996873', '1996874'])\n", + " vaersVax2022.loc['1996873'] = pd.Series({'VAX_TYPE': 'HPV9', 'VAX_MANU': 'MERCK & CO. INC.', 'VAX_LOT': 'R017624'})\n", + " vaersVax2022.loc['1996874'] = pd.Series({'VAX_TYPE': 'COVID19', 'VAX_MANU': 'MODERNA', 'VAX_LOT': '025L20A'})\n", + " \n", + " dataFrame = createDataFrame2(\n", + " [\n", + " {'VAERSDATA': vaersData2021, 'VAERSVAX': vaersVax2021},\n", + " {'VAERSDATA': vaersData2022, 'VAERSVAX': vaersVax2022}\n", + " ],\n", + " \"MODERNA\")\n", + " display(\"dataFrame:\", dataFrame)\n", + "\n", + " # When\n", + " self._test_createBatchCodeTable(dataFrame);\n", + " \n", + " def test_createBatchCodeTable(self):\n", + " # Given\n", + " dataFrame = createDataFrame(\"test/VAERS\", \"MODERNA\")\n", + " display(\"dataFrame:\", dataFrame)\n", + " self._test_createBatchCodeTable(dataFrame);\n", + "\n", + "\n", + " def _test_createBatchCodeTable(self, dataFrame):\n", + " # When\n", + " batchCodeTable = createBatchCodeTable(dataFrame)\n", + " display(\"batchCodeTable:\", batchCodeTable)\n", + "\n", + " # Then\n", + " batchCodeTableExpected = pd.DataFrame(\n", + " data = {\n", + " 'ADRs': [2, 1],\n", + " 'DEATHS': [0, 1],\n", + " 'DISABILITIES': [2, 0],\n", + " 'LIFE THREATENING ILLNESSES': [0, 0]\n", + " },\n", + " index = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',)))\n", + " display(\"batchCodeTableExpected:\", batchCodeTableExpected)\n", + " assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)\n", + "\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a8bff1b", + "metadata": {}, + "outputs": [], + "source": [ + "unittest.main(argv = [''], verbosity = 2, exit = False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "86e0e4f2", "metadata": {}, "outputs": [], @@ -72,452 +177,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "ab170c16", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'MODERNA'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
026L20A417742.035.028.0
039K20A416993.039.037.0
011J20A366137.033.028.0
013L20A319168.047.032.0
012L20A306872.030.030.0
...............
029L2VA10.00.00.0
029L2oa10.00.00.0
029L30A10.00.00.0
029L420A10.00.00.0
Ø38B21A10.00.00.0
\n", - "

16284 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", - "VAX_LOT \n", - "026L20A 4177 42.0 35.0 28.0\n", - "039K20A 4169 93.0 39.0 37.0\n", - "011J20A 3661 37.0 33.0 28.0\n", - "013L20A 3191 68.0 47.0 32.0\n", - "012L20A 3068 72.0 30.0 30.0\n", - "... ... ... ... ...\n", - "029L2VA 1 0.0 0.0 0.0\n", - "029L2oa 1 0.0 0.0 0.0\n", - "029L30A 1 0.0 0.0 0.0\n", - "029L420A 1 0.0 0.0 0.0\n", - "Ø38B21A 1 0.0 0.0 0.0\n", - "\n", - "[16284 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'PFIZER\\\\BIONTECH'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
EK9231339248.056.035.0
ER2613334565.062.057.0
EN62012928148.069.055.0
EN53182811114.062.057.0
ER8732270550.051.068.0
...............
EN6203 UPC10.00.00.0
EN6203,10.00.00.0
EN6203, EL324710.00.00.0
EN6203, EN620410.00.00.0
ÉÑ619810.00.00.0
\n", - "

12447 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", - "VAX_LOT \n", - "EK9231 3392 48.0 56.0 35.0\n", - "ER2613 3345 65.0 62.0 57.0\n", - "EN6201 2928 148.0 69.0 55.0\n", - "EN5318 2811 114.0 62.0 57.0\n", - "ER8732 2705 50.0 51.0 68.0\n", - "... ... ... ... ...\n", - "EN6203 UPC 1 0.0 0.0 0.0\n", - "EN6203, 1 0.0 0.0 0.0\n", - "EN6203, EL3247 1 0.0 0.0 0.0\n", - "EN6203, EN6204 1 0.0 0.0 0.0\n", - "ÉÑ6198 1 0.0 0.0 0.0\n", - "\n", - "[12447 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'JANSSEN'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
Unknown7312264.019.031.0
043A21A234737.031.049.0
042A21A217044.041.043.0
1805018175635.036.049.0
202A21A172625.021.025.0
...............
180E01810.00.00.0
180D06810.00.00.0
180C06810.00.00.0
180B98210.00.00.0
zz10.00.00.0
\n", - "

2456 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", - "VAX_LOT \n", - "Unknown 7312 264.0 19.0 31.0\n", - "043A21A 2347 37.0 31.0 49.0\n", - "042A21A 2170 44.0 41.0 43.0\n", - "1805018 1756 35.0 36.0 49.0\n", - "202A21A 1726 25.0 21.0 25.0\n", - "... ... ... ... ...\n", - "180E018 1 0.0 0.0 0.0\n", - "180D068 1 0.0 0.0 0.0\n", - "180C068 1 0.0 0.0 0.0\n", - "180B982 1 0.0 0.0 0.0\n", - "zz 1 0.0 0.0 0.0\n", - "\n", - "[2456 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "saveBatchCodeTable(\"MODERNA\", \"results/moderna.xlsx\")\n", "saveBatchCodeTable(\"PFIZER\\BIONTECH\", \"results/pfizer.xlsx\")\n", @@ -526,318 +189,10 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "9f506ac8", - "metadata": {}, - "outputs": [], - "source": [ - "import unittest" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "e14465d7", - "metadata": {}, - "outputs": [], - "source": [ - "from pandas.testing import assert_frame_equal\n", - "\n", - "\n", - "class HowBadIsMyBatchTest(unittest.TestCase):\n", - "\n", - " def test_createBatchCodeTable(self):\n", - " # Given\n", - " dataFrame = createDataFrame(\"test/VAERS\", \"MODERNA\")\n", - " display(\"dataFrame:\", dataFrame)\n", - "\n", - " # When\n", - " batchCodeTable = createBatchCodeTable(dataFrame)\n", - " display(\"batchCodeTable:\", batchCodeTable)\n", - "\n", - " # Then\n", - " batchCodeTableExpected = pd.DataFrame(\n", - " {\n", - " 'ADRs': [2, 1],\n", - " 'DEATHS': [0, 1],\n", - " 'DISABILITIES': [2, 0],\n", - " 'LIFE THREATENING ILLNESSES': [0, 0]\n", - " },\n", - " index = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',)))\n", - " display(\"batchCodeTableExpected:\", batchCodeTableExpected)\n", - " assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "ef8f99c4", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "test_createBatchCodeTable (__main__.HowBadIsMyBatchTest) ... " - ] - }, - { - "data": { - "text/plain": [ - "'dataFrame:'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
DIEDL_THREATDISABLEVAX_TYPEVAX_MANUVAX_LOT
VAERS_ID
916600YNaNNaNCOVID19MODERNA037K20A
916601NaNNaNYCOVID19MODERNA025L20A
1996874NaNNaNYCOVID19MODERNA025L20A
\n", - "
" - ], - "text/plain": [ - " DIED L_THREAT DISABLE VAX_TYPE VAX_MANU VAX_LOT\n", - "VAERS_ID \n", - "916600 Y NaN NaN COVID19 MODERNA 037K20A\n", - "916601 NaN NaN Y COVID19 MODERNA 025L20A\n", - "1996874 NaN NaN Y COVID19 MODERNA 025L20A" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'batchCodeTable:'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
025L20A20.02.00.0
037K20A11.00.00.0
\n", - "
" - ], - "text/plain": [ - " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", - "VAX_LOT \n", - "025L20A 2 0.0 2.0 0.0\n", - "037K20A 1 1.0 0.0 0.0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'batchCodeTableExpected:'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ADRsDEATHSDISABILITIESLIFE THREATENING ILLNESSES
VAX_LOT
025L20A2020
037K20A1100
\n", - "
" - ], - "text/plain": [ - " ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n", - "VAX_LOT \n", - "025L20A 2 0 2 0\n", - "037K20A 1 1 0 0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ok\n", - "\n", - "----------------------------------------------------------------------\n", - "Ran 1 test in 0.220s\n", - "\n", - "OK\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "unittest.main(argv = [''], verbosity = 2, exit = False)" ] diff --git a/help.txt b/help.txt index c5d49ddbeff..914857e48ee 100644 --- a/help.txt +++ b/help.txt @@ -1,7 +1,14 @@ jupyter notebook FK-TODO: -- Prüfe, ob die VAERS_ID wirklich eindeutig ist. +- "I would suggest that you filter the vax table first for just C19 vaccines, and for just first dose. + Then carry out the analysis as before. + Repeat for second dose and third dose separately. The cumulative effect will then appear. + It should be analysed separately anyway, because adverse reactions increase with each dose." + # 1. filter the vax table first for just C19 vaccines + # 2. and for just n-tn (n \in {1, 2, 3}) dose => VAERSDATA --> VAERSVAX ist 1:1-Beziehung statt 1:n und kann einfacher in eine einzige Tabelle gemergt werden + # 3. filter for manufacturer +- Prüfe, ob die VAERS_ID wirklich eindeutig ist. Antwort: VAERS_ID ist in der VAERSVAX-Tabelle nicht eindeutig, da es mehrere Impfungen pro Person geben kann. - VAX_LOT-Spalte normalisieren, d.h. mindestens toUpperCase() darauf anwenden - Format des jeweiligen Herstellers berücksichtigen und "verschmutzte" Einträge säubern, denn sie stellen alle dieselbe Charge dar: 039k20a