From 0317d25a90ad0ad4f856d8ee616fff990b79e181 Mon Sep 17 00:00:00 2001
From: frankknoll <Knoll_Frank@web.de>
Date: Tue, 1 Feb 2022 23:56:40 +0100
Subject: [PATCH] adding DoseAnalysisTest

---
 HowBadIsMyBatch.ipynb | 124 +++++++++++++++++++++++++++++++++++++++---
 help.txt              |   5 +-
 2 files changed, 117 insertions(+), 12 deletions(-)

diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb
index 5624a25f15b..7fe38f11659 100644
--- a/HowBadIsMyBatch.ipynb
+++ b/HowBadIsMyBatch.ipynb
@@ -210,6 +210,27 @@
     "        return BatchCodeTableHelper(severeEffectsDataFrame).createSevereEffectsBatchCodeTable()\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "41d4fa30",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class DoseAnalysis:\n",
+    "    @staticmethod\n",
+    "    def getNthDoseTable(dataFrame, dose):\n",
+    "        \"\"\"Return a pd.Series with the number of reports selected for dose, their deaths, disabilities and life threatening illnesses, and the sum of these three counts as a percentage of all selected reports (a report with several outcomes counts once per outcome).\"\"\"\n",
+    "        nthDoseDataFrame = DataFrameFilter(dataFrame).filterBy(dose = dose)\n",
+    "        doseTable = {'Total reports': len(nthDoseDataFrame.index),\n",
+    "                     'Deaths': len(nthDoseDataFrame[nthDoseDataFrame['DIED'] == 'Y']),\n",
+    "                     'Disabilities': len(nthDoseDataFrame[nthDoseDataFrame['DISABLE'] == 'Y']),\n",
+    "                     'Life Threatening Illnesses': len(nthDoseDataFrame[nthDoseDataFrame['L_THREAT'] == 'Y'])}\n",
+    "        numSevere = doseTable['Deaths'] + doseTable['Disabilities'] + doseTable['Life Threatening Illnesses']\n",
+    "        doseTable['Severe reports'] = (numSevere/doseTable['Total reports'] * 100) if doseTable['Total reports'] else float('nan')  # no report for this dose: NaN instead of ZeroDivisionError\n",
+    "        return pd.Series(doseTable)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -247,7 +268,7 @@
     "                        'VAERSVAX': self.createDataFrame(\n",
     "                            columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
     "                            data = [  ['COVID19',  'MODERNA',  '037K20A', '1'],\n",
-    "                                        ['COVID19',  'MODERNA',  '025L20A', '1']],\n",
+    "                                      ['COVID19',  'MODERNA',  '025L20A', '1']],\n",
     "                            index = [\n",
     "                                \"0916600\",\n",
     "                                \"0916601\"],\n",
@@ -257,14 +278,14 @@
     "                            'VAERSDATA': self.createDataFrame(\n",
     "                            columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
     "                            data = [  [np.NaN, np.NaN,     np.NaN],\n",
-    "                                        [np.NaN, np.NaN,     'Y']],\n",
+    "                                      [np.NaN, np.NaN,     'Y']],\n",
     "                                index = [\n",
     "                                \"1996873\",\n",
     "                                \"1996874\"]),\n",
     "                            'VAERSVAX': self.createDataFrame(\n",
     "                                columns = ['VAX_TYPE', 'VAX_MANU',         'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
     "                                data = [  ['HPV9',     'MERCK & CO. INC.', 'R017624', 'UNK'],\n",
-    "                                        ['COVID19',  'MODERNA',          '025L20A', '1']],\n",
+    "                                          ['COVID19',  'MODERNA',          '025L20A', '1']],\n",
     "                                index = [\n",
     "                                    \"1996873\",\n",
     "                                    \"1996874\"],\n",
@@ -340,7 +361,7 @@
     "                        'VAERSVAX': self.createDataFrame(\n",
     "                            columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
     "                            data = [  ['COVID19',  'MODERNA',  '016M20A', '2'],\n",
-    "                                        ['COVID19',  'MODERNA',  '030L20A', '1']],\n",
+    "                                      ['COVID19',  'MODERNA',  '030L20A', '1']],\n",
     "                            index = [\n",
     "                                \"1048786\",\n",
     "                                \"1048786\"],\n",
@@ -354,7 +375,7 @@
     "        # Then\n",
     "        dataFrameExpected = self.createDataFrame(\n",
     "            columns = ['DIED', 'L_THREAT', 'DISABLE',  'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
-    "            data = [  ['Y',     np.NaN,    np.NaN,     'COVID19',  'MODERNA',  '030L20A',  '1']],\n",
+    "            data = [  ['Y',     np.NaN,    np.NaN,     'COVID19',  'MODERNA',  '030L20A', '1']],\n",
     "            index = [\n",
     "                \"1048786\"],\n",
     "            dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
@@ -374,7 +395,7 @@
     "                        'VAERSVAX': self.createDataFrame(\n",
     "                            columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
     "                            data = [  ['COVID19',  'MODERNA',  '016M20A',  '2'],\n",
-    "                                        ['COVID19',  'MODERNA',  '030L20A',  '1']],\n",
+    "                                      ['COVID19',  'MODERNA',  '030L20A',  '1']],\n",
     "                            index = [\n",
     "                                \"1048786\",\n",
     "                                \"1048786\"],\n",
@@ -388,7 +409,7 @@
     "        # Then\n",
     "        dataFrameExpected = self.createDataFrame(\n",
     "            columns = ['DIED', 'L_THREAT', 'DISABLE',  'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
-    "            data = [  ['Y',     np.NaN,    np.NaN,     'COVID19',  'MODERNA',  '016M20A',  '2']],\n",
+    "            data = [  ['Y',     np.NaN,    np.NaN,     'COVID19',  'MODERNA',  '016M20A', '2']],\n",
     "            index = [\n",
     "                \"1048786\"],\n",
     "            dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
@@ -417,14 +438,14 @@
     "                    'VAERSDATA': self.createDataFrame(\n",
     "                        columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
     "                        data = [  ['Y',    'Y',        np.NaN,    'Y',        'Y'],\n",
-    "                                    [np.NaN, np.NaN,     'Y',       np.NaN,     'Y']],\n",
+    "                                  [np.NaN, np.NaN,     'Y',       np.NaN,     'Y']],\n",
     "                        index = [\n",
     "                            \"0916600\",\n",
     "                            \"0916601\"]),\n",
     "                    'VAERSVAX': self.createDataFrame(\n",
     "                        columns = ['VAX_TYPE', 'VAX_MANU',        'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
     "                        data = [  ['COVID19',  'MODERNA',         '037K20A', '1'],\n",
-    "                                    ['COVID19',  'PFIZER\\BIONTECH', '025L20A', '1']],\n",
+    "                                  ['COVID19',  'PFIZER\\BIONTECH', '025L20A', '1']],\n",
     "                        index = [\n",
     "                            \"0916600\",\n",
     "                            \"0916601\"],\n",
@@ -513,6 +534,75 @@
     "        return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "44c121ec",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pandas.testing import assert_series_equal\n",
+    "\n",
+    "class DoseAnalysisTest(unittest.TestCase):\n",
+    "    \"\"\"Checks DoseAnalysis.getNthDoseTable for dose '1' and dose '2' against one shared set of reports.\"\"\"\n",
+    "\n",
+    "    def test_getFirstDoseTable(self):\n",
+    "        # Given\n",
+    "        dataFrame = self.createDataFrameWithFirstAndSecondDoses()\n",
+    "\n",
+    "        # When\n",
+    "        doseTable = DoseAnalysis.getNthDoseTable(dataFrame, dose = '1')\n",
+    "\n",
+    "        # Then\n",
+    "        # The two rows with VAX_DOSE_SERIES == '1' both have DIED == 'Y'; the last one also has\n",
+    "        # L_THREAT and DISABLE set, hence the expected 'Severe reports' of (2 + 1 + 1)/2 * 100.\n",
+    "        doseTableExpected = pd.Series(\n",
+    "            {\n",
+    "                'Total reports': 2,\n",
+    "                'Deaths': 2,\n",
+    "                'Disabilities': 1,\n",
+    "                'Life Threatening Illnesses': 1,\n",
+    "                'Severe reports': (2 + 1 + 1)/2 * 100\n",
+    "            })\n",
+    "        assert_series_equal(doseTable, doseTableExpected)\n",
+    "\n",
+    "    def test_getSecondDoseTable(self):\n",
+    "        # Given\n",
+    "        dataFrame = self.createDataFrameWithFirstAndSecondDoses()\n",
+    "\n",
+    "        # When\n",
+    "        doseTable = DoseAnalysis.getNthDoseTable(dataFrame, dose = '2')\n",
+    "\n",
+    "        # Then\n",
+    "        # Only the first row has VAX_DOSE_SERIES == '2'; its sole outcome is DIED == 'Y'.\n",
+    "        doseTableExpected = pd.Series(\n",
+    "            {\n",
+    "                'Total reports': 1,\n",
+    "                'Deaths': 1,\n",
+    "                'Disabilities': 0,\n",
+    "                'Life Threatening Illnesses': 0,\n",
+    "                'Severe reports': (1 + 0 + 0)/1 * 100\n",
+    "            })\n",
+    "        assert_series_equal(doseTable, doseTableExpected)\n",
+    "\n",
+    "    def createDataFrameWithFirstAndSecondDoses(self):\n",
+    "        \"\"\"Return the fixture shared by both tests: one row with VAX_DOSE_SERIES '2' and two rows with VAX_DOSE_SERIES '1'.\"\"\"\n",
+    "        return self.createDataFrame(\n",
+    "            columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
+    "            data = [  ['Y',    np.nan,     np.nan,    'COVID19',  'MODERNA',  '016M20A', '2'],\n",
+    "                      ['Y',    np.nan,     np.nan,    'COVID19',  'MODERNA',  '030L20A', '1'],\n",
+    "                      ['Y',    'Y',        'Y',       'COVID19',  'MODERNA',  '030L20B', '1']],\n",
+    "            index = [\n",
+    "                \"1048786\",\n",
+    "                \"1048786\",\n",
+    "                \"4711\"],\n",
+    "            dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
+    "\n",
+    "    def createDataFrame(self, index, columns, data, dtypes = {}):\n",
+    "        \"\"\"Build a DataFrame from index, columns and data and cast it with astype(dtypes).\"\"\"\n",
+    "        return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -574,6 +664,22 @@
    "source": [
     "saveSevereEffectsBatchCodeTable('results/severeEffects.xlsx')"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1b228a16",
+   "metadata": {},
+   "source": [
+    "### Variation in Effect of First and Second Doses"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "202f7c3f",
+   "metadata": {},
+   "source": [
+    "see https://www.howbadismybatch.com/firstsecond.html"
+   ]
   }
  ],
  "metadata": {
diff --git a/help.txt b/help.txt
index c7f36c7eb41..d689351828d 100644
--- a/help.txt
+++ b/help.txt
@@ -6,9 +6,8 @@ FK-TODO:
   Then carry out the analysis as before.
   Repeat for second dose and third dose separately. The cumulative effect will then appear.
   It should be analysed separately anyway, because adverse reactions increase with each dose."
-  # 1. filter the vax table first for just C19 vaccines
-  # 2. and for just n-th (VAERSVAX.VAX_DOSE_SERIES == n \in {1, 2, 3}) dose => VAERSDATA --> VAERSVAX ist 1:1-Beziehung statt 1:n und kann einfacher in eine einzige Tabelle gemergt werden
-  # 3. filter for manufacturer
+  # 1. filter the vax table first for just C19 vaccines and for just the n-th dose (VAERSVAX.VAX_DOSE_SERIES == n \in {1, 2, 3})
+  # 2. filter for manufacturer
 - Prüfe, ob die VAERS_ID wirklich eindeutig ist. Antwort: VAERS_ID ist in der VAERSVAX-Tabelle nicht eindeutig, da es mehrere Impfungen pro Person geben kann.
 - VAX_LOT-Spalte normalisieren, d.h. mindestens toUpperCase() darauf anwenden
 - Format des jeweiligen Herstellers berücksichtigen und "verschmutzte" Einträge säubern, denn sie stellen alle dieselbe Charge dar: