adding DoseAnalysisTest

2022-02-01 23:56:40 +01:00
parent 0380ec9092
commit 0317d25a90
2 changed files with 117 additions and 12 deletions
--- a/HowBadIsMyBatch.ipynb
+++ b/HowBadIsMyBatch.ipynb
@@ -210,6 +210,27 @@
    "        return BatchCodeTableHelper(severeEffectsDataFrame).createSevereEffectsBatchCodeTable()\n"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "41d4fa30",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class DoseAnalysis:\n",
+    "    \n",
+    "    def getNthDoseTable(dataFrame, dose):\n",
+    "        nthDoseDataFrame = DataFrameFilter(dataFrame).filterBy(dose = dose)\n",
+    "        dict = {\n",
+    "            'Total reports': len(nthDoseDataFrame.index),\n",
+    "            'Deaths': len(nthDoseDataFrame[nthDoseDataFrame['DIED'] == 'Y']),\n",
+    "            'Disabilities': len(nthDoseDataFrame[nthDoseDataFrame['DISABLE'] == 'Y']),\n",
+    "            'Life Threatening Illnesses': len(nthDoseDataFrame[nthDoseDataFrame['L_THREAT'] == 'Y'])\n",
+    "        }\n",
+    "        dict['Severe reports'] =  (dict['Deaths'] + dict['Disabilities'] + dict['Life Threatening Illnesses'])/dict['Total reports'] * 100\n",
+    "        return pd.Series(dict)"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -247,7 +268,7 @@
    "                        'VAERSVAX': self.createDataFrame(\n",
    "                            columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
    "                            data = [  ['COVID19',  'MODERNA',  '037K20A', '1'],\n",
-    "                                        ['COVID19',  'MODERNA',  '025L20A', '1']],\n",
+    "                                      ['COVID19',  'MODERNA',  '025L20A', '1']],\n",
    "                            index = [\n",
    "                                \"0916600\",\n",
    "                                \"0916601\"],\n",
@@ -257,14 +278,14 @@
    "                            'VAERSDATA': self.createDataFrame(\n",
    "                            columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
    "                            data = [  [np.NaN, np.NaN,     np.NaN],\n",
-    "                                        [np.NaN, np.NaN,     'Y']],\n",
+    "                                      [np.NaN, np.NaN,     'Y']],\n",
    "                                index = [\n",
    "                                \"1996873\",\n",
    "                                \"1996874\"]),\n",
    "                            'VAERSVAX': self.createDataFrame(\n",
    "                                columns = ['VAX_TYPE', 'VAX_MANU',         'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
    "                                data = [  ['HPV9',     'MERCK & CO. INC.', 'R017624', 'UNK'],\n",
-    "                                        ['COVID19',  'MODERNA',          '025L20A', '1']],\n",
+    "                                          ['COVID19',  'MODERNA',          '025L20A', '1']],\n",
    "                                index = [\n",
    "                                    \"1996873\",\n",
    "                                    \"1996874\"],\n",
@@ -340,7 +361,7 @@
    "                        'VAERSVAX': self.createDataFrame(\n",
    "                            columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
    "                            data = [  ['COVID19',  'MODERNA',  '016M20A', '2'],\n",
-    "                                        ['COVID19',  'MODERNA',  '030L20A', '1']],\n",
+    "                                      ['COVID19',  'MODERNA',  '030L20A', '1']],\n",
    "                            index = [\n",
    "                                \"1048786\",\n",
    "                                \"1048786\"],\n",
@@ -354,7 +375,7 @@
    "        # Then\n",
    "        dataFrameExpected = self.createDataFrame(\n",
    "            columns = ['DIED', 'L_THREAT', 'DISABLE',  'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
-    "            data = [  ['Y',     np.NaN,    np.NaN,     'COVID19',  'MODERNA',  '030L20A',  '1']],\n",
+    "            data = [  ['Y',     np.NaN,    np.NaN,     'COVID19',  'MODERNA',  '030L20A', '1']],\n",
    "            index = [\n",
    "                \"1048786\"],\n",
    "            dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
@@ -374,7 +395,7 @@
    "                        'VAERSVAX': self.createDataFrame(\n",
    "                            columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
    "                            data = [  ['COVID19',  'MODERNA',  '016M20A',  '2'],\n",
-    "                                        ['COVID19',  'MODERNA',  '030L20A',  '1']],\n",
+    "                                      ['COVID19',  'MODERNA',  '030L20A',  '1']],\n",
    "                            index = [\n",
    "                                \"1048786\",\n",
    "                                \"1048786\"],\n",
@@ -388,7 +409,7 @@
    "        # Then\n",
    "        dataFrameExpected = self.createDataFrame(\n",
    "            columns = ['DIED', 'L_THREAT', 'DISABLE',  'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
-    "            data = [  ['Y',     np.NaN,    np.NaN,     'COVID19',  'MODERNA',  '016M20A',  '2']],\n",
+    "            data = [  ['Y',     np.NaN,    np.NaN,     'COVID19',  'MODERNA',  '016M20A', '2']],\n",
    "            index = [\n",
    "                \"1048786\"],\n",
    "            dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
@@ -417,14 +438,14 @@
    "                    'VAERSDATA': self.createDataFrame(\n",
    "                        columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
    "                        data = [  ['Y',    'Y',        np.NaN,    'Y',        'Y'],\n",
-    "                                    [np.NaN, np.NaN,     'Y',       np.NaN,     'Y']],\n",
+    "                                  [np.NaN, np.NaN,     'Y',       np.NaN,     'Y']],\n",
    "                        index = [\n",
    "                            \"0916600\",\n",
    "                            \"0916601\"]),\n",
    "                    'VAERSVAX': self.createDataFrame(\n",
    "                        columns = ['VAX_TYPE', 'VAX_MANU',        'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
    "                        data = [  ['COVID19',  'MODERNA',         '037K20A', '1'],\n",
-    "                                    ['COVID19',  'PFIZER\\BIONTECH', '025L20A', '1']],\n",
+    "                                  ['COVID19',  'PFIZER\\BIONTECH', '025L20A', '1']],\n",
    "                        index = [\n",
    "                            \"0916600\",\n",
    "                            \"0916601\"],\n",
@@ -513,6 +534,75 @@
    "        return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "44c121ec",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pandas.testing import assert_series_equal\n",
+    "\n",
+    "class DoseAnalysisTest(unittest.TestCase):\n",
+    "\n",
+    "    def test_getFirstDoseTable(self):\n",
+    "        # Given\n",
+    "        dataFrame = self.createDataFrame(\n",
+    "            columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
+    "            data = [  ['Y',    np.NaN,     np.NaN,    'COVID19',  'MODERNA',  '016M20A', '2'],\n",
+    "                      ['Y',    np.NaN,     np.NaN,    'COVID19',  'MODERNA',  '030L20A', '1'],\n",
+    "                      ['Y',    'Y',        'Y',       'COVID19',  'MODERNA',  '030L20B', '1']],\n",
+    "            index = [\n",
+    "                \"1048786\",\n",
+    "                \"1048786\",\n",
+    "                \"4711\"],\n",
+    "            dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
+    "            \n",
+    "        # When\n",
+    "        doseTable = DoseAnalysis.getNthDoseTable(dataFrame, dose = '1')\n",
+    "        \n",
+    "        # Then\n",
+    "        doseTableExpected = pd.Series(\n",
+    "            {\n",
+    "                'Total reports': 2,\n",
+    "                'Deaths': 2,\n",
+    "                'Disabilities': 1,\n",
+    "                'Life Threatening Illnesses': 1,\n",
+    "                'Severe reports': (2 + 1 + 1)/2 * 100\n",
+    "            })\n",
+    "        assert_series_equal(doseTable, doseTableExpected)\n",
+    "\n",
+    "    def test_getSecondDoseTable(self):\n",
+    "        # Given\n",
+    "        dataFrame = self.createDataFrame(\n",
+    "            columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
+    "            data = [  ['Y',    np.NaN,     np.NaN,    'COVID19',  'MODERNA',  '016M20A', '2'],\n",
+    "                      ['Y',    np.NaN,     np.NaN,    'COVID19',  'MODERNA',  '030L20A', '1'],\n",
+    "                      ['Y',    'Y',        'Y',       'COVID19',  'MODERNA',  '030L20B', '1']],\n",
+    "            index = [\n",
+    "                \"1048786\",\n",
+    "                \"1048786\",\n",
+    "                \"4711\"],\n",
+    "            dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
+    "            \n",
+    "        # When\n",
+    "        doseTable = DoseAnalysis.getNthDoseTable(dataFrame, dose = '2')\n",
+    "        \n",
+    "        # Then\n",
+    "        doseTableExpected = pd.Series(\n",
+    "            {\n",
+    "                'Total reports': 1,\n",
+    "                'Deaths': 1,\n",
+    "                'Disabilities': 0,\n",
+    "                'Life Threatening Illnesses': 0,\n",
+    "                'Severe reports': (1 + 0 + 0)/1 * 100\n",
+    "            })\n",
+    "        assert_series_equal(doseTable, doseTableExpected)\n",
+    "\n",
+    "    def createDataFrame(self, index, columns, data, dtypes = {}):\n",
+    "        return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -574,6 +664,22 @@
   "source": [
    "saveSevereEffectsBatchCodeTable('results/severeEffects.xlsx')"
   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1b228a16",
+   "metadata": {},
+   "source": [
+    "### Variation in Effect of First and Second Doses"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "202f7c3f",
+   "metadata": {},
+   "source": [
+    "See https://www.howbadismybatch.com/firstsecond.html"
+   ]
  }
 ],
 "metadata": {
--- a/help.txt
+++ b/help.txt
@@ -6,9 +6,8 @@ FK-TODO:
  Then carry out the analysis as before.
  Repeat for second dose and third dose separately. The cumulative effect will then appear.
  It should be analysed separately anyway, because adverse reactions increase with each dose."
-  # 1. filter the vax table first for just C19 vaccines
-  # 2. and for just n-th (VAERSVAX.VAX_DOSE_SERIES == n \in {1, 2, 3}) dose => VAERSDATA --> VAERSVAX ist 1:1-Beziehung statt 1:n und kann einfacher in eine einzige Tabelle gemergt werden
-  # 3. filter for manufacturer
+  # 1. filter the vax table first for just C19 vaccines and for just the n-th dose (VAERSVAX.VAX_DOSE_SERIES == n \in {1, 2, 3})
+  # 2. filter for manufacturer
 - Prüfe, ob die VAERS_ID wirklich eindeutig ist. Antwort: VAERS_ID ist in der VAERSVAX-Tabelle nicht eindeutig, da es mehrere Impfungen pro Person geben kann.
 - VAX_LOT-Spalte normalisieren, d.h. mindestens toUpperCase() darauf anwenden
 - Format des jeweiligen Herstellers berücksichtigen und "verschmutzte" Einträge säubern, denn sie stellen alle dieselbe Charge dar: