adding DoseAnalysisTest
This commit is contained in:
@@ -210,6 +210,27 @@
|
|||||||
" return BatchCodeTableHelper(severeEffectsDataFrame).createSevereEffectsBatchCodeTable()\n"
|
" return BatchCodeTableHelper(severeEffectsDataFrame).createSevereEffectsBatchCodeTable()\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "41d4fa30",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"class DoseAnalysis:\n",
|
||||||
|
" \n",
|
||||||
|
" def getNthDoseTable(dataFrame, dose):\n",
|
||||||
|
" nthDoseDataFrame = DataFrameFilter(dataFrame).filterBy(dose = dose)\n",
|
||||||
|
" dict = {\n",
|
||||||
|
" 'Total reports': len(nthDoseDataFrame.index),\n",
|
||||||
|
" 'Deaths': len(nthDoseDataFrame[nthDoseDataFrame['DIED'] == 'Y']),\n",
|
||||||
|
" 'Disabilities': len(nthDoseDataFrame[nthDoseDataFrame['DISABLE'] == 'Y']),\n",
|
||||||
|
" 'Life Threatening Illnesses': len(nthDoseDataFrame[nthDoseDataFrame['L_THREAT'] == 'Y'])\n",
|
||||||
|
" }\n",
|
||||||
|
" dict['Severe reports'] = (dict['Deaths'] + dict['Disabilities'] + dict['Life Threatening Illnesses'])/dict['Total reports'] * 100\n",
|
||||||
|
" return pd.Series(dict)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -247,7 +268,7 @@
|
|||||||
" 'VAERSVAX': self.createDataFrame(\n",
|
" 'VAERSVAX': self.createDataFrame(\n",
|
||||||
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||||
" data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n",
|
" data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n",
|
||||||
" ['COVID19', 'MODERNA', '025L20A', '1']],\n",
|
" ['COVID19', 'MODERNA', '025L20A', '1']],\n",
|
||||||
" index = [\n",
|
" index = [\n",
|
||||||
" \"0916600\",\n",
|
" \"0916600\",\n",
|
||||||
" \"0916601\"],\n",
|
" \"0916601\"],\n",
|
||||||
@@ -257,14 +278,14 @@
|
|||||||
" 'VAERSDATA': self.createDataFrame(\n",
|
" 'VAERSDATA': self.createDataFrame(\n",
|
||||||
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
|
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
|
||||||
" data = [ [np.NaN, np.NaN, np.NaN],\n",
|
" data = [ [np.NaN, np.NaN, np.NaN],\n",
|
||||||
" [np.NaN, np.NaN, 'Y']],\n",
|
" [np.NaN, np.NaN, 'Y']],\n",
|
||||||
" index = [\n",
|
" index = [\n",
|
||||||
" \"1996873\",\n",
|
" \"1996873\",\n",
|
||||||
" \"1996874\"]),\n",
|
" \"1996874\"]),\n",
|
||||||
" 'VAERSVAX': self.createDataFrame(\n",
|
" 'VAERSVAX': self.createDataFrame(\n",
|
||||||
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||||
" data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n",
|
" data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n",
|
||||||
" ['COVID19', 'MODERNA', '025L20A', '1']],\n",
|
" ['COVID19', 'MODERNA', '025L20A', '1']],\n",
|
||||||
" index = [\n",
|
" index = [\n",
|
||||||
" \"1996873\",\n",
|
" \"1996873\",\n",
|
||||||
" \"1996874\"],\n",
|
" \"1996874\"],\n",
|
||||||
@@ -340,7 +361,7 @@
|
|||||||
" 'VAERSVAX': self.createDataFrame(\n",
|
" 'VAERSVAX': self.createDataFrame(\n",
|
||||||
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||||
" data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n",
|
" data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n",
|
||||||
" ['COVID19', 'MODERNA', '030L20A', '1']],\n",
|
" ['COVID19', 'MODERNA', '030L20A', '1']],\n",
|
||||||
" index = [\n",
|
" index = [\n",
|
||||||
" \"1048786\",\n",
|
" \"1048786\",\n",
|
||||||
" \"1048786\"],\n",
|
" \"1048786\"],\n",
|
||||||
@@ -354,7 +375,7 @@
|
|||||||
" # Then\n",
|
" # Then\n",
|
||||||
" dataFrameExpected = self.createDataFrame(\n",
|
" dataFrameExpected = self.createDataFrame(\n",
|
||||||
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||||
" data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '030L20A', '1']],\n",
|
" data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '030L20A', '1']],\n",
|
||||||
" index = [\n",
|
" index = [\n",
|
||||||
" \"1048786\"],\n",
|
" \"1048786\"],\n",
|
||||||
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
|
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
|
||||||
@@ -374,7 +395,7 @@
|
|||||||
" 'VAERSVAX': self.createDataFrame(\n",
|
" 'VAERSVAX': self.createDataFrame(\n",
|
||||||
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||||
" data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n",
|
" data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n",
|
||||||
" ['COVID19', 'MODERNA', '030L20A', '1']],\n",
|
" ['COVID19', 'MODERNA', '030L20A', '1']],\n",
|
||||||
" index = [\n",
|
" index = [\n",
|
||||||
" \"1048786\",\n",
|
" \"1048786\",\n",
|
||||||
" \"1048786\"],\n",
|
" \"1048786\"],\n",
|
||||||
@@ -388,7 +409,7 @@
|
|||||||
" # Then\n",
|
" # Then\n",
|
||||||
" dataFrameExpected = self.createDataFrame(\n",
|
" dataFrameExpected = self.createDataFrame(\n",
|
||||||
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||||
" data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '016M20A', '2']],\n",
|
" data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '016M20A', '2']],\n",
|
||||||
" index = [\n",
|
" index = [\n",
|
||||||
" \"1048786\"],\n",
|
" \"1048786\"],\n",
|
||||||
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
|
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
|
||||||
@@ -417,14 +438,14 @@
|
|||||||
" 'VAERSDATA': self.createDataFrame(\n",
|
" 'VAERSDATA': self.createDataFrame(\n",
|
||||||
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
|
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
|
||||||
" data = [ ['Y', 'Y', np.NaN, 'Y', 'Y'],\n",
|
" data = [ ['Y', 'Y', np.NaN, 'Y', 'Y'],\n",
|
||||||
" [np.NaN, np.NaN, 'Y', np.NaN, 'Y']],\n",
|
" [np.NaN, np.NaN, 'Y', np.NaN, 'Y']],\n",
|
||||||
" index = [\n",
|
" index = [\n",
|
||||||
" \"0916600\",\n",
|
" \"0916600\",\n",
|
||||||
" \"0916601\"]),\n",
|
" \"0916601\"]),\n",
|
||||||
" 'VAERSVAX': self.createDataFrame(\n",
|
" 'VAERSVAX': self.createDataFrame(\n",
|
||||||
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||||
" data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n",
|
" data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n",
|
||||||
" ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n",
|
" ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n",
|
||||||
" index = [\n",
|
" index = [\n",
|
||||||
" \"0916600\",\n",
|
" \"0916600\",\n",
|
||||||
" \"0916601\"],\n",
|
" \"0916601\"],\n",
|
||||||
@@ -513,6 +534,75 @@
|
|||||||
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
|
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "44c121ec",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from pandas.testing import assert_series_equal\n",
|
||||||
|
"\n",
|
||||||
|
"class DoseAnalysisTest(unittest.TestCase):\n",
|
||||||
|
"\n",
|
||||||
|
" def test_getFirstDoseTable(self):\n",
|
||||||
|
" # Given\n",
|
||||||
|
" dataFrame = self.createDataFrame(\n",
|
||||||
|
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||||
|
" data = [ ['Y', np.NaN, np.NaN,\t 'COVID19', 'MODERNA', '016M20A', '2'],\n",
|
||||||
|
" ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '030L20A', '1'],\n",
|
||||||
|
" ['Y', 'Y', 'Y', 'COVID19', 'MODERNA', '030L20B', '1']],\n",
|
||||||
|
" index = [\n",
|
||||||
|
" \"1048786\",\n",
|
||||||
|
" \"1048786\",\n",
|
||||||
|
" \"4711\"],\n",
|
||||||
|
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
|
||||||
|
" \n",
|
||||||
|
" # When\n",
|
||||||
|
" doseTable = DoseAnalysis.getNthDoseTable(dataFrame, dose = '1')\n",
|
||||||
|
" \n",
|
||||||
|
" # Then\n",
|
||||||
|
" doseTableExpected = pd.Series(\n",
|
||||||
|
" {\n",
|
||||||
|
" 'Total reports': 2,\n",
|
||||||
|
" 'Deaths': 2,\n",
|
||||||
|
" 'Disabilities': 1,\n",
|
||||||
|
" 'Life Threatening Illnesses': 1,\n",
|
||||||
|
" 'Severe reports': (2 + 1 + 1)/2 * 100\n",
|
||||||
|
" })\n",
|
||||||
|
" assert_series_equal(doseTable, doseTableExpected)\n",
|
||||||
|
"\n",
|
||||||
|
" def test_getSecondDoseTable(self):\n",
|
||||||
|
" # Given\n",
|
||||||
|
" dataFrame = self.createDataFrame(\n",
|
||||||
|
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||||
|
" data = [ ['Y', np.NaN, np.NaN,\t 'COVID19', 'MODERNA', '016M20A', '2'],\n",
|
||||||
|
" ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '030L20A', '1'],\n",
|
||||||
|
" ['Y', 'Y', 'Y', 'COVID19', 'MODERNA', '030L20B', '1']],\n",
|
||||||
|
" index = [\n",
|
||||||
|
" \"1048786\",\n",
|
||||||
|
" \"1048786\",\n",
|
||||||
|
" \"4711\"],\n",
|
||||||
|
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
|
||||||
|
" \n",
|
||||||
|
" # When\n",
|
||||||
|
" doseTable = DoseAnalysis.getNthDoseTable(dataFrame, dose = '2')\n",
|
||||||
|
" \n",
|
||||||
|
" # Then\n",
|
||||||
|
" doseTableExpected = pd.Series(\n",
|
||||||
|
" {\n",
|
||||||
|
" 'Total reports': 1,\n",
|
||||||
|
" 'Deaths': 1,\n",
|
||||||
|
" 'Disabilities': 0,\n",
|
||||||
|
" 'Life Threatening Illnesses': 0,\n",
|
||||||
|
" 'Severe reports': (1 + 0 + 0)/1 * 100\n",
|
||||||
|
" })\n",
|
||||||
|
" assert_series_equal(doseTable, doseTableExpected)\n",
|
||||||
|
"\n",
|
||||||
|
" def createDataFrame(self, index, columns, data, dtypes = {}):\n",
|
||||||
|
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -574,6 +664,22 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"saveSevereEffectsBatchCodeTable('results/severeEffects.xlsx')"
|
"saveSevereEffectsBatchCodeTable('results/severeEffects.xlsx')"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "1b228a16",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Variation in Effect of First and Second Doses"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "202f7c3f",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"see https://www.howbadismybatch.com/firstsecond.html"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
5
help.txt
5
help.txt
@@ -6,9 +6,8 @@ FK-TODO:
|
|||||||
Then carry out the analysis as before.
|
Then carry out the analysis as before.
|
||||||
Repeat for second dose and third dose separately. The cumulative effect will then appear.
|
Repeat for second dose and third dose separately. The cumulative effect will then appear.
|
||||||
It should be analysed separately anyway, because adverse reactions increase with each dose."
|
It should be analysed separately anyway, because adverse reactions increase with each dose."
|
||||||
# 1. filter the vax table first for just C19 vaccines
|
# 1. filter the vax table first for just C19 vaccines and for just n-th (VAERSVAX.VAX_DOSE_SERIES == n \in {1, 2, 3}) dose
|
||||||
# 2. and for just n-th (VAERSVAX.VAX_DOSE_SERIES == n \in {1, 2, 3}) dose => VAERSDATA --> VAERSVAX ist 1:1-Beziehung statt 1:n und kann einfacher in eine einzige Tabelle gemergt werden
|
# 2. filter for manufacturer
|
||||||
# 3. filter for manufacturer
|
|
||||||
- Prüfe, ob die VAERS_ID wirklich eindeutig ist. Antwort: VAERS_ID ist in der VAERSVAX-Tabelle nicht eindeutig, da es mehrere Impfungen pro Person geben kann.
|
- Prüfe, ob die VAERS_ID wirklich eindeutig ist. Antwort: VAERS_ID ist in der VAERSVAX-Tabelle nicht eindeutig, da es mehrere Impfungen pro Person geben kann.
|
||||||
- VAX_LOT-Spalte normalisieren, d.h. mindestens toUpperCase() darauf anwenden
|
- VAX_LOT-Spalte normalisieren, d.h. mindestens toUpperCase() darauf anwenden
|
||||||
- Format des jeweiligen Herstellers berücksichtigen und "verschmutzte" Einträge säubern, denn sie stellen alle dieselbe Charge dar:
|
- Format des jeweiligen Herstellers berücksichtigen und "verschmutzte" Einträge säubern, denn sie stellen alle dieselbe Charge dar:
|
||||||
|
|||||||
Reference in New Issue
Block a user