Merge branch 'main' into pages

This commit is contained in:
frankknoll
2023-01-31 10:40:05 +01:00
49012 changed files with 50168 additions and 81 deletions

View File

@@ -28,7 +28,7 @@ class BatchCodeTableFactoryTest(unittest.TestCase):
# Then
assert_frame_equal(
batchCodeTable,
batchCodeTable[['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Countries', 'Severe reports', 'Lethality']],
TestHelper.createDataFrame(
columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Countries', 'Severe reports', 'Lethality'],
data = [ [2, 1, 2, 2, 'MODERNA', 'France', 2/2 * 100, 1/2 * 100],
@@ -62,7 +62,7 @@ class BatchCodeTableFactoryTest(unittest.TestCase):
# Then
assert_frame_equal(
batchCodeTable,
batchCodeTable[['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Countries', 'Severe reports', 'Lethality']],
TestHelper.createDataFrame(
columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Countries', 'Severe reports', 'Lethality'],
data = [ [1, 1, 0, 0, 'PFIZER\BIONTECH', 'United Kingdom', 1/1 * 100, 1/1 * 100],
@@ -98,7 +98,7 @@ class BatchCodeTableFactoryTest(unittest.TestCase):
# Then
assert_frame_equal(
batchCodeTable,
batchCodeTable[['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Countries', 'Severe reports', 'Lethality']],
TestHelper.createDataFrame(
columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Countries', 'Severe reports', 'Lethality'],
data = [ ],

View File

@@ -4,19 +4,24 @@ import numpy as np
from HtmlUtils import getCountries
def createAndSaveBatchCodeTables(internationalVaersCovid19, minADRsForLethality):
def createAndSaveBatchCodeTables(
internationalVaersCovid19,
minADRsForLethality,
onCountryProcessed = lambda country: None):
batchCodeTableFactory = BatchCodeTableFactory(internationalVaersCovid19)
_createAndSaveBatchCodeTablesForCountries(
createBatchCodeTableForCountry=lambda country: batchCodeTableFactory.createBatchCodeTableByCountry(country),
countries=getCountries(internationalVaersCovid19),
minADRsForLethality=minADRsForLethality)
createBatchCodeTableForCountry = lambda country: batchCodeTableFactory.createBatchCodeTableByCountry(country),
countries = getCountries(internationalVaersCovid19),
minADRsForLethality = minADRsForLethality,
onCountryProcessed = onCountryProcessed)
_createAndSaveBatchCodeTableForCountry(
createBatchCodeTableForCountry=lambda country: batchCodeTableFactory.createGlobalBatchCodeTable(),
country='Global',
minADRsForLethality=minADRsForLethality)
createBatchCodeTableForCountry = lambda country: batchCodeTableFactory.createGlobalBatchCodeTable(),
country = 'Global',
minADRsForLethality = minADRsForLethality,
onCountryProcessed = onCountryProcessed)
def _createAndSaveBatchCodeTableForCountry(createBatchCodeTableForCountry, country, minADRsForLethality=None):
def _createAndSaveBatchCodeTableForCountry(createBatchCodeTableForCountry, country, minADRsForLethality, onCountryProcessed):
batchCodeTable = createBatchCodeTableForCountry(country)
batchCodeTable.index.set_names("Batch", inplace=True)
if minADRsForLethality is not None:
@@ -24,14 +29,25 @@ def _createAndSaveBatchCodeTableForCountry(createBatchCodeTableForCountry, count
batchCodeTable['Adverse Reaction Reports'] < minADRsForLethality,
['Severe reports', 'Lethality']
] = [np.nan, np.nan]
batchCodeTable = batchCodeTable.reset_index();
batchCodeTable = batchCodeTable[
[
'Batch',
'Adverse Reaction Reports',
'Deaths',
'Disabilities',
'Life Threatening Illnesses',
'Company',
'Countries',
'Severe reports',
'Lethality'
]]
IOUtils.saveDataFrame(
batchCodeTable,
'../docs/data/batchCodeTables/' + country)
# display(country + ":", batchCodeTable)
display(country)
onCountryProcessed(country)
def _createAndSaveBatchCodeTablesForCountries(createBatchCodeTableForCountry, countries, minADRsForLethality=None):
def _createAndSaveBatchCodeTablesForCountries(createBatchCodeTableForCountry, countries, minADRsForLethality, onCountryProcessed):
for country in countries:
_createAndSaveBatchCodeTableForCountry(
createBatchCodeTableForCountry, country, minADRsForLethality)
_createAndSaveBatchCodeTableForCountry(createBatchCodeTableForCountry, country, minADRsForLethality, onCountryProcessed)

View File

@@ -1,7 +1,5 @@
import pandas as pd
class DataFrameFilter:
def filterByCovid19(self, dataFrame):
return dataFrame[self._isCovid19(dataFrame)]

View File

@@ -25,7 +25,7 @@ class DataFrameFilterTest(unittest.TestCase):
index = [
"0916600",
"0916601"],
dtypes = {'VAX_DOSE_SERIES': "string"})
dtypes = {"VAX_DOSE_SERIES": "string"})
},
{
'VAERSDATA': TestHelper.createDataFrame(
@@ -42,7 +42,7 @@ class DataFrameFilterTest(unittest.TestCase):
index = [
"1996873",
"1996874"],
dtypes = {'VAX_DOSE_SERIES': "string"})
dtypes = {"VAX_DOSE_SERIES": "string"})
}
])
dataFrameFilter = DataFrameFilter()
@@ -60,5 +60,5 @@ class DataFrameFilterTest(unittest.TestCase):
"0916600",
"0916601",
"1996874"],
dtypes = {'VAX_DOSE_SERIES': "string"})
dtypes = {"VAX_DOSE_SERIES": "string"})
assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)

View File

@@ -0,0 +1,26 @@
class DictByBatchcodeTable2DictConverter:
    """Converts a table of symptom histograms into a plain dict describing one batch code."""

    @staticmethod
    def convertDictByBatchcodeTable2Dict(dictByBatchcodeTable, batchcode):
        """Build the description dict for batchcode.

        dictByBatchcodeTable is a DataFrame whose index entries hold batch codes
        (a tuple per row for a MultiIndex, a single value for a plain Index) and
        whose column 'SYMPTOM_COUNT_BY_VAX_LOT' holds one histogram per row.

        Returns {"batchcode": batchcode, "histograms": [...]}, where every list
        element has the keys "batchcodes" and "histogram".
        """
        return {
            "batchcode": batchcode,
            "histograms": DictByBatchcodeTable2DictConverter._getHistograms(dictByBatchcodeTable)
        }

    @staticmethod
    def _getHistograms(dictByBatchcodeTable):
        """Return one {"batchcodes": ..., "histogram": ...} dict per table row, in row order."""
        # Walking index and column side by side always yields a plain list,
        # including for a table without rows, and does not depend on how
        # DataFrame.apply shapes its result.
        return [
            {
                "batchcodes": DictByBatchcodeTable2DictConverter._getNaNBatchcodes(batchcodes),
                "histogram": histogram
            }
            for batchcodes, histogram in zip(
                dictByBatchcodeTable.index,
                dictByBatchcodeTable['SYMPTOM_COUNT_BY_VAX_LOT'])
        ]

    @staticmethod
    def _getNaNBatchcodes(batchcodes):
        """Return the given batch codes as a list without the 'nan' placeholder entries."""
        # A single-level index yields one batch code string per row instead of a
        # tuple; wrap it so that it is not iterated character by character.
        if isinstance(batchcodes, str):
            batchcodes = [batchcodes]
        # FK-TODO: handle 'nan' everywhere correctly
        return [batchcode for batchcode in batchcodes if batchcode != 'nan']

View File

@@ -0,0 +1,53 @@
import unittest
import json
from DictByBatchcodeTable2DictConverter import DictByBatchcodeTable2DictConverter
from TestHelper import TestHelper
import pandas as pd
class DictByBatchcodeTable2DictConverterTest(unittest.TestCase):
    """Checks the dict produced for a batch code from a table with three VAX_LOT index levels."""

    def test_convertDictByBatchcodeTable2Json(self):
        # Given
        dictByBatchcodeTable = TestHelper.createDataFrame(
            columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],
            data = [
                [
                    {
                        "Blood pressure orthostatic abnormal": 5,
                        "Chest discomfort": 1
                    }
                ],
                [
                    {
                        "Chest discomfort": 2
                    }
                ]
            ],
            index = pd.MultiIndex.from_tuples(
                names = ['VAX_LOT1', 'VAX_LOT2', 'VAX_LOT3'],
                tuples = [['1808982', 'EW0175', 'FD1921'],
                          ['015M20A', '1808982', 'nan']]))

        # When
        # (named convertedDict so that the builtin dict is not shadowed)
        convertedDict = DictByBatchcodeTable2DictConverter.convertDictByBatchcodeTable2Dict(dictByBatchcodeTable, '1808982')

        # Then
        self.assertEqual(
            convertedDict,
            {
                "batchcode": "1808982",
                "histograms": [
                    {
                        "batchcodes": ["1808982", "EW0175", "FD1921"],
                        "histogram": {
                            "Blood pressure orthostatic abnormal": 5,
                            "Chest discomfort": 1
                        }
                    },
                    {
                        # the 'nan' placeholder of the second row is expected to be dropped
                        "batchcodes": ["015M20A", "1808982"],
                        "histogram": {
                            "Chest discomfort": 2
                        }
                    }
                ]
            })

View File

@@ -0,0 +1,21 @@
from TableByBatchcodeFilter import TableByBatchcodeFilter
from DictByBatchcodeTable2DictConverter import DictByBatchcodeTable2DictConverter
from IOUtils import IOUtils
class HistogramDescriptionPersister:
    """Saves one histogram description per batch code as a JSON file below a directory."""

    def __init__(self, directory):
        # directory: path prefix under which the '<batchcode>.json' files are written
        self.directory = directory

    def saveHistogramDescriptionsForBatchcodes(self, batchcodes, dictByBatchcodeTable, progress):
        """Save the histogram description of every batch code in batchcodes.

        After each saved batch code, progress is called with the 1-based position
        of the batch code, the total number of batch codes and the batch code itself.
        """
        for position, batchcode in enumerate(batchcodes, start = 1):
            description = self._getHistogramDescriptionForBatchcode(batchcode, dictByBatchcodeTable)
            # FK-TODO: do not save directly as {batchcode}.json, because the file name could then contain a '/', which would create an unwanted subdirectory. Therefore add an invisible column to the batch code table holding the sanitized and unique file name for the respective batchcode (e.g. simply numbered: 0.json, 1.json, ...).
            targetFile = f'{self.directory}/{batchcode}.json'
            IOUtils.saveDictAsJson(description, targetFile)
            progress(position, len(batchcodes), batchcode)

    def _getHistogramDescriptionForBatchcode(self, batchcode, dictByBatchcodeTable):
        """Filter the table down to the rows for batchcode and convert them into a dict."""
        rowsForBatchcode = TableByBatchcodeFilter.filterTableByBatchcode(batchcode, dictByBatchcodeTable)
        return DictByBatchcodeTable2DictConverter.convertDictByBatchcodeTable2Dict(rowsForBatchcode, batchcode)

View File

@@ -0,0 +1,15 @@
class HistogramTable2DictTableConverter:
    """Collapses a (batch code levels..., SYMPTOM)-indexed histogram table into one dict per batch code combination."""

    @staticmethod
    def convertHistogramTable2DictTable(symptomHistogramByBatchcodeTable):
        """Group the table by all index levels except 'SYMPTOM' and turn every group of each column into a dict.

        The dict of a group is keyed by the remaining 'SYMPTOM' index level.
        """
        # every index level except SYMPTOM is treated as a batch code level
        vax_lot_columns = symptomHistogramByBatchcodeTable.index.names.difference(['SYMPTOM'])

        def histogramOfGroup(histogram_with_vax_lots):
            return HistogramTable2DictTableConverter._histogram_to_json(histogram_with_vax_lots, vax_lot_columns)

        groupedByVaxLots = symptomHistogramByBatchcodeTable.groupby(vax_lot_columns)
        return groupedByVaxLots.agg(histogramOfGroup)

    @staticmethod
    def _histogram_to_json(histogram_with_vax_lots, vax_lot_columns):
        """Drop the batch code index levels of one group and return the rest as a dict (despite the name, not a JSON string)."""
        return histogram_with_vax_lots.reset_index(level = vax_lot_columns, drop=True).to_dict()

View File

@@ -0,0 +1,45 @@
import unittest
from pandas.testing import assert_frame_equal
from HistogramTable2DictTableConverter import HistogramTable2DictTableConverter
from TestHelper import TestHelper
import pandas as pd
class HistogramTable2DictTableConverterTest(unittest.TestCase):
    """Checks that symptom counts are collapsed into one dict per VAX_LOT1 value."""

    def test_convertHistogramTable2DictTable(self):
        # Given
        symptomCounts = TestHelper.createDataFrame(
            columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],
            data = [  [5],
                      [1],
                      [2]],
            index = pd.MultiIndex.from_tuples(
                names = ['VAX_LOT1', 'SYMPTOM'],
                tuples = [['1808982', 'Blood pressure orthostatic abnormal'],
                          ['1808982', 'Chest discomfort'],
                          ['EW0175', 'Chest discomfort']]))
        expectedDictTable = TestHelper.createDataFrame(
            columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],
            data = [
                [
                    {
                        "Blood pressure orthostatic abnormal": 5,
                        "Chest discomfort": 1
                    }
                ],
                [
                    {
                        "Chest discomfort": 2
                    }
                ]],
            index = pd.Index(
                name = 'VAX_LOT1',
                data = ['1808982',
                        'EW0175']))

        # When
        actualDictTable = HistogramTable2DictTableConverter.convertHistogramTable2DictTable(symptomCounts)

        # Then
        assert_frame_equal(actualDictTable, expectedDictTable)

View File

@@ -16,7 +16,7 @@
"from VAERSFileDownloader import updateVAERSFiles\n",
"from datetime import datetime\n",
"from DateProvider import DateProvider\n",
"from InternationalVaersCovid19Provider import getInternationalVaersCovid19\n",
"from InternationalVaersCovid19Provider import getInternationalVaersCovid19,get_international_VAERSVAX_VAERSSYMPTOMS_Covid19\n",
"from BatchCodeTableHtmlUpdater import updateBatchCodeTableHtmlFile\n",
"from BatchCodeTablePersister import createAndSaveBatchCodeTables"
]
@@ -67,6 +67,117 @@
" workingDirectory = os.getcwd())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58333a19",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSVAX_Covid19, international_VAERSSYMPTOMS = get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years = years_from_2020_to_present)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f10b558f",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSVAX_Covid19"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e0908fe",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSSYMPTOMS"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "204af94d",
"metadata": {},
"outputs": [],
"source": [
"from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n",
"\n",
"symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(international_VAERSVAX_Covid19, international_VAERSSYMPTOMS)\n",
"# symptomByBatchcodeTable.to_pickle('tmp/symptomByBatchcodeTable.pkl')\n",
"symptomByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5a70fa0",
"metadata": {},
"outputs": [],
"source": [
"# symptomByBatchcodeTable = pd.read_pickle('tmp/symptomByBatchcodeTable.pkl')\n",
"# symptomByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9268d60d",
"metadata": {},
"outputs": [],
"source": [
"from SymptomHistogramByBatchcodeTableFactory import SymptomHistogramByBatchcodeTableFactory\n",
"\n",
"symptomHistogramByBatchcodeTable = SymptomHistogramByBatchcodeTableFactory.createSymptomHistogramByBatchcodeTable(symptomByBatchcodeTable)\n",
"symptomHistogramByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5170efad",
"metadata": {},
"outputs": [],
"source": [
"from HistogramTable2DictTableConverter import HistogramTable2DictTableConverter\n",
"\n",
"dictByBatchcodeTable = HistogramTable2DictTableConverter.convertHistogramTable2DictTable(symptomHistogramByBatchcodeTable)\n",
"dictByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5e2a0a30",
"metadata": {},
"outputs": [],
"source": [
"batchcodes = international_VAERSVAX_Covid19['VAX_LOT'].dropna().drop_duplicates().to_list()\n",
"batchcodes"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e6efc19",
"metadata": {},
"outputs": [],
"source": [
"from HistogramDescriptionPersister import HistogramDescriptionPersister\n",
"import shutil\n",
"\n",
"directory = '../docs/data/histograms'\n",
"shutil.rmtree(directory)\n",
"histogramDescriptionPersister = HistogramDescriptionPersister(directory)\n",
"histogramDescriptionPersister.saveHistogramDescriptionsForBatchcodes(\n",
" batchcodes,\n",
" dictByBatchcodeTable,\n",
" progress = lambda count, size, batchcode: print(f'{count}/{size}: {batchcode}'))"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -95,13 +206,16 @@
"metadata": {},
"outputs": [],
"source": [
"createAndSaveBatchCodeTables(internationalVaersCovid19, minADRsForLethality=100)"
"createAndSaveBatchCodeTables(\n",
" internationalVaersCovid19,\n",
" minADRsForLethality = 100,\n",
" onCountryProcessed = display)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.8 ('howbadismybatch-venv')",
"display_name": "howbadismybatch-venv",
"language": "python",
"name": "python3"
},
@@ -115,7 +229,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8 (main, Nov 24 2022, 14:13:03) [GCC 11.2.0]"
"version": "3.10.8"
},
"vscode": {
"interpreter": {

View File

@@ -17,7 +17,7 @@ class IOUtils:
@staticmethod
def saveDataFrameAsHtml(dataFrame, file):
IOUtils.ensurePath(file)
dataFrame.reset_index().to_html(
dataFrame.to_html(
file + '.html',
index = False,
table_id = 'batchCodeTable',
@@ -28,7 +28,7 @@ class IOUtils:
@staticmethod
def saveDataFrameAsJson(dataFrame, file):
IOUtils.ensurePath(file)
dataFrame.reset_index().to_json(
dataFrame.to_json(
file + '.json',
orient = "split",
index = False)

View File

@@ -1,6 +1,7 @@
from DataFrameFilter import DataFrameFilter
import VaersReader
import pandas as pd
from VaersDescrReader import VaersDescrReader
def getInternationalVaersCovid19(years):
@@ -11,3 +12,20 @@ def getInternationalVaersCovid19(years):
])
internationalVaersCovid19 = DataFrameFilter().filterByCovid19(internationalVaers)
return internationalVaersCovid19
def get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years):
    """Return the pair (Covid-19 VAERSVAX table with known VAX_LOT, VAERSSYMPTOMS table) for the given years.

    VAERSVAX rows without a VAX_LOT are dropped before the Covid-19 filter is
    applied; the VAERSSYMPTOMS table is returned as read.
    """
    vaersvax, vaerssymptoms = _get_international_VAERSVAX_VAERSSYMPTOMS(years)
    vaersvax.dropna(subset = ['VAX_LOT'], inplace = True)
    covid19Filter = DataFrameFilter()
    return covid19Filter.filterByCovid19(vaersvax), vaerssymptoms
def _get_international_VAERSVAX_VAERSSYMPTOMS(years):
    """Read the per-year and the non-domestic VAERS descriptions from "VAERS" and return their concatenated (VAERSVAX, VAERSSYMPTOMS) tables."""
    reader = VaersDescrReader(dataDir = "VAERS")
    # the non-domestic description is appended after the per-year descriptions
    descrs = reader.readVaersDescrsForYears(years) + [reader.readNonDomesticVaersDescr()]
    vaersvax = _getVaersDescrByName(descrs, 'VAERSVAX')
    vaerssymptoms = _getVaersDescrByName(descrs, 'VAERSSYMPTOMS')
    return vaersvax, vaerssymptoms
def _getVaersDescrByName(vaersDescrs, vaersDescrName):
return pd.concat([vaersDescr[vaersDescrName] for vaersDescr in vaersDescrs])

View File

@@ -0,0 +1,68 @@
import pandas as pd
import numpy as np
class SymptomByBatchcodeTableFactory:
    """Builds a table with one SYMPTOM per row, indexed by the sorted batch codes (VAX_LOT1, VAX_LOT2, ...) of the report."""

    @staticmethod
    def createSymptomByBatchcodeTable(VAERSVAX, VAERSSYMPTOMS):
        """Merge the batch codes per VAERS_ID with the symptoms per VAERS_ID.

        Returns a DataFrame with the single column 'SYMPTOM', indexed by the
        VAX_LOT<n> columns.
        """
        symptomColumn = 'SYMPTOM'
        index_columns = SymptomByBatchcodeTableFactory._getIndexColumns(VAERSVAX)
        vaxLotsByVaersId = SymptomByBatchcodeTableFactory._get_VAERSVAX_WITH_VAX_LOTS(VAERSVAX, index_columns)
        symptomsByVaersId = SymptomByBatchcodeTableFactory._getSymptomsTable(VAERSSYMPTOMS, symptomColumn)
        merged = pd.merge(vaxLotsByVaersId, symptomsByVaersId, on = 'VAERS_ID')
        return merged.set_index(index_columns)[[symptomColumn]]

    @staticmethod
    def _getIndexColumns(VAERSVAX):
        """Return the column names 'VAX_LOT1' .. 'VAX_LOT<k>', k being the result of _getMaxNumShots."""
        maxNumShots = SymptomByBatchcodeTableFactory._getMaxNumShots(VAERSVAX)
        return [f"VAX_LOT{num}" for num in range(1, maxNumShots + 1)]

    @staticmethod
    def _getMaxNumShots(VAERSVAX):
        """Return the largest number of VAERSVAX rows sharing one index value."""
        # value_counts() sorts by count in descending order, so the first entry is the maximum
        rowsPerIndexValue = VAERSVAX.index.value_counts()
        return rowsPerIndexValue.iloc[0]

    @staticmethod
    def _get_VAERSVAX_WITH_VAX_LOTS(VAERSVAX, index_columns):
        """Attach the VAX_LOT<n> columns to VAERSVAX and keep one row per (VAERS_ID, VAX_LOT<n>...) combination."""
        vaxLotsTable = SymptomByBatchcodeTableFactory._getVaxLotsTable(VAERSVAX, index_columns)
        withVaxLots = pd.concat([VAERSVAX, vaxLotsTable], axis = 'columns')
        return withVaxLots.reset_index().drop_duplicates(subset = ['VAERS_ID'] + index_columns)

    @staticmethod
    def _getVaxLotsTable(VAERSVAX, index_columns):
        """Return a table indexed like the VAERS_ID groups, holding the sorted VAX_LOTs of each group in the VAX_LOT<n> columns."""
        sortedLotsByVaersId = VAERSVAX.groupby("VAERS_ID").agg(
            VAX_LOT_LIST = pd.NamedAgg(
                column = 'VAX_LOT',
                aggfunc = lambda VAX_LOT_series: list(VAX_LOT_series.sort_values())))
        # groups with fewer batch codes than columns are padded with the string 'nan'
        paddedLots = fillLsts(
            lsts = sortedLotsByVaersId['VAX_LOT_LIST'].tolist(),
            desiredLen = len(index_columns),
            fillValue = str(np.nan))
        return pd.DataFrame(
            paddedLots,
            columns = index_columns,
            index = sortedLotsByVaersId.index)

    @staticmethod
    def _getSymptomsTable(VAERSSYMPTOMS, symptomColumn):
        """Stack the columns SYMPTOM1..SYMPTOM5 into one column named symptomColumn, dropping missing values, with the index reset to a column."""
        symptomSeries = [
            VAERSSYMPTOMS[name]
            for name in ('SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5')
        ]
        stacked = pd.concat(symptomSeries).dropna()
        return stacked.to_frame(name = symptomColumn).reset_index()
def fillLsts(lsts, desiredLen, fillValue):
    """Return a new list holding every list of lsts padded via fillLst."""
    return [
        fillLst(shortLst, desiredLen, fillValue)
        for shortLst in lsts
    ]
def fillLst(lst, desiredLen, fillValue):
    """Return a new list: lst followed by as many fillValue entries as are needed to reach desiredLen.

    A list that already has desiredLen or more elements is copied without padding.
    """
    numMissing = max(desiredLen - len(lst), 0)
    padding = [fillValue] * numMissing
    return lst + padding

View File

@@ -0,0 +1,169 @@
import unittest
from pandas.testing import assert_frame_equal
from TestHelper import TestHelper
from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory
import pandas as pd
import numpy as np
class SymptomByBatchcodeTableFactoryTest(unittest.TestCase):
    """Tests for SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable."""

    def test_createSymptomByBatchcodeTable(self):
        # Given
        # (backslashes in manufacturer names are written as '\\' because '\B' is not a valid escape sequence)
        VAERSVAX = TestHelper.createDataFrame(
            columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
            data = [  ['COVID19', 'JANSSEN', 'EW0175', 'UNK'],
                      ['COVID19', 'PFIZER\\BIONTECH', '1808982', '1']],
            index = pd.Index(
                name = 'VAERS_ID',
                data=[
                    2547730,
                    2547730]),
            dtypes = {
                'VAX_DOSE_SERIES': 'string',
                'VAX_LOT': 'string'})
        VAERSSYMPTOMS = TestHelper.createDataFrame(
            columns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5'],
            data = [  ['Blood pressure orthostatic abnormal', 'COVID-19', 'Coma', 'Computerised tomogram', 'Exposure to SARS-CoV-2'],
                      ['Head injury', 'Headache', 'Laboratory test', 'Magnetic resonance imaging', 'SARS-CoV-2 antibody test negative'],
                      ['SARS-CoV-2 test positive', 'Unresponsive to stimuli', 'X-ray', np.nan, np.nan]],
            index = pd.Index(
                name = 'VAERS_ID',
                data=[
                    2547730,
                    2547730,
                    2547730]))

        # When
        symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(VAERSVAX, VAERSSYMPTOMS)

        # Then
        assert_frame_equal(
            symptomByBatchcodeTable,
            TestHelper.createDataFrame(
                columns = ['SYMPTOM'],
                data = [  ['Blood pressure orthostatic abnormal'],
                          ['Head injury'],
                          ['SARS-CoV-2 test positive'],
                          ['COVID-19'],
                          ['Headache'],
                          ['Unresponsive to stimuli'],
                          ['Coma'],
                          ['Laboratory test'],
                          ['X-ray'],
                          ['Computerised tomogram'],
                          ['Magnetic resonance imaging'],
                          ['Exposure to SARS-CoV-2'],
                          ['SARS-CoV-2 antibody test negative']],
                index = pd.MultiIndex.from_tuples(
                    names = ['VAX_LOT1', 'VAX_LOT2'],
                    tuples = [['1808982', 'EW0175']] * 13)))

    def test_createSymptomByBatchcodeTable_two_patients_same_symptoms(self):
        # Given
        VAERSVAX = TestHelper.createDataFrame(
            columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
            data = [  ['COVID19', 'JANSSEN', 'EW0175', '1'],
                      ['COVID19', 'JANSSEN', 'EW0175', '1']],
            index = pd.Index(
                name = 'VAERS_ID',
                data=[
                    2547730,
                    2547731]),
            dtypes = {
                'VAX_DOSE_SERIES': 'string',
                'VAX_LOT': 'string'})
        VAERSSYMPTOMS = TestHelper.createDataFrame(
            columns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5'],
            data = [  ['Blood pressure orthostatic abnormal', np.nan, np.nan, np.nan, np.nan],
                      ['Blood pressure orthostatic abnormal', np.nan, np.nan, np.nan, np.nan]],
            index = pd.Index(
                name = 'VAERS_ID',
                data=[
                    2547730,
                    2547731]))

        # When
        symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(VAERSVAX, VAERSSYMPTOMS)

        # Then
        assert_frame_equal(
            symptomByBatchcodeTable,
            TestHelper.createDataFrame(
                columns = ['SYMPTOM'],
                data = [  ['Blood pressure orthostatic abnormal'],
                          ['Blood pressure orthostatic abnormal']],
                index = pd.Index(
                    name = 'VAX_LOT1',
                    data = ['EW0175',
                            'EW0175'])),
            check_dtype = False)

    def test_createSymptomByBatchcodeTable_two_patients_distinct_symptoms(self):
        # Given
        VAERSVAX = TestHelper.createDataFrame(
            columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
            data = [  ['COVID19', 'JANSSEN', '1808982', 'UNK'],
                      ['COVID19', 'PFIZER\\BIONTECH', 'EW0175', '1'],
                      ['COVID19', 'PFIZER\\BIONTECH', 'EW0175', '1'],
                      ['COVID19', 'PFIZER\\BIONTECH', 'EW0167', '2']],
            index = pd.Index(
                name = 'VAERS_ID',
                data=[
                    2547730,
                    2547730,
                    2547744,
                    2547744]),
            dtypes = {
                'VAX_DOSE_SERIES': 'string',
                'VAX_LOT': 'string'})
        VAERSSYMPTOMS = TestHelper.createDataFrame(
            columns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5'],
            data = [  ['Blood pressure orthostatic abnormal', 'COVID-19', 'Coma', 'Computerised tomogram', 'Exposure to SARS-CoV-2'],
                      ['Head injury', 'Headache', 'Laboratory test', 'Magnetic resonance imaging', 'SARS-CoV-2 antibody test negative'],
                      ['SARS-CoV-2 test positive', 'Unresponsive to stimuli', 'X-ray', np.nan, np.nan],
                      ['Computerised tomogram head abnormal', 'Ear pain', 'Headache', 'Idiopathic intracranial hypertension', 'Intracranial pressure increased'],
                      ['Lumbar puncture', 'Magnetic resonance imaging head', 'Pain', 'Swelling', 'Vision blurred']],
            index = pd.Index(
                name = 'VAERS_ID',
                data=[
                    2547730,
                    2547730,
                    2547730,
                    2547744,
                    2547744]))

        # When
        symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(VAERSVAX, VAERSSYMPTOMS)

        # Then
        assert_frame_equal(
            symptomByBatchcodeTable,
            TestHelper.createDataFrame(
                columns = ['SYMPTOM'],
                data = [  ['Blood pressure orthostatic abnormal'],
                          ['Head injury'],
                          ['SARS-CoV-2 test positive'],
                          ['COVID-19'],
                          ['Headache'],
                          ['Unresponsive to stimuli'],
                          ['Coma'],
                          ['Laboratory test'],
                          ['X-ray'],
                          ['Computerised tomogram'],
                          ['Magnetic resonance imaging'],
                          ['Exposure to SARS-CoV-2'],
                          ['SARS-CoV-2 antibody test negative'],
                          ['Computerised tomogram head abnormal'],
                          ['Lumbar puncture'],
                          ['Ear pain'],
                          ['Magnetic resonance imaging head'],
                          ['Headache'],
                          ['Pain'],
                          ['Idiopathic intracranial hypertension'],
                          ['Swelling'],
                          ['Intracranial pressure increased'],
                          ['Vision blurred']],
                index = pd.MultiIndex.from_tuples(
                    names = ['VAX_LOT1', 'VAX_LOT2'],
                    tuples = [['1808982', 'EW0175']] * 13 + [['EW0167', 'EW0175']] * 10)))

View File

@@ -0,0 +1,10 @@
class SymptomHistogramByBatchcodeTableFactory:
    """Counts how often every symptom occurs per batch code combination."""

    @staticmethod
    def createSymptomHistogramByBatchcodeTable(symptomByBatchcodeTable):
        """Group the table by all of its index levels and count the values of column 'SYMPTOM' per group.

        Returns a DataFrame with the single column 'SYMPTOM_COUNT_BY_VAX_LOT'.
        """
        groupedByVaxLots = symptomByBatchcodeTable.groupby(symptomByBatchcodeTable.index.names)
        symptomCounts = groupedByVaxLots['SYMPTOM'].value_counts()
        return symptomCounts.to_frame(name = 'SYMPTOM_COUNT_BY_VAX_LOT')

View File

@@ -0,0 +1,61 @@
import unittest
from pandas.testing import assert_frame_equal
from SymptomHistogramByBatchcodeTableFactory import SymptomHistogramByBatchcodeTableFactory
from TestHelper import TestHelper
import pandas as pd
class SymptomHistogramByBatchcodeTableFactoryTest(unittest.TestCase):
    """Tests for SymptomHistogramByBatchcodeTableFactory with one and with two VAX_LOT index levels."""

    def test_createSymptomHistogramByBatchcodeTable(self):
        # Given
        symptoms = TestHelper.createDataFrame(
            columns = ['SYMPTOM'],
            data = [  ['Blood pressure orthostatic abnormal'],
                      ['Blood pressure orthostatic abnormal'],
                      ['Blood pressure orthostatic abnormal']],
            index = pd.Index(
                name = 'VAX_LOT1',
                data = ['EW0175',
                        'EW0175',
                        '1808982']))
        expectedHistogram = TestHelper.createDataFrame(
            columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],
            data = [  [1],
                      [2]],
            index = pd.MultiIndex.from_tuples(
                names = ['VAX_LOT1', 'SYMPTOM'],
                tuples = [['1808982', 'Blood pressure orthostatic abnormal'],
                          ['EW0175', 'Blood pressure orthostatic abnormal']]))

        # When
        actualHistogram = SymptomHistogramByBatchcodeTableFactory.createSymptomHistogramByBatchcodeTable(symptoms)

        # Then
        assert_frame_equal(actualHistogram, expectedHistogram)

    def test_createSymptomHistogramByBatchcodeTable_two_VAX_LOTs_Index(self):
        # Given
        symptoms = TestHelper.createDataFrame(
            columns = ['SYMPTOM'],
            data = [  ['Blood pressure orthostatic abnormal'],
                      ['Blood pressure orthostatic abnormal'],
                      ['Headache']],
            index = pd.MultiIndex.from_tuples(
                names = ['VAX_LOT1', 'VAX_LOT2'],
                tuples = [['1808982', 'EW0175']] * 3))
        expectedHistogram = TestHelper.createDataFrame(
            columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],
            data = [  [2],
                      [1]],
            index = pd.MultiIndex.from_tuples(
                names = ['VAX_LOT1', 'VAX_LOT2', 'SYMPTOM'],
                tuples = [['1808982', 'EW0175', 'Blood pressure orthostatic abnormal'],
                          ['1808982', 'EW0175', 'Headache']]))

        # When
        actualHistogram = SymptomHistogramByBatchcodeTableFactory.createSymptomHistogramByBatchcodeTable(symptoms)

        # Then
        assert_frame_equal(actualHistogram, expectedHistogram)

View File

@@ -0,0 +1,18 @@
from functools import reduce
class TableByBatchcodeFilter:
    """Selects the rows of a table whose index contains a given batch code in any index level."""

    @staticmethod
    def filterTableByBatchcode(batchcode, table):
        """Return the rows of table having batchcode in at least one index level, indexed like table."""
        batchcodeColumns = table.index.names
        # move the index levels into columns so that they can be compared column by column
        flatTable = table.reset_index()
        rowMatches = TableByBatchcodeFilter._existsBatchcodeInAnyBatchcodeColumn(flatTable, batchcodeColumns, batchcode)
        return flatTable[rowMatches].set_index(batchcodeColumns)

    @staticmethod
    def _existsBatchcodeInAnyBatchcodeColumn(table, batchcodeColumns, batchcode):
        """Return a per-row mask that is true where any of batchcodeColumns equals batchcode."""
        # start with "no row matches" and OR in the comparison result of each column
        rowMatches = [False] * len(table.index)
        for batchcodeColumn in batchcodeColumns:
            rowMatches = rowMatches | (table[batchcodeColumn] == batchcode)
        return rowMatches

View File

@@ -0,0 +1,65 @@
import unittest
from pandas.testing import assert_frame_equal
from TableByBatchcodeFilter import TableByBatchcodeFilter
from TestHelper import TestHelper
import pandas as pd
class TableByBatchcodeFilterTest(unittest.TestCase):
    """Tests for TableByBatchcodeFilter.filterTableByBatchcode with two and three VAX_LOT index levels."""

    def test_convertHistogramTable2JsonTable_2_VAX_LOT_columns(self):
        # Given
        batchcode = '1808982'
        histogramTable = TestHelper.createDataFrame(
            columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],
            data = [  ['{"Blood pressure orthostatic abnormal":5,"Chest discomfort":1}'],
                      ['{"Chest discomfort":2}'],
                      ['{"Chills":5}']],
            index = pd.MultiIndex.from_tuples(
                names = ['VAX_LOT1', 'VAX_LOT2'],
                tuples = [[batchcode, 'EW0175'],
                          ['015M20A', batchcode],
                          ['015M20A', 'EW0175']]))
        # only the rows mentioning the batch code in some index level are expected
        expectedTable = TestHelper.createDataFrame(
            columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],
            data = [  ['{"Blood pressure orthostatic abnormal":5,"Chest discomfort":1}'],
                      ['{"Chest discomfort":2}']],
            index = pd.MultiIndex.from_tuples(
                names = ['VAX_LOT1', 'VAX_LOT2'],
                tuples = [[batchcode, 'EW0175'],
                          ['015M20A', batchcode]]))

        # When
        actualTable = TableByBatchcodeFilter.filterTableByBatchcode(batchcode, histogramTable)

        # Then
        assert_frame_equal(actualTable, expectedTable)

    def test_convertHistogramTable2JsonTable_3_VAX_LOT_columns(self):
        # Given
        batchcode = '1808983'
        histogramTable = TestHelper.createDataFrame(
            columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],
            data = [  ['{"Blood pressure orthostatic abnormal":5,"Chest discomfort":1}'],
                      ['{"Chest discomfort":2}'],
                      ['{"Chills":5}']],
            index = pd.MultiIndex.from_tuples(
                names = ['VAX_LOT1', 'VAX_LOT2', 'VAX_LOT3'],
                tuples = [[batchcode, 'EW0175', None],
                          ['015M20A', None, batchcode],
                          ['015M20A', 'EW0175', 'dummy2']]))
        expectedTable = TestHelper.createDataFrame(
            columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],
            data = [  ['{"Blood pressure orthostatic abnormal":5,"Chest discomfort":1}'],
                      ['{"Chest discomfort":2}']],
            index = pd.MultiIndex.from_tuples(
                names = ['VAX_LOT1', 'VAX_LOT2', 'VAX_LOT3'],
                tuples = [[batchcode, 'EW0175', None],
                          ['015M20A', None, batchcode]]))

        # When
        actualTable = TableByBatchcodeFilter.filterTableByBatchcode(batchcode, histogramTable)

        # Then
        assert_frame_equal(actualTable, expectedTable)

View File

@@ -1,4 +1,6 @@
import pandas as pd
from DataFrameNormalizer import DataFrameNormalizer
class VaersDescrReader:
@@ -11,13 +13,15 @@ class VaersDescrReader:
def readVaersDescrForYear(self, year):
return {
'VAERSDATA': self._readVAERSDATA('{dataDir}/{year}VAERSDATA.csv'.format(dataDir = self.dataDir, year = year)),
'VAERSVAX': self._readVAERSVAX('{dataDir}/{year}VAERSVAX.csv'.format(dataDir = self.dataDir, year = year))
'VAERSVAX': self._readVAERSVAX('{dataDir}/{year}VAERSVAX.csv'.format(dataDir = self.dataDir, year = year)),
'VAERSSYMPTOMS': self._readVAERSSYMPTOMS('{dataDir}/{year}VAERSSYMPTOMS.csv'.format(dataDir = self.dataDir, year = year))
}
def readNonDomesticVaersDescr(self):
return {
'VAERSDATA': self._readVAERSDATA(self.dataDir + "/NonDomesticVAERSDATA.csv"),
'VAERSVAX': self._readVAERSVAX(self.dataDir + "/NonDomesticVAERSVAX.csv")
'VAERSVAX': self._readVAERSVAX(self.dataDir + "/NonDomesticVAERSVAX.csv"),
'VAERSSYMPTOMS': self._readVAERSSYMPTOMS(self.dataDir + "/NonDomesticVAERSSYMPTOMS.csv")
}
def _readVAERSDATA(self, file):
@@ -28,10 +32,22 @@ class VaersDescrReader:
date_parser = lambda dateStr: pd.to_datetime(dateStr, format = "%m/%d/%Y"))
def _readVAERSVAX(self, file):
return self._read_csv(
VAERSVAX = self._read_csv(
file = file,
usecols = ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],
dtype = {"VAX_DOSE_SERIES": "string"})
dtype =
{
"VAX_DOSE_SERIES": "string",
"VAX_LOT": "string"
})
DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX)
return VAERSVAX
def _readVAERSSYMPTOMS(self, file):
    """Read the VAERS_ID and SYMPTOM1..SYMPTOM5 columns of the given VAERSSYMPTOMS CSV file via self._read_csv."""
    symptomColumns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5']
    return self._read_csv(
        file = file,
        usecols = ['VAERS_ID'] + symptomColumns)
def _read_csv(self, file, **kwargs):
return pd.read_csv(

View File

@@ -0,0 +1,52 @@
import unittest
from pandas.testing import assert_frame_equal
from TestHelper import TestHelper
from VaersDescrReader import VaersDescrReader
import pandas as pd
import numpy as np
class VaersDescrReaderTest(unittest.TestCase):
    """Reads the 2023 fixture files below src/testdata and checks the three resulting tables."""

    def test_readVaersDescrForYear(self):
        # Given
        vaersDescrReader = VaersDescrReader(dataDir = "src/testdata")

        # When
        vaersDescr = vaersDescrReader.readVaersDescrForYear(2023)

        # Then
        assert_frame_equal(
            vaersDescr['VAERSDATA'],
            TestHelper.createDataFrame(
                columns = ['RECVDATE', 'DIED', 'L_THREAT', 'ER_VISIT', 'HOSPITAL', 'DISABLE', 'SPLTTYPE'],
                data = [  [pd.to_datetime('01/01/2023', format = "%m/%d/%Y"), np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]],
                index = pd.Index(
                    name = 'VAERS_ID',
                    data=[2547730])))
        # (the backslash in the manufacturer name is written as '\\' because '\B' is not a valid escape sequence)
        assert_frame_equal(
            vaersDescr['VAERSVAX'],
            TestHelper.createDataFrame(
                columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
                data = [  ['COVID19', 'JANSSEN', '1808982', 'UNK'],
                          ['COVID19', 'PFIZER\\BIONTECH', 'EW0175', '1']],
                index = pd.Index(
                    name = 'VAERS_ID',
                    data=[
                        2547730,
                        2547730]),
                dtypes = {
                    'VAX_DOSE_SERIES': 'string',
                    'VAX_LOT': 'string'}))
        assert_frame_equal(
            vaersDescr['VAERSSYMPTOMS'],
            TestHelper.createDataFrame(
                columns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5'],
                data = [  ['Blood pressure orthostatic abnormal', 'COVID-19', 'Coma', 'Computerised tomogram', 'Exposure to SARS-CoV-2'],
                          ['Head injury', 'Headache', 'Laboratory test', 'Magnetic resonance imaging', 'SARS-CoV-2 antibody test negative'],
                          ['SARS-CoV-2 test positive', 'Unresponsive to stimuli', 'X-ray', np.nan, np.nan]],
                index = pd.Index(
                    name = 'VAERS_ID',
                    data=[
                        2547730,
                        2547730,
                        2547730])))

View File

@@ -3,6 +3,7 @@ jupyter notebook
FK-FIXME:
FK-TODO:
- Histogramme auch speziell für die einzelnen Länder berechnen und anzeigen.
anacron job:
sudo cp src/intensivstationen_howbadismybatch.sh /etc/cron.daily/intensivstationen_howbadismybatch
@@ -13,3 +14,9 @@ ipython kernel install --user --name=howbadismybatch-venv-kernel
jupyter kernelspec list
conda env export --from-history > environment.yml
conda env create -f environment.yml
www.HowBadIsMyBatch.info
/etc/apache2/sites-available/HowBadIsMyBatch.conf
https://datatables.net/examples/api/row_details.html
https://www.datatables.net/blog/2017-03-31

2
src/testdata/2023VAERSDATA.csv vendored Normal file
View File

@@ -0,0 +1,2 @@
VAERS_ID,RECVDATE,STATE,AGE_YRS,CAGE_YR,CAGE_MO,SEX,RPT_DATE,SYMPTOM_TEXT,DIED,DATEDIED,L_THREAT,ER_VISIT,HOSPITAL,HOSPDAYS,X_STAY,DISABLE,RECOVD,VAX_DATE,ONSET_DATE,NUMDAYS,LAB_DATA,V_ADMINBY,V_FUNDBY,OTHER_MEDS,CUR_ILL,HISTORY,PRIOR_VAX,SPLTTYPE,FORM_VERS,TODAYS_DATE,BIRTH_DEFECT,OFC_VISIT,ER_ED_VISIT,ALLERGIES
2547730,01/01/2023,DE,53.0,,,F,,"The adverse event is that the patient went into a coma state and was non responsive. Patient spent almost a month hospitalized and transferred into a nursing home. Trauma to the head caused severe orthostatic blood pressure problems, high fall risk, ongoing headaches, and caused patient to be exposed to covid, Be advised patient was tested the day before with a PCR 3 day covid test that resulted in zero antibodies.",,,,,,,,,N,04/06/2021,,,"Hospilization 4/17/2021 - Lab work, MRI, Catscan, Xrays, Tested positive for Covid 19",PHM,,no,diabetic,,,,2,12/31/2022,,,,none
1 VAERS_ID RECVDATE STATE AGE_YRS CAGE_YR CAGE_MO SEX RPT_DATE SYMPTOM_TEXT DIED DATEDIED L_THREAT ER_VISIT HOSPITAL HOSPDAYS X_STAY DISABLE RECOVD VAX_DATE ONSET_DATE NUMDAYS LAB_DATA V_ADMINBY V_FUNDBY OTHER_MEDS CUR_ILL HISTORY PRIOR_VAX SPLTTYPE FORM_VERS TODAYS_DATE BIRTH_DEFECT OFC_VISIT ER_ED_VISIT ALLERGIES
2 2547730 01/01/2023 DE 53.0 F The adverse event is that the patient went into a coma state and was non responsive. Patient spent almost a month hospitalized and transferred into a nursing home. Trauma to the head caused severe orthostatic blood pressure problems, high fall risk, ongoing headaches, and caused patient to be exposed to covid, Be advised patient was tested the day before with a PCR 3 day covid test that resulted in zero antibodies. N 04/06/2021 Hospilization 4/17/2021 - Lab work, MRI, Catscan, Xrays, Tested positive for Covid 19 PHM no diabetic 2 12/31/2022 none

4
src/testdata/2023VAERSSYMPTOMS.csv vendored Normal file
View File

@@ -0,0 +1,4 @@
VAERS_ID,SYMPTOM1,SYMPTOMVERSION1,SYMPTOM2,SYMPTOMVERSION2,SYMPTOM3,SYMPTOMVERSION3,SYMPTOM4,SYMPTOMVERSION4,SYMPTOM5,SYMPTOMVERSION5
2547730,Blood pressure orthostatic abnormal,25.10,COVID-19,25.10,Coma,25.10,Computerised tomogram,25.10,Exposure to SARS-CoV-2,25.10
2547730,Head injury,25.10,Headache,25.10,Laboratory test,25.10,Magnetic resonance imaging,25.10,SARS-CoV-2 antibody test negative,25.10
2547730,SARS-CoV-2 test positive,25.10,Unresponsive to stimuli,25.10,X-ray,25.10,,,,
1 VAERS_ID SYMPTOM1 SYMPTOMVERSION1 SYMPTOM2 SYMPTOMVERSION2 SYMPTOM3 SYMPTOMVERSION3 SYMPTOM4 SYMPTOMVERSION4 SYMPTOM5 SYMPTOMVERSION5
2 2547730 Blood pressure orthostatic abnormal 25.10 COVID-19 25.10 Coma 25.10 Computerised tomogram 25.10 Exposure to SARS-CoV-2 25.10
3 2547730 Head injury 25.10 Headache 25.10 Laboratory test 25.10 Magnetic resonance imaging 25.10 SARS-CoV-2 antibody test negative 25.10
4 2547730 SARS-CoV-2 test positive 25.10 Unresponsive to stimuli 25.10 X-ray 25.10

3
src/testdata/2023VAERSVAX.csv vendored Normal file
View File

@@ -0,0 +1,3 @@
VAERS_ID,VAX_TYPE,VAX_MANU,VAX_LOT,VAX_DOSE_SERIES,VAX_ROUTE,VAX_SITE,VAX_NAME
2547730,COVID19,JANSSEN,1808982,UNK,SYR,AR,COVID19 (COVID19 (JANSSEN))
2547730,COVID19,PFIZER\BIONTECH,EW0175,1,IM,,COVID19 (COVID19 (PFIZER-BIONTECH))
1 VAERS_ID VAX_TYPE VAX_MANU VAX_LOT VAX_DOSE_SERIES VAX_ROUTE VAX_SITE VAX_NAME
2 2547730 COVID19 JANSSEN 1808982 UNK SYR AR COVID19 (COVID19 (JANSSEN))
3 2547730 COVID19 PFIZER\BIONTECH EW0175 1 IM COVID19 (COVID19 (PFIZER-BIONTECH))