Files
HowBadIsMyBatch/src/SymptomByBatchcodeTableFactory.py
2023-04-22 02:32:59 +02:00

77 lines
3.1 KiB
Python

import pandas as pd
import numpy as np
from Utils import fillLsts
class SymptomByBatchcodeTableFactory:
@staticmethod
def createSymptomByBatchcodeTable(VAERSVAX, VAERSSYMPTOMS):
index_columns = SymptomByBatchcodeTableFactory._getIndexColumns(VAERSVAX)
symptomColumn = 'SYMPTOM'
return (pd
.merge(
SymptomByBatchcodeTableFactory._get_VAERSVAX_WITH_VAX_LOTS(VAERSVAX, index_columns),
SymptomByBatchcodeTableFactory._getSymptomsTable(VAERSSYMPTOMS, symptomColumn),
on = 'VAERS_ID')
.set_index(index_columns)
[[symptomColumn, 'COUNTRY']])
@staticmethod
def _getIndexColumns(VAERSVAX):
return [f"VAX_LOT{num}" for num in range(1, SymptomByBatchcodeTableFactory._getMaxNumShots(VAERSVAX) + 1)]
@staticmethod
def _getMaxNumShots(VAERSVAX):
return min(VAERSVAX.index.value_counts().iloc[0], 10)
@staticmethod
def _get_VAERSVAX_WITH_VAX_LOTS(VAERSVAX, index_columns):
return (pd
.concat(
[VAERSVAX, SymptomByBatchcodeTableFactory._getVaxLotsTable(VAERSVAX, index_columns)],
axis = 'columns')
.reset_index()
.drop_duplicates(subset = ['VAERS_ID'] + index_columns))
@staticmethod
def _getVaxLotsTable(VAERSVAX, index_columns):
VAX_LOT_LIST_Table = SymptomByBatchcodeTableFactory._getVAX_LOT_LIST_Table(VAERSVAX, index_columns)
return SymptomByBatchcodeTableFactory._fillLstsInDataframe(VAX_LOT_LIST_Table, index_columns)
@staticmethod
def _getVAX_LOT_LIST_Table(VAERSVAX, index_columns):
# slow: aggfunc = lambda VAX_LOT_series: list(VAX_LOT_series.sort_values())))
# fast:
VAX_LOT_LIST_Table = VAERSVAX.groupby("VAERS_ID").agg(
VAX_LOT_LIST = pd.NamedAgg(
column = 'VAX_LOT',
aggfunc = lambda VAX_LOT_series: list(VAX_LOT_series)[:len(index_columns)]))
VAX_LOT_LIST_Table['VAX_LOT_LIST'] = VAX_LOT_LIST_Table['VAX_LOT_LIST'].apply(sorted)
return VAX_LOT_LIST_Table
@staticmethod
def _fillLstsInDataframe(VAX_LOT_LIST_Table, index_columns):
return pd.DataFrame(
fillLsts(
lsts = VAX_LOT_LIST_Table['VAX_LOT_LIST'].tolist(),
desiredLen = len(index_columns),
fillValue = str(np.nan)),
columns = index_columns,
index = VAX_LOT_LIST_Table.index)
@staticmethod
def _getSymptomsTable(VAERSSYMPTOMS, symptomColumn):
return (pd
.concat(
[
VAERSSYMPTOMS['SYMPTOM1'],
VAERSSYMPTOMS['SYMPTOM2'],
VAERSSYMPTOMS['SYMPTOM3'],
VAERSSYMPTOMS['SYMPTOM4'],
VAERSSYMPTOMS['SYMPTOM5']
])
.dropna()
.to_frame(name = symptomColumn)
.reset_index())