77 lines
3.1 KiB
Python
77 lines
3.1 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
from Utils import fillLsts
|
|
|
|
class SymptomByBatchcodeTableFactory:
|
|
|
|
@staticmethod
|
|
def createSymptomByBatchcodeTable(VAERSVAX, VAERSSYMPTOMS):
|
|
index_columns = SymptomByBatchcodeTableFactory._getIndexColumns(VAERSVAX)
|
|
symptomColumn = 'SYMPTOM'
|
|
return (pd
|
|
.merge(
|
|
SymptomByBatchcodeTableFactory._get_VAERSVAX_WITH_VAX_LOTS(VAERSVAX, index_columns),
|
|
SymptomByBatchcodeTableFactory._getSymptomsTable(VAERSSYMPTOMS, symptomColumn),
|
|
on = 'VAERS_ID')
|
|
.set_index(index_columns)
|
|
[[symptomColumn, 'COUNTRY']])
|
|
|
|
@staticmethod
|
|
def _getIndexColumns(VAERSVAX):
|
|
return [f"VAX_LOT{num}" for num in range(1, SymptomByBatchcodeTableFactory._getMaxNumShots(VAERSVAX) + 1)]
|
|
|
|
@staticmethod
|
|
def _getMaxNumShots(VAERSVAX):
|
|
return min(VAERSVAX.index.value_counts().iloc[0], 10)
|
|
|
|
@staticmethod
|
|
def _get_VAERSVAX_WITH_VAX_LOTS(VAERSVAX, index_columns):
|
|
return (pd
|
|
.concat(
|
|
[VAERSVAX, SymptomByBatchcodeTableFactory._getVaxLotsTable(VAERSVAX, index_columns)],
|
|
axis = 'columns')
|
|
.reset_index()
|
|
.drop_duplicates(subset = ['VAERS_ID'] + index_columns))
|
|
|
|
@staticmethod
|
|
def _getVaxLotsTable(VAERSVAX, index_columns):
|
|
VAX_LOT_LIST_Table = SymptomByBatchcodeTableFactory._getVAX_LOT_LIST_Table(VAERSVAX, index_columns)
|
|
return SymptomByBatchcodeTableFactory._fillLstsInDataframe(VAX_LOT_LIST_Table, index_columns)
|
|
|
|
@staticmethod
|
|
def _getVAX_LOT_LIST_Table(VAERSVAX, index_columns):
|
|
# slow: aggfunc = lambda VAX_LOT_series: list(VAX_LOT_series.sort_values())))
|
|
# fast:
|
|
VAX_LOT_LIST_Table = VAERSVAX.groupby("VAERS_ID").agg(
|
|
VAX_LOT_LIST = pd.NamedAgg(
|
|
column = 'VAX_LOT',
|
|
aggfunc = lambda VAX_LOT_series: list(VAX_LOT_series)[:len(index_columns)]))
|
|
VAX_LOT_LIST_Table['VAX_LOT_LIST'] = VAX_LOT_LIST_Table['VAX_LOT_LIST'].apply(sorted)
|
|
return VAX_LOT_LIST_Table
|
|
|
|
@staticmethod
|
|
def _fillLstsInDataframe(VAX_LOT_LIST_Table, index_columns):
|
|
return pd.DataFrame(
|
|
fillLsts(
|
|
lsts = VAX_LOT_LIST_Table['VAX_LOT_LIST'].tolist(),
|
|
desiredLen = len(index_columns),
|
|
fillValue = str(np.nan)),
|
|
columns = index_columns,
|
|
index = VAX_LOT_LIST_Table.index)
|
|
|
|
|
|
@staticmethod
|
|
def _getSymptomsTable(VAERSSYMPTOMS, symptomColumn):
|
|
return (pd
|
|
.concat(
|
|
[
|
|
VAERSSYMPTOMS['SYMPTOM1'],
|
|
VAERSSYMPTOMS['SYMPTOM2'],
|
|
VAERSSYMPTOMS['SYMPTOM3'],
|
|
VAERSSYMPTOMS['SYMPTOM4'],
|
|
VAERSSYMPTOMS['SYMPTOM5']
|
|
])
|
|
.dropna()
|
|
.to_frame(name = symptomColumn)
|
|
.reset_index())
|