refactoring

This commit is contained in:
frankknoll
2023-01-23 08:07:23 +01:00
parent 28e71af7af
commit b6eb929a7b
2 changed files with 18 additions and 18 deletions

View File

@@ -0,0 +1,58 @@
import pandas as pd
import numpy as np
class SymptomByBatchcodeTableFactory:
    """Namespace of static helpers that relate reported symptoms to vaccine lots.

    Both input frames are presumably indexed by ``VAERS_ID``: the code groups
    by that name and merges on it after ``reset_index()`` -- TODO confirm
    against the callers.
    """

    @staticmethod
    def createSymptomByBatchcodeTable(VAERSVAX, VAERSSYMPTOMS):
        """Return a one-column ``SYMPTOM`` frame indexed by the lot columns.

        Args:
            VAERSVAX: frame with a ``VAX_LOT`` column, one row per shot.
            VAERSSYMPTOMS: frame with columns ``SYMPTOM1`` .. ``SYMPTOM5``.

        Returns:
            DataFrame whose index levels are ``VAX_LOT1`` .. ``VAX_LOTn`` and
            whose only column is ``SYMPTOM``.
        """
        lotColumns = SymptomByBatchcodeTableFactory._getIndexColumns(VAERSVAX)
        symptomColumn = 'SYMPTOM'
        vaxWithLots = SymptomByBatchcodeTableFactory._get_VAERSVAX_WITH_VAX_LOTS(
            VAERSVAX, lotColumns)
        symptoms = SymptomByBatchcodeTableFactory._getSymptomsTable(
            VAERSSYMPTOMS, symptomColumn)
        merged = pd.merge(vaxWithLots, symptoms, on='VAERS_ID')
        # Double brackets keep the result a DataFrame instead of a Series.
        return merged.set_index(lotColumns)[[symptomColumn]]

    @staticmethod
    def _getIndexColumns(VAERSVAX):
        """Return the names ``VAX_LOT1`` .. ``VAX_LOTn``, n being the max shot count."""
        maxNumShots = SymptomByBatchcodeTableFactory._getMaxNumShots(VAERSVAX)
        return [f"VAX_LOT{num}" for num in range(1, maxNumShots + 1)]

    @staticmethod
    def _getMaxNumShots(VAERSVAX):
        """Return the highest number of rows that share a single index value."""
        rowsPerIndexValue = VAERSVAX.index.value_counts()
        # value_counts() sorts descending by default, so the first entry is
        # the largest count.
        return rowsPerIndexValue.iloc[0]

    @staticmethod
    def _get_VAERSVAX_WITH_VAX_LOTS(VAERSVAX, index_columns):
        """Append the per-report lot columns to ``VAERSVAX`` and de-duplicate.

        The index is turned into a regular column, then rows that agree on
        ``VAERS_ID`` and on every lot column are collapsed into one.
        """
        vaxLots = SymptomByBatchcodeTableFactory._getVaxLotsTable(
            VAERSVAX, index_columns)
        combined = pd.concat([VAERSVAX, vaxLots], axis='columns')
        flattened = combined.reset_index()
        return flattened.drop_duplicates(subset=['VAERS_ID'] + index_columns)

    @staticmethod
    def _getVaxLotsTable(VAERSVAX, index_columns):
        """Return one row per ``VAERS_ID`` holding its sorted lots, one per column.

        Reports with fewer lots than ``len(index_columns)`` are padded on the
        right with the string form of NaN.
        """
        def sortedLots(lots):
            return list(lots.sort_values())

        lotsPerReport = VAERSVAX.groupby("VAERS_ID").agg(
            VAX_LOT_LIST=pd.NamedAgg(column='VAX_LOT', aggfunc=sortedLots))
        paddedLots = fillLsts(
            lsts=lotsPerReport['VAX_LOT_LIST'].tolist(),
            desiredLen=len(index_columns),
            fillValue=str(np.nan))
        return pd.DataFrame(
            paddedLots,
            columns=index_columns,
            index=lotsPerReport.index)

    @staticmethod
    def _getSymptomsTable(VAERSSYMPTOMS, symptomColumn):
        """Stack the five symptom columns into one column named ``symptomColumn``.

        Missing entries are dropped and the original index becomes a regular
        column of the returned frame.
        """
        sourceColumns = ('SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5')
        symptomSeries = [VAERSSYMPTOMS[column] for column in sourceColumns]
        stacked = pd.concat(symptomSeries).dropna()
        return stacked.to_frame(name=symptomColumn).reset_index()
def fillLsts(lsts, desiredLen, fillValue):
    """Pad each list in ``lsts`` with ``fillValue`` up to ``desiredLen`` items.

    Returns a new list holding the result of ``fillLst`` for every input
    list, in the same order.
    """
    padded = []
    for current in lsts:
        padded.append(fillLst(current, desiredLen, fillValue))
    return padded
def fillLst(lst, desiredLen, fillValue):
    """Return a copy of ``lst`` right-padded with ``fillValue`` to ``desiredLen``.

    A list that already has ``desiredLen`` or more items is copied unchanged;
    it is never truncated.
    """
    shortfall = desiredLen - len(lst)
    padding = [fillValue] * shortfall if shortfall > 0 else []
    # Concatenation always yields a new list, even when nothing is appended.
    return lst + padding