refactoring for performance

This commit is contained in:
frankknoll
2023-02-02 16:29:44 +01:00
parent b59e276d56
commit d4ac529f5e

View File

@@ -34,10 +34,22 @@ class SymptomByBatchcodeTableFactory:
@staticmethod
def _getVaxLotsTable(VAERSVAX, index_columns):
    """Build the vaccine-lots table from VAERSVAX.

    VAERSVAX is first passed to ``_getVAX_LOT_LIST_Table``; that result and
    ``index_columns`` are then handed to ``_fillLstsInDataframe``, whose
    return value is returned unchanged.
    """
    factory = SymptomByBatchcodeTableFactory
    return factory._fillLstsInDataframe(
        factory._getVAX_LOT_LIST_Table(VAERSVAX),
        index_columns)
@staticmethod
def _getVAX_LOT_LIST_Table(VAERSVAX):
    """Collect the VAX_LOT values of each VAERS_ID into one sorted list.

    Args:
        VAERSVAX: DataFrame providing the columns 'VAERS_ID' and 'VAX_LOT'.

    Returns:
        DataFrame indexed by VAERS_ID with the single column 'VAX_LOT_LIST',
        each cell holding the ascending-sorted list of that group's
        VAX_LOT values.
    """
    # Sorting inside the aggregation, i.e.
    #   aggfunc = lambda VAX_LOT_series: list(VAX_LOT_series.sort_values())
    # is slow; aggregating with the builtin `list` and sorting the resulting
    # lists afterwards is the fast variant.
    VAX_LOT_LIST_Table = VAERSVAX.groupby("VAERS_ID").agg(
        VAX_LOT_LIST = pd.NamedAgg(
            column = 'VAX_LOT',
            aggfunc = list))
    # NOTE(review): unlike Series.sort_values, `sorted` raises TypeError when
    # a group mixes strings with NaN -- presumably VAX_LOT is cleaned
    # upstream; confirm against the caller.
    VAX_LOT_LIST_Table['VAX_LOT_LIST'] = VAX_LOT_LIST_Table['VAX_LOT_LIST'].apply(sorted)
    return VAX_LOT_LIST_Table
@staticmethod
def _fillLstsInDataframe(VAX_LOT_LIST_Table, index_columns):
return pd.DataFrame(
fillLsts(
lsts = VAX_LOT_LIST_Table['VAX_LOT_LIST'].tolist(),
@@ -46,6 +58,7 @@ class SymptomByBatchcodeTableFactory:
columns = index_columns,
index = VAX_LOT_LIST_Table.index)
@staticmethod
def _getSymptomsTable(VAERSSYMPTOMS, symptomColumn):
return (pd