From d4ac529f5e142c7c4e466bc06a6451672a43eaa7 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Thu, 2 Feb 2023 16:29:44 +0100 Subject: [PATCH] refactoring for performance --- src/SymptomByBatchcodeTableFactory.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/SymptomByBatchcodeTableFactory.py b/src/SymptomByBatchcodeTableFactory.py index ba81712e3c0..9670b5369b1 100644 --- a/src/SymptomByBatchcodeTableFactory.py +++ b/src/SymptomByBatchcodeTableFactory.py @@ -34,10 +34,22 @@ class SymptomByBatchcodeTableFactory: @staticmethod def _getVaxLotsTable(VAERSVAX, index_columns): + VAX_LOT_LIST_Table = SymptomByBatchcodeTableFactory._getVAX_LOT_LIST_Table(VAERSVAX) + return SymptomByBatchcodeTableFactory._fillLstsInDataframe(VAX_LOT_LIST_Table, index_columns) + + @staticmethod + def _getVAX_LOT_LIST_Table(VAERSVAX): + # slow: aggfunc = lambda VAX_LOT_series: list(VAX_LOT_series.sort_values()))) + # fast: VAX_LOT_LIST_Table = VAERSVAX.groupby("VAERS_ID").agg( VAX_LOT_LIST = pd.NamedAgg( column = 'VAX_LOT', - aggfunc = lambda VAX_LOT_series: list(VAX_LOT_series.sort_values()))) + aggfunc = list)) + VAX_LOT_LIST_Table['VAX_LOT_LIST'] = VAX_LOT_LIST_Table['VAX_LOT_LIST'].apply(sorted) + return VAX_LOT_LIST_Table + + @staticmethod + def _fillLstsInDataframe(VAX_LOT_LIST_Table, index_columns): return pd.DataFrame( fillLsts( lsts = VAX_LOT_LIST_Table['VAX_LOT_LIST'].tolist(), @@ -46,6 +58,7 @@ class SymptomByBatchcodeTableFactory: columns = index_columns, index = VAX_LOT_LIST_Table.index) + @staticmethod def _getSymptomsTable(VAERSSYMPTOMS, symptomColumn): return (pd