diff --git a/src/SymptomsByBatchcodesTableFactory.py b/src/SymptomsByBatchcodesTableFactory.py index d61b8b1b1fe..657a80942bd 100644 --- a/src/SymptomsByBatchcodesTableFactory.py +++ b/src/SymptomsByBatchcodesTableFactory.py @@ -5,23 +5,24 @@ class SymptomsByBatchcodesTableFactory: @staticmethod def createSymptomsByBatchcodesTable(VAERSVAX, VAERSSYMPTOMS): + index_columns = ['VAX_LOT1', 'VAX_LOT2'] return pd.merge( - SymptomsByBatchcodesTableFactory._get_VAERSVAX_WITH_VAX_LOTS(VAERSVAX), + SymptomsByBatchcodesTableFactory._get_VAERSVAX_WITH_VAX_LOTS(VAERSVAX, index_columns), SymptomsByBatchcodesTableFactory._getSymptomsTable(VAERSSYMPTOMS), - on = 'VAERS_ID').set_index(['VAX_LOT1', 'VAX_LOT2'])[['SYMPTOMS']] + on = 'VAERS_ID').set_index(index_columns)[['SYMPTOMS']] @staticmethod - def _get_VAERSVAX_WITH_VAX_LOTS(VAERSVAX): + def _get_VAERSVAX_WITH_VAX_LOTS(VAERSVAX, index_columns): return pd.concat( - [VAERSVAX, SymptomsByBatchcodesTableFactory._getVaxLotsTable(VAERSVAX)], - axis='columns').reset_index().drop_duplicates(subset=['VAERS_ID', 'VAX_LOT1', 'VAX_LOT2']) + [VAERSVAX, SymptomsByBatchcodesTableFactory._getVaxLotsTable(VAERSVAX, index_columns)], + axis='columns').reset_index().drop_duplicates(subset = ['VAERS_ID'] + index_columns) @staticmethod - def _getVaxLotsTable(VAERSVAX): + def _getVaxLotsTable(VAERSVAX, index_columns): VAX_LOT_LIST_Table = VAERSVAX.groupby("VAERS_ID").agg(VAX_LOT_LIST = pd.NamedAgg(column = 'VAX_LOT', aggfunc = list)) return pd.DataFrame( [fill(VAX_LOTS, 2, str(np.nan)) for VAX_LOTS in VAX_LOT_LIST_Table['VAX_LOT_LIST'].tolist()], - columns = ['VAX_LOT1', 'VAX_LOT2'], + columns = index_columns, index = VAX_LOT_LIST_Table.index) @staticmethod diff --git a/src/SymptomsByBatchcodesTableFactoryTest.py b/src/SymptomsByBatchcodesTableFactoryTest.py index a328f60c45a..c18528e4766 100644 --- a/src/SymptomsByBatchcodesTableFactoryTest.py +++ b/src/SymptomsByBatchcodesTableFactoryTest.py @@ -61,9 +61,9 @@ class SymptomsByBatchcodesTableFactoryTest(unittest.TestCase): def test_createSymptomsByBatchcodesTable_two_patients_same_symptoms(self): # Given VAERSVAX = TestHelper.createDataFrame( - columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'], - data = [ ['COVID19', 'JANSSEN', 'EW0175', '1'], - ['COVID19', 'JANSSEN', 'EW0175', '1']], + columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'], + data = [ ['COVID19', 'JANSSEN', 'EW0175', '1'], + ['COVID19', 'JANSSEN', 'EW0175', '1']], index = pd.Index( name = 'VAERS_ID', data=[