class SymptomsByBatchcodesTableFactory:
    """Build a table that lists reported symptoms per pair of batch codes.

    Both input tables are expected to carry the report id as an index
    named ``VAERS_ID``; the helpers rely on ``reset_index()`` turning that
    index into a ``VAERS_ID`` column that the final merge joins on.
    """

    @staticmethod
    def createSymptomsByBatchcodesTable(VAERSVAX, VAERSSYMPTOMS):
        """Return the symptoms of every report, indexed by its batch codes.

        Args:
            VAERSVAX: DataFrame indexed by ``VAERS_ID`` with a ``VAX_LOT``
                column, one row per administered vaccine.
            VAERSSYMPTOMS: DataFrame indexed by ``VAERS_ID`` with the
                columns ``SYMPTOM1`` .. ``SYMPTOM5``.

        Returns:
            DataFrame with the single column ``SYMPTOMS`` and a MultiIndex
            ``(VAX_LOT1, VAX_LOT2)``; one row per (report, symptom) pair.
        """
        return pd.merge(
            SymptomsByBatchcodesTableFactory._get_VAERSVAX_WITH_VAX_LOTS(VAERSVAX),
            SymptomsByBatchcodesTableFactory._getSymptomsTable(VAERSSYMPTOMS),
            on='VAERS_ID').set_index(['VAX_LOT1', 'VAX_LOT2'])[['SYMPTOMS']]

    @staticmethod
    def _get_VAERSVAX_WITH_VAX_LOTS(VAERSVAX):
        """Return one row per report with ``VAX_LOT1``/``VAX_LOT2`` added.

        The result has a ``VAERS_ID`` column, the columns of the first
        ``VAERSVAX`` row of each report, and the two lot columns.
        """
        vaxLotsTable = SymptomsByBatchcodesTableFactory._getVaxLotsTable(VAERSVAX)
        # An index join copes with VAERSVAX holding several rows per report;
        # the lots table has exactly one row per VAERS_ID.
        withLots = VAERSVAX.join(vaxLotsTable).reset_index()
        # De-duplicate per report, NOT per lot pair: different reports that
        # share the same batch codes must all keep their symptoms.
        return withLots.drop_duplicates(subset=['VAERS_ID']).reset_index(drop=True)

    @staticmethod
    def _getVaxLotsTable(VAERSVAX):
        """Return a table indexed by ``VAERS_ID`` with exactly two lot columns.

        Reports with a single lot get a missing ``VAX_LOT2``; lots beyond
        the second one are ignored because the result format only has room
        for two batch codes.
        """
        VAX_LOT_LIST_Table = VAERSVAX.groupby("VAERS_ID").agg(
            VAX_LOT_LIST=pd.NamedAgg(column='VAX_LOT', aggfunc=list))
        # Normalise every list to length two so the DataFrame constructor
        # never sees a width that disagrees with the two column names.
        lotPairs = [
            (list(lots) + [None, None])[:2]
            for lots in VAX_LOT_LIST_Table['VAX_LOT_LIST']
        ]
        return pd.DataFrame(
            lotPairs,
            columns=['VAX_LOT1', 'VAX_LOT2'],
            index=VAX_LOT_LIST_Table.index)

    @staticmethod
    def _getSymptomsTable(VAERSSYMPTOMS):
        """Return a long table with the columns ``VAERS_ID`` and ``SYMPTOMS``.

        The five symptom columns are stacked column by column, missing
        entries are removed, and each symptom is kept once per report.
        """
        symptomColumns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5']
        symptoms = pd.concat([VAERSSYMPTOMS[column] for column in symptomColumns])
        symptomsTable = symptoms.dropna().to_frame(name="SYMPTOMS").reset_index()
        # Duplicates are judged on (VAERS_ID, SYMPTOMS) so that the same
        # symptom reported by two different reports is kept for both.
        return symptomsTable.drop_duplicates().reset_index(drop=True)
b/src/SymptomsByBatchcodesTableFactoryTest.py index 69f1126cd8b..4ea1b66bd24 100644 --- a/src/SymptomsByBatchcodesTableFactoryTest.py +++ b/src/SymptomsByBatchcodesTableFactoryTest.py @@ -40,8 +40,20 @@ class SymptomsByBatchcodesTableFactoryTest(unittest.TestCase): assert_frame_equal( symptomsByBatchcodesTable, TestHelper.createDataFrame( - columns = ['Blood pressure orthostatic abnormal', 'COVID-19', 'Coma', 'Computerised tomogram', 'Exposure to SARS-CoV-2', 'Head injury', 'Headache', 'Laboratory test', 'Magnetic resonance imaging', 'SARS-CoV-2 antibody test negative', 'SARS-CoV-2 test positive', 'Unresponsive to stimuli', 'X-ray'], - data = [ [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], + columns = ['SYMPTOMS'], + data = [ ['Blood pressure orthostatic abnormal'], + ['Head injury'], + ['SARS-CoV-2 test positive'], + ['COVID-19'], + ['Headache'], + ['Unresponsive to stimuli'], + ['Coma'], + ['Laboratory test'], + ['X-ray'], + ['Computerised tomogram'], + ['Magnetic resonance imaging'], + ['Exposure to SARS-CoV-2'], + ['SARS-CoV-2 antibody test negative']], index = pd.MultiIndex.from_tuples( names = ['VAX_LOT1', 'VAX_LOT2'], - tuples = [['1808982', 'EW0175']]))) + tuples = [['1808982', 'EW0175']] * 13)))