diff --git a/src/DataFrameFilterTest.py b/src/DataFrameFilterTest.py index b0907a266f8..dfa9e006778 100644 --- a/src/DataFrameFilterTest.py +++ b/src/DataFrameFilterTest.py @@ -25,7 +25,7 @@ class DataFrameFilterTest(unittest.TestCase): index = [ "0916600", "0916601"], - dtypes = {'VAX_DOSE_SERIES': "string"}) + dtypes = {"VAX_DOSE_SERIES": "string"}) }, { 'VAERSDATA': TestHelper.createDataFrame( @@ -42,7 +42,7 @@ class DataFrameFilterTest(unittest.TestCase): index = [ "1996873", "1996874"], - dtypes = {'VAX_DOSE_SERIES': "string"}) + dtypes = {"VAX_DOSE_SERIES": "string"}) } ]) dataFrameFilter = DataFrameFilter() @@ -60,5 +60,5 @@ class DataFrameFilterTest(unittest.TestCase): "0916600", "0916601", "1996874"], - dtypes = {'VAX_DOSE_SERIES': "string"}) + dtypes = {"VAX_DOSE_SERIES": "string"}) assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False) diff --git a/src/VaersDescrReader.py b/src/VaersDescrReader.py index dccec6cb496..fb19dcb1e08 100644 --- a/src/VaersDescrReader.py +++ b/src/VaersDescrReader.py @@ -11,13 +11,15 @@ class VaersDescrReader: def readVaersDescrForYear(self, year): return { 'VAERSDATA': self._readVAERSDATA('{dataDir}/{year}VAERSDATA.csv'.format(dataDir = self.dataDir, year = year)), - 'VAERSVAX': self._readVAERSVAX('{dataDir}/{year}VAERSVAX.csv'.format(dataDir = self.dataDir, year = year)) + 'VAERSVAX': self._readVAERSVAX('{dataDir}/{year}VAERSVAX.csv'.format(dataDir = self.dataDir, year = year)), + 'VAERSSYMPTOMS': self._readVAERSSYMPTOMS('{dataDir}/{year}VAERSSYMPTOMS.csv'.format(dataDir = self.dataDir, year = year)) } def readNonDomesticVaersDescr(self): return { 'VAERSDATA': self._readVAERSDATA(self.dataDir + "/NonDomesticVAERSDATA.csv"), - 'VAERSVAX': self._readVAERSVAX(self.dataDir + "/NonDomesticVAERSVAX.csv") + 'VAERSVAX': self._readVAERSVAX(self.dataDir + "/NonDomesticVAERSVAX.csv"), + 'VAERSSYMPTOMS': self._readVAERSSYMPTOMS(self.dataDir + "/NonDomesticVAERSSYMPTOMS.csv") } def _readVAERSDATA(self, file): @@ -37,6 +39,11 @@ class VaersDescrReader: "VAX_LOT": "string" }) + def _readVAERSSYMPTOMS(self, file): + return self._read_csv( + file = file, + usecols = ['VAERS_ID', 'SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5']) + def _read_csv(self, file, **kwargs): return pd.read_csv( file, diff --git a/src/VaersDescrReaderTest.py b/src/VaersDescrReaderTest.py index a157a777325..6ab87d9717a 100644 --- a/src/VaersDescrReaderTest.py +++ b/src/VaersDescrReaderTest.py @@ -22,8 +22,7 @@ class VaersDescrReaderTest(unittest.TestCase): data = [ [pd.to_datetime('01/01/2023', format = "%m/%d/%Y"), np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]], index = pd.Index( name = 'VAERS_ID', - data=[2547730])), - check_dtype = False) + data=[2547730]))) assert_frame_equal( vaersDescr['VAERSVAX'], TestHelper.createDataFrame( @@ -31,5 +30,20 @@ class VaersDescrReaderTest(unittest.TestCase): data = [ ['COVID19', 'JANSSEN', '1808982', 'UNK']], index = pd.Index( name = 'VAERS_ID', - data=[2547730])), - check_dtype = False) + data=[2547730]), + dtypes = { + 'VAX_DOSE_SERIES': 'string', + 'VAX_LOT': 'string'})) + assert_frame_equal( + vaersDescr['VAERSSYMPTOMS'], + TestHelper.createDataFrame( + columns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5'], + data = [ ['Blood pressure orthostatic abnormal', 'COVID-19', 'Coma', 'Computerised tomogram', 'Exposure to SARS-CoV-2'], + ['Head injury', 'Headache', 'Laboratory test', 'Magnetic resonance imaging', 'SARS-CoV-2 antibody test negative'], + ['SARS-CoV-2 test positive', 'Unresponsive to stimuli', 'X-ray', np.nan, np.nan]], + index = pd.Index( + name = 'VAERS_ID', + data=[ + 2547730, + 2547730, + 2547730])))