refining VaersDescrReaderTest

This commit is contained in:
frankknoll
2023-01-18 18:07:01 +01:00
parent bc87b9d34d
commit 28e7eb632a
3 changed files with 30 additions and 9 deletions

View File

@@ -25,7 +25,7 @@ class DataFrameFilterTest(unittest.TestCase):
index = [ index = [
"0916600", "0916600",
"0916601"], "0916601"],
dtypes = {'VAX_DOSE_SERIES': "string"}) dtypes = {"VAX_DOSE_SERIES": "string"})
}, },
{ {
'VAERSDATA': TestHelper.createDataFrame( 'VAERSDATA': TestHelper.createDataFrame(
@@ -42,7 +42,7 @@ class DataFrameFilterTest(unittest.TestCase):
index = [ index = [
"1996873", "1996873",
"1996874"], "1996874"],
dtypes = {'VAX_DOSE_SERIES': "string"}) dtypes = {"VAX_DOSE_SERIES": "string"})
} }
]) ])
dataFrameFilter = DataFrameFilter() dataFrameFilter = DataFrameFilter()
@@ -60,5 +60,5 @@ class DataFrameFilterTest(unittest.TestCase):
"0916600", "0916600",
"0916601", "0916601",
"1996874"], "1996874"],
dtypes = {'VAX_DOSE_SERIES': "string"}) dtypes = {"VAX_DOSE_SERIES": "string"})
assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False) assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)

View File

@@ -11,13 +11,15 @@ class VaersDescrReader:
def readVaersDescrForYear(self, year): def readVaersDescrForYear(self, year):
return { return {
'VAERSDATA': self._readVAERSDATA('{dataDir}/{year}VAERSDATA.csv'.format(dataDir = self.dataDir, year = year)), 'VAERSDATA': self._readVAERSDATA('{dataDir}/{year}VAERSDATA.csv'.format(dataDir = self.dataDir, year = year)),
'VAERSVAX': self._readVAERSVAX('{dataDir}/{year}VAERSVAX.csv'.format(dataDir = self.dataDir, year = year)) 'VAERSVAX': self._readVAERSVAX('{dataDir}/{year}VAERSVAX.csv'.format(dataDir = self.dataDir, year = year)),
'VAERSSYMPTOMS': self._readVAERSSYMPTOMS('{dataDir}/{year}VAERSSYMPTOMS.csv'.format(dataDir = self.dataDir, year = year))
} }
def readNonDomesticVaersDescr(self): def readNonDomesticVaersDescr(self):
return { return {
'VAERSDATA': self._readVAERSDATA(self.dataDir + "/NonDomesticVAERSDATA.csv"), 'VAERSDATA': self._readVAERSDATA(self.dataDir + "/NonDomesticVAERSDATA.csv"),
'VAERSVAX': self._readVAERSVAX(self.dataDir + "/NonDomesticVAERSVAX.csv") 'VAERSVAX': self._readVAERSVAX(self.dataDir + "/NonDomesticVAERSVAX.csv"),
'VAERSSYMPTOMS': self._readVAERSSYMPTOMS(self.dataDir + "/NonDomesticVAERSSYMPTOMS.csv")
} }
def _readVAERSDATA(self, file): def _readVAERSDATA(self, file):
@@ -37,6 +39,11 @@ class VaersDescrReader:
"VAX_LOT": "string" "VAX_LOT": "string"
}) })
def _readVAERSSYMPTOMS(self, file):
return self._read_csv(
file = file,
usecols = ['VAERS_ID', 'SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5'])
def _read_csv(self, file, **kwargs): def _read_csv(self, file, **kwargs):
return pd.read_csv( return pd.read_csv(
file, file,

View File

@@ -22,8 +22,7 @@ class VaersDescrReaderTest(unittest.TestCase):
data = [ [pd.to_datetime('01/01/2023', format = "%m/%d/%Y"), np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]], data = [ [pd.to_datetime('01/01/2023', format = "%m/%d/%Y"), np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]],
index = pd.Index( index = pd.Index(
name = 'VAERS_ID', name = 'VAERS_ID',
data=[2547730])), data=[2547730])))
check_dtype = False)
assert_frame_equal( assert_frame_equal(
vaersDescr['VAERSVAX'], vaersDescr['VAERSVAX'],
TestHelper.createDataFrame( TestHelper.createDataFrame(
@@ -31,5 +30,20 @@ class VaersDescrReaderTest(unittest.TestCase):
data = [ ['COVID19', 'JANSSEN', '1808982', 'UNK']], data = [ ['COVID19', 'JANSSEN', '1808982', 'UNK']],
index = pd.Index( index = pd.Index(
name = 'VAERS_ID', name = 'VAERS_ID',
data=[2547730])), data=[2547730]),
check_dtype = False) dtypes = {
'VAX_DOSE_SERIES': 'string',
'VAX_LOT': 'string'}))
assert_frame_equal(
vaersDescr['VAERSSYMPTOMS'],
TestHelper.createDataFrame(
columns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5'],
data = [ ['Blood pressure orthostatic abnormal', 'COVID-19', 'Coma', 'Computerised tomogram', 'Exposure to SARS-CoV-2'],
['Head injury', 'Headache', 'Laboratory test', 'Magnetic resonance imaging', 'SARS-CoV-2 antibody test negative'],
['SARS-CoV-2 test positive', 'Unresponsive to stimuli', 'X-ray', np.nan, np.nan]],
index = pd.Index(
name = 'VAERS_ID',
data=[
2547730,
2547730,
2547730])))