refining VaersDescrReaderTest

This commit is contained in:
frankknoll
2023-01-18 18:07:01 +01:00
parent bc87b9d34d
commit 28e7eb632a
3 changed files with 30 additions and 9 deletions

View File

@@ -25,7 +25,7 @@ class DataFrameFilterTest(unittest.TestCase):
index = [
"0916600",
"0916601"],
dtypes = {'VAX_DOSE_SERIES': "string"})
dtypes = {"VAX_DOSE_SERIES": "string"})
},
{
'VAERSDATA': TestHelper.createDataFrame(
@@ -42,7 +42,7 @@ class DataFrameFilterTest(unittest.TestCase):
index = [
"1996873",
"1996874"],
dtypes = {'VAX_DOSE_SERIES': "string"})
dtypes = {"VAX_DOSE_SERIES": "string"})
}
])
dataFrameFilter = DataFrameFilter()
@@ -60,5 +60,5 @@ class DataFrameFilterTest(unittest.TestCase):
"0916600",
"0916601",
"1996874"],
dtypes = {'VAX_DOSE_SERIES': "string"})
dtypes = {"VAX_DOSE_SERIES": "string"})
assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)

View File

@@ -11,13 +11,15 @@ class VaersDescrReader:
def readVaersDescrForYear(self, year):
# Return a dict of the VAERS tables for one year, keyed by table name.
# Each value is produced by the matching _readVAERS* helper, called with the
# path '{dataDir}/{year}<TABLE>.csv' built from self.dataDir and `year`.
# NOTE(review): this text appears to be a rendered commit diff whose +/- markers
# and indentation were stripped, so the two consecutive 'VAERSVAX' lines below
# are presumably the removed line (no trailing comma) followed by its
# replacement -- confirm against the repository before treating this as source.
return {
'VAERSDATA': self._readVAERSDATA('{dataDir}/{year}VAERSDATA.csv'.format(dataDir = self.dataDir, year = year)),
'VAERSVAX': self._readVAERSVAX('{dataDir}/{year}VAERSVAX.csv'.format(dataDir = self.dataDir, year = year))
'VAERSVAX': self._readVAERSVAX('{dataDir}/{year}VAERSVAX.csv'.format(dataDir = self.dataDir, year = year)),
# The symptoms table is read from the same directory and year as the other two tables.
'VAERSSYMPTOMS': self._readVAERSSYMPTOMS('{dataDir}/{year}VAERSSYMPTOMS.csv'.format(dataDir = self.dataDir, year = year))
}
def readNonDomesticVaersDescr(self):
# Return a dict of the non-domestic VAERS tables, keyed by table name.
# Each value is produced by the matching _readVAERS* helper, called with
# self.dataDir + "/NonDomestic<TABLE>.csv".
# NOTE(review): this text appears to be a rendered commit diff whose +/- markers
# and indentation were stripped, so the two consecutive 'VAERSVAX' lines below
# are presumably the removed line (no trailing comma) followed by its
# replacement -- confirm against the repository before treating this as source.
return {
'VAERSDATA': self._readVAERSDATA(self.dataDir + "/NonDomesticVAERSDATA.csv"),
'VAERSVAX': self._readVAERSVAX(self.dataDir + "/NonDomesticVAERSVAX.csv")
'VAERSVAX': self._readVAERSVAX(self.dataDir + "/NonDomesticVAERSVAX.csv"),
# The symptoms table is read from the same directory as the other two tables.
'VAERSSYMPTOMS': self._readVAERSSYMPTOMS(self.dataDir + "/NonDomesticVAERSSYMPTOMS.csv")
}
def _readVAERSDATA(self, file):
@@ -37,6 +39,11 @@ class VaersDescrReader:
"VAX_LOT": "string"
})
def _readVAERSSYMPTOMS(self, file):
    """Read the symptoms table from ``file`` through ``self._read_csv``.

    Only the VAERS_ID column and the five SYMPTOM1..SYMPTOM5 columns are
    requested via ``usecols``; whatever ``self._read_csv`` returns is handed
    back unchanged.
    """
    symptomColumns = [
        'VAERS_ID',
        'SYMPTOM1',
        'SYMPTOM2',
        'SYMPTOM3',
        'SYMPTOM4',
        'SYMPTOM5',
    ]
    return self._read_csv(file = file, usecols = symptomColumns)
def _read_csv(self, file, **kwargs):
return pd.read_csv(
file,

View File

@@ -22,8 +22,7 @@ class VaersDescrReaderTest(unittest.TestCase):
data = [ [pd.to_datetime('01/01/2023', format = "%m/%d/%Y"), np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]],
index = pd.Index(
name = 'VAERS_ID',
data=[2547730])),
check_dtype = False)
data=[2547730])))
assert_frame_equal(
vaersDescr['VAERSVAX'],
TestHelper.createDataFrame(
@@ -31,5 +30,20 @@ class VaersDescrReaderTest(unittest.TestCase):
data = [ ['COVID19', 'JANSSEN', '1808982', 'UNK']],
index = pd.Index(
name = 'VAERS_ID',
data=[2547730])),
check_dtype = False)
data=[2547730]),
dtypes = {
'VAX_DOSE_SERIES': 'string',
'VAX_LOT': 'string'}))
assert_frame_equal(
vaersDescr['VAERSSYMPTOMS'],
TestHelper.createDataFrame(
columns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5'],
data = [ ['Blood pressure orthostatic abnormal', 'COVID-19', 'Coma', 'Computerised tomogram', 'Exposure to SARS-CoV-2'],
['Head injury', 'Headache', 'Laboratory test', 'Magnetic resonance imaging', 'SARS-CoV-2 antibody test negative'],
['SARS-CoV-2 test positive', 'Unresponsive to stimuli', 'X-ray', np.nan, np.nan]],
index = pd.Index(
name = 'VAERS_ID',
data=[
2547730,
2547730,
2547730])))