64 lines
2.5 KiB
Python
64 lines
2.5 KiB
Python
import pandas as pd
|
|
from DataFrameNormalizer import DataFrameNormalizer
|
|
|
|
|
|
class VaersDescrReader:
    """Read the VAERS CSV files stored below one directory into pandas DataFrames.

    A "VAERS description" is a dict with the keys 'VAERSDATA', 'VAERSVAX' and
    'VAERSSYMPTOMS', each mapping to the DataFrame read from the CSV file of
    the same name. Every DataFrame is indexed by the 'VAERS_ID' column.
    """

    def __init__(self, dataDir):
        """Remember the directory that contains the VAERS CSV files.

        dataDir -- directory path as a string, without a trailing slash
        """
        self.dataDir = dataDir

    def readVaersDescrsForYears(self, years):
        """Return a list with one VAERS description per entry of `years`, in order."""
        return [self.readVaersDescrForYear(year) for year in years]

    def readVaersDescrForYear(self, year):
        """Return the VAERS description read from the '<dataDir>/<year>VAERS*.csv' files."""
        return self._readVaersDescr(
            '{dataDir}/{year}'.format(dataDir = self.dataDir, year = year))

    def readNonDomesticVaersDescr(self):
        """Return the VAERS description read from the '<dataDir>/NonDomesticVAERS*.csv' files."""
        return self._readVaersDescr(self.dataDir + '/NonDomestic')

    def _readVaersDescr(self, filePrefix):
        """Read the three CSV files whose paths start with `filePrefix`.

        The files are read in the order VAERSDATA, VAERSVAX, VAERSSYMPTOMS.
        """
        return {
            'VAERSDATA': self._readVAERSDATA(filePrefix + 'VAERSDATA.csv'),
            'VAERSVAX': self._readVAERSVAX(filePrefix + 'VAERSVAX.csv'),
            'VAERSSYMPTOMS': self._readVAERSSYMPTOMS(filePrefix + 'VAERSSYMPTOMS.csv')
        }

    def _readVAERSDATA(self, file):
        """Read a VAERSDATA CSV file.

        RECVDATE is converted to datetimes; the outcome flag columns are then
        handed to DataFrameNormalizer (defined outside this file) for
        normalization.
        """
        VAERSDATA = self._read_csv(
            file = file,
            usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'SPLTTYPE'])
        # The dates are parsed after loading instead of through read_csv's
        # `date_parser` argument, which is deprecated since pandas 2.0.
        self._convertRECVDATEToDatetime(VAERSDATA)
        # The return value is not used, so the normalizer is relied upon to
        # modify VAERSDATA in place.
        DataFrameNormalizer._convertColumnsOfDataFrame_Y_to_1_else_0(
            VAERSDATA,
            ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])
        return VAERSDATA

    def _convertRECVDATEToDatetime(self, VAERSDATA):
        """Convert the 'RECVDATE' column of `VAERSDATA` in place from 'MM/DD/YYYY' text to datetimes."""
        VAERSDATA['RECVDATE'] = pd.to_datetime(VAERSDATA['RECVDATE'], format = '%m/%d/%Y')

    def _readVAERSVAX(self, file):
        """Read a VAERSVAX CSV file.

        VAX_DOSE_SERIES and VAX_LOT are read with the pandas 'string' dtype.
        The frame is then passed to two DataFrameNormalizer helpers (defined
        outside this file) whose return values are not used, so they are
        relied upon to modify it in place.
        """
        VAERSVAX = self._read_csv(
            file = file,
            usecols = ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],
            dtype =
                {
                    'VAX_DOSE_SERIES': 'string',
                    'VAX_LOT': 'string'
                })
        DataFrameNormalizer.removeUnknownBatchCodes(VAERSVAX)
        DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX)
        return VAERSVAX

    def _readVAERSSYMPTOMS(self, file):
        """Read the VAERS_ID and SYMPTOM1..SYMPTOM5 columns of a VAERSSYMPTOMS CSV file."""
        return self._read_csv(
            file = file,
            usecols = ['VAERS_ID', 'SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5'])

    def _read_csv(self, file, **kwargs):
        """Read `file` with pandas using the settings shared by all VAERS files.

        The file is decoded as latin1, 'VAERS_ID' becomes the index and the
        file is parsed in one pass (low_memory = False). Additional keyword
        arguments are forwarded to pandas.read_csv unchanged.
        """
        return pd.read_csv(
            file,
            index_col = 'VAERS_ID',
            encoding = 'latin1',
            low_memory = False,
            **kwargs)
|