Normalize data while reading VAERS data
This commit is contained in:
@@ -2,14 +2,6 @@ import numpy as np
|
|||||||
|
|
||||||
class DataFrameNormalizer:
|
class DataFrameNormalizer:
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def normalize(dataFrame):
|
|
||||||
DataFrameNormalizer.removeUnknownBatchCodes(dataFrame)
|
|
||||||
DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(dataFrame)
|
|
||||||
DataFrameNormalizer._convertColumnsOfDataFrame_Y_to_1_else_0(
|
|
||||||
dataFrame,
|
|
||||||
['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def convertVAX_LOTColumnToUpperCase(dataFrame):
|
def convertVAX_LOTColumnToUpperCase(dataFrame):
|
||||||
dataFrame['VAX_LOT'] = dataFrame['VAX_LOT'].str.upper()
|
dataFrame['VAX_LOT'] = dataFrame['VAX_LOT'].str.upper()
|
||||||
|
|||||||
@@ -25,11 +25,15 @@ class VaersDescrReader:
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _readVAERSDATA(self, file):
|
def _readVAERSDATA(self, file):
|
||||||
return self._read_csv(
|
VAERSDATA = self._read_csv(
|
||||||
file = file,
|
file = file,
|
||||||
usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'SPLTTYPE'],
|
usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'SPLTTYPE'],
|
||||||
parse_dates = ['RECVDATE'],
|
parse_dates = ['RECVDATE'],
|
||||||
date_parser = lambda dateStr: pd.to_datetime(dateStr, format = "%m/%d/%Y"))
|
date_parser = lambda dateStr: pd.to_datetime(dateStr, format = "%m/%d/%Y"))
|
||||||
|
DataFrameNormalizer._convertColumnsOfDataFrame_Y_to_1_else_0(
|
||||||
|
VAERSDATA,
|
||||||
|
['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])
|
||||||
|
return VAERSDATA
|
||||||
|
|
||||||
def _readVAERSVAX(self, file):
|
def _readVAERSVAX(self, file):
|
||||||
VAERSVAX = self._read_csv(
|
VAERSVAX = self._read_csv(
|
||||||
@@ -40,6 +44,7 @@ class VaersDescrReader:
|
|||||||
"VAX_DOSE_SERIES": "string",
|
"VAX_DOSE_SERIES": "string",
|
||||||
"VAX_LOT": "string"
|
"VAX_LOT": "string"
|
||||||
})
|
})
|
||||||
|
DataFrameNormalizer.removeUnknownBatchCodes(VAERSVAX)
|
||||||
DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX)
|
DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX)
|
||||||
return VAERSVAX
|
return VAERSVAX
|
||||||
|
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ class VaersDescrReaderTest(unittest.TestCase):
|
|||||||
vaersDescr['VAERSDATA'],
|
vaersDescr['VAERSDATA'],
|
||||||
TestHelper.createDataFrame(
|
TestHelper.createDataFrame(
|
||||||
columns = ['RECVDATE', 'DIED', 'L_THREAT', 'ER_VISIT', 'HOSPITAL', 'DISABLE', 'SPLTTYPE'],
|
columns = ['RECVDATE', 'DIED', 'L_THREAT', 'ER_VISIT', 'HOSPITAL', 'DISABLE', 'SPLTTYPE'],
|
||||||
data = [ [pd.to_datetime('01/01/2023', format = "%m/%d/%Y"), np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]],
|
data = [ [pd.to_datetime('01/01/2023', format = "%m/%d/%Y"), 0, 0, 0, 0, 0, np.nan]],
|
||||||
index = pd.Index(
|
index = pd.Index(
|
||||||
name = 'VAERS_ID',
|
name = 'VAERS_ID',
|
||||||
data = [2547730])))
|
data = [2547730])))
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
from CountryColumnAdder import CountryColumnAdder
|
from CountryColumnAdder import CountryColumnAdder
|
||||||
from VaersDescrReader import VaersDescrReader
|
from VaersDescrReader import VaersDescrReader
|
||||||
from VaersDescr2DataFrameConverter import VaersDescr2DataFrameConverter
|
from VaersDescr2DataFrameConverter import VaersDescr2DataFrameConverter
|
||||||
from DataFrameNormalizer import DataFrameNormalizer
|
|
||||||
from SevereColumnAdder import SevereColumnAdder
|
from SevereColumnAdder import SevereColumnAdder
|
||||||
|
|
||||||
def getVaersForYears(years):
|
def getVaersForYears(years):
|
||||||
@@ -24,6 +23,5 @@ def _getVaersDescrReader():
|
|||||||
def _getVaers(vaersDescrs, addCountryColumn):
|
def _getVaers(vaersDescrs, addCountryColumn):
|
||||||
dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)
|
dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)
|
||||||
dataFrame = addCountryColumn(dataFrame)
|
dataFrame = addCountryColumn(dataFrame)
|
||||||
DataFrameNormalizer.normalize(dataFrame)
|
|
||||||
dataFrame = SevereColumnAdder.addSevereColumn(dataFrame)
|
dataFrame = SevereColumnAdder.addSevereColumn(dataFrame)
|
||||||
return dataFrame
|
return dataFrame
|
||||||
3
src/testdata/2023VAERSVAX.csv
vendored
3
src/testdata/2023VAERSVAX.csv
vendored
@@ -1,3 +1,4 @@
|
|||||||
VAERS_ID,VAX_TYPE,VAX_MANU,VAX_LOT,VAX_DOSE_SERIES,VAX_ROUTE,VAX_SITE,VAX_NAME
|
VAERS_ID,VAX_TYPE,VAX_MANU,VAX_LOT,VAX_DOSE_SERIES,VAX_ROUTE,VAX_SITE,VAX_NAME
|
||||||
2547730,COVID19,JANSSEN,1808982,UNK,SYR,AR,COVID19 (COVID19 (JANSSEN))
|
2547730,COVID19,JANSSEN,1808982,UNK,SYR,AR,COVID19 (COVID19 (JANSSEN))
|
||||||
2547730,COVID19,PFIZER\BIONTECH,EW0175,1,IM,,COVID19 (COVID19 (PFIZER-BIONTECH))
|
2547730,COVID19,PFIZER\BIONTECH,ew0175,1,IM,,COVID19 (COVID19 (PFIZER-BIONTECH))
|
||||||
|
2547731,COVID19,PFIZER\BIONTECH,UNKNOWN TO ME,1,IM,,COVID19 (COVID19 (PFIZER-BIONTECH))
|
||||||
|
|||||||
|
Reference in New Issue
Block a user