diff --git a/src/DataFrameNormalizer.py b/src/DataFrameNormalizer.py index 699d8ba7c82..2dd3d056a5a 100644 --- a/src/DataFrameNormalizer.py +++ b/src/DataFrameNormalizer.py @@ -2,14 +2,6 @@ import numpy as np class DataFrameNormalizer: - @staticmethod - def normalize(dataFrame): - DataFrameNormalizer.removeUnknownBatchCodes(dataFrame) - DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(dataFrame) - DataFrameNormalizer._convertColumnsOfDataFrame_Y_to_1_else_0( - dataFrame, - ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT']) - @staticmethod def convertVAX_LOTColumnToUpperCase(dataFrame): dataFrame['VAX_LOT'] = dataFrame['VAX_LOT'].str.upper() diff --git a/src/VaersDescrReader.py b/src/VaersDescrReader.py index 99e541f1743..08002999477 100644 --- a/src/VaersDescrReader.py +++ b/src/VaersDescrReader.py @@ -25,11 +25,15 @@ class VaersDescrReader: } def _readVAERSDATA(self, file): - return self._read_csv( + VAERSDATA = self._read_csv( file = file, usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'SPLTTYPE'], parse_dates = ['RECVDATE'], date_parser = lambda dateStr: pd.to_datetime(dateStr, format = "%m/%d/%Y")) + DataFrameNormalizer._convertColumnsOfDataFrame_Y_to_1_else_0( + VAERSDATA, + ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT']) + return VAERSDATA def _readVAERSVAX(self, file): VAERSVAX = self._read_csv( @@ -40,6 +44,7 @@ class VaersDescrReader: "VAX_DOSE_SERIES": "string", "VAX_LOT": "string" }) + DataFrameNormalizer.removeUnknownBatchCodes(VAERSVAX) DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX) return VAERSVAX diff --git a/src/VaersDescrReaderTest.py b/src/VaersDescrReaderTest.py index 98c535f6651..dbba3397024 100644 --- a/src/VaersDescrReaderTest.py +++ b/src/VaersDescrReaderTest.py @@ -19,7 +19,7 @@ class VaersDescrReaderTest(unittest.TestCase): vaersDescr['VAERSDATA'], TestHelper.createDataFrame( columns = ['RECVDATE', 'DIED', 'L_THREAT', 'ER_VISIT', 'HOSPITAL', 'DISABLE', 'SPLTTYPE'], - data = [ [pd.to_datetime('01/01/2023', format = "%m/%d/%Y"), np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]], + data = [ [pd.to_datetime('01/01/2023', format = "%m/%d/%Y"), 0, 0, 0, 0, 0, np.nan]], index = pd.Index( name = 'VAERS_ID', data = [2547730]))) diff --git a/src/VaersReader.py b/src/VaersReader.py index 94e71bcd4ed..a8537f5a82f 100644 --- a/src/VaersReader.py +++ b/src/VaersReader.py @@ -1,7 +1,6 @@ from CountryColumnAdder import CountryColumnAdder from VaersDescrReader import VaersDescrReader from VaersDescr2DataFrameConverter import VaersDescr2DataFrameConverter -from DataFrameNormalizer import DataFrameNormalizer from SevereColumnAdder import SevereColumnAdder def getVaersForYears(years): @@ -24,6 +23,5 @@ def _getVaersDescrReader(): def _getVaers(vaersDescrs, addCountryColumn): dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs) dataFrame = addCountryColumn(dataFrame) - DataFrameNormalizer.normalize(dataFrame) dataFrame = SevereColumnAdder.addSevereColumn(dataFrame) return dataFrame \ No newline at end of file diff --git a/src/testdata/2023VAERSVAX.csv b/src/testdata/2023VAERSVAX.csv index f69bfff3a1f..fc3b36e29ee 100644 --- a/src/testdata/2023VAERSVAX.csv +++ b/src/testdata/2023VAERSVAX.csv @@ -1,3 +1,4 @@ VAERS_ID,VAX_TYPE,VAX_MANU,VAX_LOT,VAX_DOSE_SERIES,VAX_ROUTE,VAX_SITE,VAX_NAME 2547730,COVID19,JANSSEN,1808982,UNK,SYR,AR,COVID19 (COVID19 (JANSSEN)) -2547730,COVID19,PFIZER\BIONTECH,EW0175,1,IM,,COVID19 (COVID19 (PFIZER-BIONTECH)) +2547730,COVID19,PFIZER\BIONTECH,ew0175,1,IM,,COVID19 (COVID19 (PFIZER-BIONTECH)) +2547731,COVID19,PFIZER\BIONTECH,UNKNOWN TO ME,1,IM,,COVID19 (COVID19 (PFIZER-BIONTECH))