In [None]:
import numpy as np
import pandas as pd

# Notebook-wide display settings: render up to 100 rows and never hide columns.
pd.set_option(
    'display.max_rows', 100,
    'display.max_columns', None)

In [None]:
import pandas as pd

class VaersDescrReader:
    """Reads the yearly VAERS CSV exports stored below a data directory.

    Each year is expected in `<dataDir>/<year>VAERSData/` and is returned as a
    dict holding one DataFrame per CSV file ('VAERSDATA' and 'VAERSVAX').
    """

    def __init__(self, dataDir):
        # Root folder that contains one "<year>VAERSData" folder per year.
        self.dataDir = dataDir

    def readAllVaersDescrs(self):
        """Return the descriptors of the years 2021 and 2022, in that order."""
        return self.readVaersDescrs(["2021", "2022"])

    def readVaersDescrs(self, years):
        """Return a list with one descriptor per entry of `years`."""
        vaersDescrs = []
        for year in years:
            vaersDescrs.append(self.readVaersDescr(year))
        return vaersDescrs

    def readVaersDescr(self, year):
        """Read the VAERSDATA and VAERSVAX files of `year` (a string).

        Returns a dict with the keys 'VAERSDATA' and 'VAERSVAX', each mapping
        to a DataFrame indexed by VAERS_ID.
        """
        # Both files live in the same folder and share the "<year>" file prefix.
        prefix = "".join([self.dataDir, "/", year, "VAERSData/", year])
        vaersData = self._read_csv(
            prefix + "VAERSDATA.csv",
            ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])
        vaersVax = self._read_csv(
            prefix + "VAERSVAX.csv",
            ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],
            dtype = {"VAX_DOSE_SERIES": "string"})
        return {'VAERSDATA': vaersData, 'VAERSVAX': vaersVax}

    def _read_csv(self, file, usecols, dtype = {}):
        # Only `usecols` are loaded and VAERS_ID becomes the index.
        options = {
            'index_col': 'VAERS_ID',
            'encoding': 'latin1',
            'low_memory': False,
            'usecols': usecols,
            'dtype': dtype,
        }
        return pd.read_csv(file, **options)


In [None]:
import pandas as pd

class VaersDescr2DataFrameConverter:
    """Turns VAERS descriptors (dicts of DataFrames) into one flat DataFrame."""

    @staticmethod
    def createDataFrameFromDescr(vaersDescr):
        """Left-join the 'VAERSVAX' rows onto the 'VAERSDATA' rows by index.

        The join is validated as one-to-many, so a duplicated index on the
        'VAERSDATA' side makes pandas raise.
        """
        vaersData = vaersDescr['VAERSDATA']
        vaersVax = vaersDescr['VAERSVAX']
        return vaersData.merge(
            vaersVax,
            how = 'left',
            left_index = True,
            right_index = True,
            validate = 'one_to_many')

    @staticmethod
    def createDataFrameFromDescrs(vaersDescrs):
        """Convert every descriptor and stack the resulting frames vertically."""
        dataFrames = []
        for vaersDescr in vaersDescrs:
            dataFrames.append(VaersDescr2DataFrameConverter.createDataFrameFromDescr(vaersDescr))
        return pd.concat(dataFrames)


In [None]:
def filterDataFrame(df, manufacturer = None, dose = None):
    """Return the COVID19 rows of `df`, optionally narrowed further.

    Parameters:
        df: frame with the columns 'VAX_TYPE', 'VAX_MANU' and 'VAX_DOSE_SERIES'.
        manufacturer: if given, keep only rows whose 'VAX_MANU' equals it.
        dose: if given, keep only rows whose 'VAX_DOSE_SERIES' contains this
            text. It is matched literally, so a value such as '7+' is not
            interpreted as a regular expression.

    Returns:
        The rows of `df` that satisfy every requested condition.
    """
    mask = df["VAX_TYPE"] == "COVID19"
    if manufacturer is not None:
        mask = mask & (df["VAX_MANU"] == manufacturer)
    if dose is not None:
        # na = False: a row without a dose entry can never match a requested
        # dose, and it keeps missing values out of the boolean mask.
        mask = mask & df["VAX_DOSE_SERIES"].str.contains(dose, regex = False, na = False)
    return df[mask]

def filterDataFrameForSevereEffects(df, dose):
    """Keep the COVID19 rows of `df` for `dose`, across all manufacturers."""
    return filterDataFrame(df, manufacturer = None, dose = dose)


In [None]:
def createBatchCodeTable(df : pd.DataFrame):
    """Count reports per batch code (VAX_LOT), overall and per outcome flag.

    The result has the columns 'ADRs', 'DEATHS', 'DISABILITIES' and
    'LIFE THREATENING ILLNESSES'; batch codes without a flagged report in a
    category get 0 there.
    """
    def countReportsPerLot(frame):
        return frame[['VAX_LOT']].value_counts()

    # Result column -> flag column that must equal 'Y' for a report to count.
    flagColumnByLabel = {
        'DEATHS': 'DIED',
        'DISABILITIES': 'DISABLE',
        'LIFE THREATENING ILLNESSES': 'L_THREAT',
    }
    countsByLabel = {'ADRs': countReportsPerLot(df)}
    for label, flagColumn in flagColumnByLabel.items():
        countsByLabel[label] = countReportsPerLot(df[df[flagColumn] == 'Y'])

    batchCodeTable = pd.concat(countsByLabel, axis = 'columns')
    return batchCodeTable.replace(to_replace = np.nan, value = 0)

def createManufacturerByBatchCodeTable(df):
    """Map each batch code (VAX_LOT) to the VAX_MANU of its first row in `df`.

    Returns a frame indexed by 'VAX_LOT' with the single column 'VAX_MANU'.
    """
    return (
        df[['VAX_LOT', 'VAX_MANU']]
        .drop_duplicates(subset = ['VAX_LOT'])
        .set_index('VAX_LOT'))

def createCompanyByBatchCodeTable(df):
    """Same as createManufacturerByBatchCodeTable(), with the column named 'COMPANY'."""
    manufacturerByBatchCodeTable = createManufacturerByBatchCodeTable(df)
    return manufacturerByBatchCodeTable.rename(columns = {"VAX_MANU": "COMPANY"})

# create table from https://www.howbadismybatch.com/combined.html
# FK-TODO: DRY with createBatchCodeTable()
def createSevereEffectsBatchCodeTable(df):
    """Count reports per batch code for every severe outcome and add the company.

    The result is indexed by 'VAX_LOT' and has one count column per outcome
    plus a 'COMPANY' column; missing values are replaced by 0.
    """
    # Result column -> flag column that must equal 'Y' for a report to count.
    flagColumnByLabel = {
        'DEATHS': 'DIED',
        'DISABILITIES': 'DISABLE',
        'LIFE THREATENING ILLNESSES': 'L_THREAT',
        'HOSPITALISATIONS': 'HOSPITAL',
        'EMERGENCY ROOM OR DOCTOR VISITS': 'ER_VISIT',
    }
    countsByLabel = {'ADRs': df['VAX_LOT'].value_counts()}
    countsByLabel.update({
        label: df[df[flagColumn] == 'Y']['VAX_LOT'].value_counts()
        for label, flagColumn in flagColumnByLabel.items()
    })

    severeEffectsTable = pd.concat(countsByLabel, axis = 'columns')
    severeEffectsTable.index.name = 'VAX_LOT'

    # Attach the company of each batch code; both sides are keyed by VAX_LOT.
    withCompany = severeEffectsTable.merge(
        createCompanyByBatchCodeTable(df),
        how = 'left',
        left_index = True,
        right_index = True,
        validate = 'one_to_one')
    return withCompany.replace(to_replace = np.nan, value = 0)


In [None]:
import unittest

In [None]:
from pandas.testing import assert_frame_equal

class CreateAndFilterDataFrameTest(unittest.TestCase):

    def test_createAndFilterDataFrameFromDescrs(self):
        # Given
        vaersDescrs = [
               {
                   'VAERSDATA': self.createDataFrame(
                       columns = ['DIED', 'L_THREAT', 'DISABLE'],
                       data = [  ['Y',    np.NaN,     np.NaN],
                                 [np.NaN, np.NaN,     'Y']],
                       index = [
                           "0916600",
                           "0916601"]),
                   'VAERSVAX': self.createDataFrame(
                       columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
                       data = [  ['COVID19',  'MODERNA',  '037K20A', '1'],
                                 ['COVID19',  'MODERNA',  '025L20A', '1']],
                       index = [
                           "0916600",
                           "0916601"],
                       dtypes = {'VAX_DOSE_SERIES': "string"})
               },
               {
                    'VAERSDATA': self.createDataFrame(
                       columns = ['DIED', 'L_THREAT', 'DISABLE'],
                       data = [  [np.NaN, np.NaN,     np.NaN],
                                 [np.NaN, np.NaN,     'Y']],
                        index = [
                           "1996873",
                           "1996874"]),
                    'VAERSVAX': self.createDataFrame(
                        columns = ['VAX_TYPE', 'VAX_MANU',         'VAX_LOT', 'VAX_DOSE_SERIES'],
                        data = [  ['HPV9',     'MERCK & CO. INC.', 'R017624', 'UNK'],
                                  ['COVID19',  'MODERNA',          '025L20A', '1']],
                        index = [
                            "1996873",
                            "1996874"],
                        dtypes = {'VAX_DOSE_SERIES': "string"})
                }
            ]
        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)
            
        # When
        dataFrame = filterDataFrame(dataFrame, manufacturer = "MODERNA", dose = '1')
        
        # Then
        dataFrameExpected = self.createDataFrame(
            columns = ['DIED', 'L_THREAT', 'DISABLE',  'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
            data = [  ['Y',     np.NaN,    np.NaN,     'COVID19',  'MODERNA',  '037K20A', '1'],
                      [np.NaN,  np.NaN,    'Y',        'COVID19',  'MODERNA',  '025L20A', '1'],
                      [np.NaN, np.NaN,     'Y',        'COVID19',  'MODERNA',  '025L20A', '1']],
            index = [
                "0916600",
                "0916601",
                "1996874"],
            dtypes = {'VAX_DOSE_SERIES': "string"})
        assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)

    def test_createDataFrameFromForSevereEffects(self):
        # Given
        vaersDescrs = [
               {
                   'VAERSDATA': self.createDataFrame(
                       columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],
                       data = [  ['Y',    'Y',        np.NaN,    'Y',        'Y'],
                                 [np.NaN, np.NaN,     'Y',       np.NaN,     'Y']],
                       index = [
                           "0916600",
                           "0916601"]),
                   'VAERSVAX': self.createDataFrame(
                       columns = ['VAX_TYPE', 'VAX_MANU',        'VAX_LOT', 'VAX_DOSE_SERIES'],
                       data = [  ['COVID19',  'MODERNA',         '037K20A', '1'],
                                 ['COVID19',  'PFIZER\BIONTECH', '025L20A', '1']],
                       index = [
                           "0916600",
                           "0916601"],
                       dtypes = {'VAX_DOSE_SERIES': "string"})
               }
            ]
        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)
        
        # When
        dataFrame = filterDataFrameForSevereEffects(dataFrame, dose = '1')
        
        # Then
        dataFrameExpected = self.createDataFrame(
            columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'VAX_TYPE', 'VAX_MANU',        'VAX_LOT', 'VAX_DOSE_SERIES'],
            data = [  ['Y',    'Y',        np.NaN,    'Y',        'Y',        'COVID19',  'MODERNA',         '037K20A', '1'],
                      [np.NaN,  np.NaN,    'Y',       np.NaN,     'Y',        'COVID19',  'PFIZER\BIONTECH', '025L20A', '1']],
            index = [
                "0916600",
                "0916601"],
            dtypes = {'VAX_DOSE_SERIES': "string"})
        assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)

    def test_createAndFilterDataFrameFromDescrsWithFirstDose(self):
        # Given
        vaersDescrs = [
               {
                   'VAERSDATA': self.createDataFrame(
                       columns = ['DIED', 'L_THREAT', 'DISABLE'],
                       data = [  ['Y',    np.NaN,      np.NaN]],
                       index = [
                           "1048786"]),
                   'VAERSVAX': self.createDataFrame(
                       columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
                       data = [  ['COVID19',  'MODERNA',  '016M20A', '2'],
                                 ['COVID19',  'MODERNA',  '030L20A', '1']],
                       index = [
                           "1048786",
                           "1048786"],
                       dtypes = {'VAX_DOSE_SERIES': "string"})
               }
            ]
        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)
            
        # When
        dataFrame = filterDataFrame(dataFrame, manufacturer = "MODERNA", dose = '1')
        
        # Then
        dataFrameExpected = self.createDataFrame(
            columns = ['DIED', 'L_THREAT', 'DISABLE',  'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
            data = [  ['Y',     np.NaN,    np.NaN,     'COVID19',  'MODERNA',  '030L20A',  '1']],
            index = [
                "1048786"],
            dtypes = {'VAX_DOSE_SERIES': "string"})
        assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)

    def test_createAndFilterDataFrameFromDescrsWithSecondDose(self):
        # Given
        vaersDescrs = [
               {
                   'VAERSDATA': self.createDataFrame(
                       columns = ['DIED', 'L_THREAT', 'DISABLE'],
                       data = [  ['Y',    np.NaN,     np.NaN]],
                       index = [
                           "1048786"]),
                   'VAERSVAX': self.createDataFrame(
                       columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
                       data = [  ['COVID19',  'MODERNA',  '016M20A',  '2'],
                                 ['COVID19',  'MODERNA',  '030L20A',  '1']],
                       index = [
                           "1048786",
                           "1048786"],
                       dtypes = {'VAX_DOSE_SERIES': "string"})
               }
            ]
        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)
            
        # When
        dataFrame = filterDataFrame(dataFrame, manufacturer = "MODERNA", dose = '2')
        
        # Then
        dataFrameExpected = self.createDataFrame(
            columns = ['DIED', 'L_THREAT', 'DISABLE',  'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
            data = [  ['Y',     np.NaN,    np.NaN,     'COVID19',  'MODERNA',  '016M20A',  '2']],
            index = [
                "1048786"],
            dtypes = {'VAX_DOSE_SERIES': "string"})
        assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)

    def createDataFrame(self, index, columns, data, dtypes = {}):
        return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)


In [None]:
from pandas.testing import assert_frame_equal

class BatchCodeTableTest(unittest.TestCase):

    def test_createBatchCodeTable2(self):
        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(
            [
               {
                   'VAERSDATA': self.createDataFrame(
                       columns = ['DIED', 'L_THREAT', 'DISABLE'],
                       data = [  ['Y',    np.NaN,     np.NaN],
                                 [np.NaN, np.NaN,     'Y']],
                       index = [
                           "0916600",
                           "0916601"]),
                   'VAERSVAX': self.createDataFrame(
                       columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
                       data = [  ['COVID19',  'MODERNA',  '037K20A', '1'],
                                 ['COVID19',  'MODERNA',  '025L20A', '1']],
                       index = [
                           "0916600",
                           "0916601"],
                       dtypes = {'VAX_DOSE_SERIES': "string"})
               },
               {
                    'VAERSDATA': self.createDataFrame(
                       columns = ['DIED', 'L_THREAT', 'DISABLE'],
                       data = [  [np.NaN,  np.NaN,    np.NaN],
                                 [np.NaN,  np.NaN,    'Y']],
                       index = [
                           "1996873",
                           "1996874"]),
                    'VAERSVAX': self.createDataFrame(
                        columns = ['VAX_TYPE', 'VAX_MANU',         'VAX_LOT', 'VAX_DOSE_SERIES'],
                        data = [  ['HPV9',     'MERCK & CO. INC.', 'R017624', 'UNK'],
                                  ['COVID19',  'MODERNA',          '025L20A', '1']],
                        index = [
                            "1996873",
                            "1996874"],
                        dtypes = {'VAX_DOSE_SERIES': "string"})
                }
            ])
        dataFrame = filterDataFrame(dataFrame, manufacturer = "MODERNA", dose = '1')
        self._test_createBatchCodeTable(dataFrame);

    def test_createBatchCodeTable(self):
        self._test_createBatchCodeTable(
            filterDataFrame(
                VaersDescr2DataFrameConverter.createDataFrameFromDescrs(
                    VaersDescrReader("test/VAERS").readAllVaersDescrs()),
                manufacturer = "MODERNA",
                dose = '1'))

    def _test_createBatchCodeTable(self, dataFrame):
        # When
        batchCodeTable = createBatchCodeTable(dataFrame)

        # Then
        batchCodeTableExpected = pd.DataFrame(
            data = {
                'ADRs': [2, 1],
                'DEATHS': [0, 1],
                'DISABILITIES': [2, 0],
                'LIFE THREATENING ILLNESSES': [0, 0]
            },
            index = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',)))
        assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)

    def createDataFrame(self, index, columns, data, dtypes = {}):
        return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)


In [None]:
from pandas.testing import assert_frame_equal

class SevereEffectsBatchCodeTableTest(unittest.TestCase):

    def test_createSevereEffectsBatchCodeTable(self):
        # Given
        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(
            [
               {
                   'VAERSDATA': self.createDataFrame(
                       columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],
                       data = [  ['Y',    'Y',        np.NaN,    'Y',        'Y'],
                                 [np.NaN, np.NaN,     'Y',       np.NaN,     'Y']],
                       index = [
                           "0916600",
                           "0916601"]),
                   'VAERSVAX': self.createDataFrame(
                       columns = ['VAX_TYPE', 'VAX_MANU',        'VAX_LOT', 'VAX_DOSE_SERIES'],
                       data = [  ['COVID19',  'MODERNA',         '037K20A', '1'],
                                 ['COVID19',  'PFIZER\BIONTECH', '025L20A', '1']],
                       index = [
                           "0916600",
                           "0916601"],
                       dtypes = {'VAX_DOSE_SERIES': "string"})
               }
            ]
        )
        dataFrame = filterDataFrameForSevereEffects(dataFrame, dose = '1')

        # When
        batchCodeTable = createSevereEffectsBatchCodeTable(dataFrame)

        # Then
        batchCodeTableExpected = pd.DataFrame(
            data = {
                'ADRs': [1, 1],
                'DEATHS': [1, 0],
                'DISABILITIES': [0, 1],
                'LIFE THREATENING ILLNESSES': [1, 0],
                'HOSPITALISATIONS': [1, 0],
                'EMERGENCY ROOM OR DOCTOR VISITS': [1, 1],
                'COMPANY': ['MODERNA', 'PFIZER\BIONTECH']
            },
            index = pd.Index(['037K20A', '025L20A'], name='VAX_LOT'))
        assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)

    def createDataFrame(self, index, columns, data, dtypes = {}):
        return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)


In [None]:
# Run the notebook's TestCase classes inside the kernel. argv = [''] keeps
# unittest from parsing the kernel's own command-line arguments, and
# exit = False stops it from calling sys.exit() when the run is finished.
unittest.main(argv = [''], verbosity = 2, exit = False)

In [None]:
def saveBatchCodeTable(manufacturer, excelFile):
    """Build the first-dose batch code table of `manufacturer`, show and save it.

    The VAERS files are read from the "VAERS" directory; the table is
    displayed in the notebook and written to `excelFile`.
    """
    vaersDescrs = VaersDescrReader("VAERS").readAllVaersDescrs()
    allReports = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)
    firstDoseReports = filterDataFrame(allReports, manufacturer = manufacturer, dose = '1')
    batchCodeTable = createBatchCodeTable(firstDoseReports)
    display(manufacturer, batchCodeTable)
    batchCodeTable.to_excel(excelFile)

In [None]:
# One Excel file per manufacturer. The backslash in the Pfizer name is escaped
# explicitly: a bare "\B" is an invalid escape sequence that Python warns about.
saveBatchCodeTable("MODERNA", "results/moderna.xlsx")
saveBatchCodeTable("PFIZER\\BIONTECH", "results/pfizer.xlsx")
saveBatchCodeTable("JANSSEN", "results/janssen.xlsx")

In [None]:
def saveSevereEffectsBatchCodeTable(excelFile):
    """Build the first-dose severe-effects batch code table, show and save it.

    The VAERS files are read from the "VAERS" directory; the table is
    displayed in the notebook and written to `excelFile`.
    """
    vaersDescrs = VaersDescrReader("VAERS").readAllVaersDescrs()
    allReports = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)
    firstDoseReports = filterDataFrameForSevereEffects(allReports, dose = '1')
    severeEffectsBatchCodeTable = createSevereEffectsBatchCodeTable(firstDoseReports)
    display('severeEffectsBatchCodeTable', severeEffectsBatchCodeTable)
    severeEffectsBatchCodeTable.to_excel(excelFile)

In [None]:
# Build the severe-effects batch code table and write it to results/severeEffects.xlsx.
saveSevereEffectsBatchCodeTable('results/severeEffects.xlsx')