In [None]:
import numpy as np
import pandas as pd

# Display configuration: show up to 100 rows and never truncate the column list.
pd.options.display.max_rows = 100
pd.options.display.max_columns = None

In [None]:
def _createDataFrame(vaersDescrs, manufacturer):
    """Combine several VAERS descriptors into one frame for a single COVID19 manufacturer.

    Parameters:
        vaersDescrs: iterable of dicts, each holding a 'VAERSDATA' and a 'VAERSVAX'
            DataFrame that are joined on their index.
        manufacturer: value the 'VAX_MANU' column must equal for a row to be kept.

    Returns:
        The concatenation of all joined descriptors, restricted to the rows whose
        'VAX_TYPE' is "COVID19" and whose 'VAX_MANU' equals `manufacturer`.
    """
    # Index-on-index join (pandas' default inner join) of the two tables of every descriptor.
    mergedFrames = [
        pd.merge(descr['VAERSDATA'], descr['VAERSVAX'], left_index = True, right_index = True)
        for descr in vaersDescrs
    ]
    combined = pd.concat(mergedFrames)

    isCovid19 = combined["VAX_TYPE"] == "COVID19"
    isManufacturer = combined["VAX_MANU"] == manufacturer
    return combined[isCovid19 & isManufacturer]

In [None]:
def read_csv(file, usecols):
    """Load the given columns of a CSV file into a DataFrame indexed by 'VAERS_ID'.

    Parameters:
        file: path or file-like object handed to pandas.read_csv.
        usecols: names of the columns to load; has to contain 'VAERS_ID' because
            that column becomes the index.

    Returns:
        The DataFrame produced by pandas.read_csv.
    """
    readOptions = {
        'index_col': 'VAERS_ID',
        'encoding': 'latin1',
        'low_memory': False,
        'usecols': usecols
    }
    return pd.read_csv(file, **readOptions)

def readVaersDescr(dataDir, year):
    """Read the VAERSDATA and VAERSVAX CSV files of one year.

    The files are looked up at
    <dataDir>/<year>VAERSData/<year>VAERSDATA.csv and
    <dataDir>/<year>VAERSData/<year>VAERSVAX.csv.

    Parameters:
        dataDir: directory containing the per-year folders (string).
        year: year as a string, e.g. "2021".

    Returns:
        A dict with the keys 'VAERSDATA' and 'VAERSVAX', each mapped to the
        DataFrame returned by read_csv for the corresponding file.
    """
    folder = dataDir + "/" + year + "VAERSData/"

    def csvPath(tableName):
        return folder + year + tableName + ".csv"

    vaersData = read_csv(csvPath("VAERSDATA"), ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE'])
    vaersVax = read_csv(csvPath("VAERSVAX"), ['VAERS_ID', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'])
    return {'VAERSDATA': vaersData, 'VAERSVAX': vaersVax}

def createDataFrame(dataDir, manufacturer, years = ("2021", "2022")):
    """Read the VAERS files of the given years and keep one manufacturer's COVID19 rows.

    Parameters:
        dataDir: directory containing the per-year VAERS folders.
        manufacturer: value the 'VAX_MANU' column must equal for a row to be kept.
        years: years (as strings) whose files are read, in this order.
            Defaults to 2021 and 2022.

    Returns:
        The DataFrame produced by _createDataFrame for the descriptors of all years.
    """
    vaersDescrs = [readVaersDescr(dataDir, year) for year in years]
    return _createDataFrame(vaersDescrs, manufacturer)

In [None]:
def createBatchCodeTable(df : pd.DataFrame):
    """Count reports per batch code ('VAX_LOT'), in total and per outcome flag.

    Parameters:
        df: frame with the columns 'VAX_LOT', 'DIED', 'DISABLE' and 'L_THREAT';
            an outcome is counted when its flag column equals 'Y'.

    Returns:
        A DataFrame indexed by batch code with the columns 'ADRs', 'DEATHS',
        'DISABILITIES' and 'LIFE THREATENING ILLNESSES'. Batch codes that do not
        occur for an outcome get 0 in that column.
    """
    # Result column name -> flag column of df that marks the outcome.
    flagColumns = {
        'DEATHS': 'DIED',
        'DISABILITIES': 'DISABLE',
        'LIFE THREATENING ILLNESSES': 'L_THREAT'
    }

    countsByColumn = {'ADRs': df[['VAX_LOT']].value_counts()}
    for columnName, flagColumn in flagColumns.items():
        flaggedLots = df.loc[df[flagColumn] == 'Y', ['VAX_LOT']]
        countsByColumn[columnName] = flaggedLots.value_counts()

    # Aligning the count series side by side leaves NaN where a batch code is
    # missing from a series; those gaps mean "no such report", i.e. 0.
    batchCodeTable = pd.concat(countsByColumn, axis = 1)
    return batchCodeTable.replace(to_replace = np.nan, value = 0)


In [None]:
import unittest

In [None]:
from pandas.testing import assert_frame_equal

class BatchCodeTableTest(unittest.TestCase):
    """Tests for the aggregation of VAERS reports per batch code (VAX_LOT)."""

    def test_createBatchCodeTable2(self):
        """Build the VAERS tables in memory and check the resulting batch code table."""
        # Given
        # np.nan (not np.NaN): the upper-case alias no longer exists in NumPy 2.0.
        vaersData2021 = pd.DataFrame(columns = ['DIED', 'L_THREAT', 'DISABLE'], index = ['0916600', '0916601'])
        vaersData2021.loc['0916600'] = pd.Series({'DIED': 'Y',    'L_THREAT': np.nan, 'DISABLE': np.nan})
        vaersData2021.loc['0916601'] = pd.Series({'DIED': np.nan, 'L_THREAT': np.nan, 'DISABLE': 'Y'})

        vaersVax2021 = pd.DataFrame(columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT'], index = ['0916600', '0916601'])
        vaersVax2021.loc['0916600'] = pd.Series({'VAX_TYPE': 'COVID19', 'VAX_MANU': 'MODERNA', 'VAX_LOT': '037K20A'})
        vaersVax2021.loc['0916601'] = pd.Series({'VAX_TYPE': 'COVID19', 'VAX_MANU': 'MODERNA', 'VAX_LOT': '025L20A'})

        vaersData2022 = pd.DataFrame(columns = ['DIED', 'L_THREAT', 'DISABLE'], index = ['1996873', '1996874'])
        vaersData2022.loc['1996873'] = pd.Series({'DIED': np.nan, 'L_THREAT': np.nan, 'DISABLE': np.nan})
        vaersData2022.loc['1996874'] = pd.Series({'DIED': np.nan, 'L_THREAT': np.nan, 'DISABLE': 'Y'})

        # The HPV9 row must be dropped by the COVID19 / manufacturer filter.
        vaersVax2022 = pd.DataFrame(columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT'], index = ['1996873', '1996874'])
        vaersVax2022.loc['1996873'] = pd.Series({'VAX_TYPE': 'HPV9',    'VAX_MANU': 'MERCK & CO. INC.', 'VAX_LOT': 'R017624'})
        vaersVax2022.loc['1996874'] = pd.Series({'VAX_TYPE': 'COVID19', 'VAX_MANU': 'MODERNA', 'VAX_LOT': '025L20A'})

        dataFrame = _createDataFrame(
            [
                {'VAERSDATA': vaersData2021, 'VAERSVAX': vaersVax2021},
                {'VAERSDATA': vaersData2022, 'VAERSVAX': vaersVax2022}
            ],
            "MODERNA")

        self._test_createBatchCodeTable(dataFrame)

    def test_createBatchCodeTable(self):
        """Read the CSV fixtures below test/VAERS and check the same expected table."""
        self._test_createBatchCodeTable(createDataFrame("test/VAERS", "MODERNA"))

    def _test_createBatchCodeTable(self, dataFrame):
        """Shared When/Then part: aggregate `dataFrame` and compare with the expected table."""
        # When
        batchCodeTable = createBatchCodeTable(dataFrame)

        # Then
        batchCodeTableExpected = pd.DataFrame(
            data = {
                'ADRs': [2, 1],
                'DEATHS': [0, 1],
                'DISABILITIES': [2, 0],
                'LIFE THREATENING ILLNESSES': [0, 0]
            },
            index = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',)))
        # check_dtype = False: only the values are compared, not int vs. float column types.
        assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)

        

In [None]:
# Run the tests defined above inside the notebook.
# argv = [''] stops unittest from parsing the kernel's own command-line arguments;
# exit = False keeps unittest from calling sys.exit() once the run has finished.
unittest.main(argv = [''], verbosity = 2, exit = False)

In [None]:
def saveBatchCodeTable(manufacturer, excelFile):
    """Build one manufacturer's batch code table, show it and write it to an Excel file.

    Parameters:
        manufacturer: manufacturer name passed on to createDataFrame.
        excelFile: target path handed to DataFrame.to_excel.

    The VAERS input files are read from the "VAERS" directory.
    """
    vaersFrame = createDataFrame("VAERS", manufacturer)
    table = createBatchCodeTable(vaersFrame)
    # `display` is not imported here; it is the function the IPython/Jupyter kernel provides.
    display(manufacturer, table)
    table.to_excel(excelFile)

In [None]:
# Export one batch code table per manufacturer.
# The backslash belongs to the manufacturer name itself, so it is written as "\\":
# a bare "\B" is an invalid escape sequence that newer Python versions warn about.
saveBatchCodeTable("MODERNA", "results/moderna.xlsx")
saveBatchCodeTable("PFIZER\\BIONTECH", "results/pfizer.xlsx")
saveBatchCodeTable("JANSSEN", "results/janssen.xlsx")