In [None]:
import os

import numpy as np
import pandas as pd

# Display settings for rendered DataFrames: at most 100 rows, no limit on columns.
pd.options.display.max_rows = 100
pd.options.display.max_columns = None

In [None]:
def _createDataFrame(vaersDescrs, manufacturer):
    def vaersDescr2DataFrame(vaersDescr):
        return pd.merge(vaersDescr['VAERSDATA'], vaersDescr['VAERSVAX'], left_index = True, right_index = True)

    df = pd.concat(map(vaersDescr2DataFrame, vaersDescrs))
    return df[(df["VAX_TYPE"] == "COVID19") & (df["VAX_MANU"] == manufacturer)]

In [None]:
def createDataFrame(dataDir, manufacturer, years = ("2021", "2022")):
    """Load the VAERS CSV files for the given years and filter them by manufacturer.

    For every year the two files
    ``<dataDir>/<year>VAERSData/<year>VAERSDATA.csv`` and
    ``<dataDir>/<year>VAERSData/<year>VAERSVAX.csv`` are read, indexed by
    'VAERS_ID', and handed to `_createDataFrame`.

    Parameters
    ----------
    dataDir : str
        Directory that contains the ``<year>VAERSData`` folders.
    manufacturer : str
        Manufacturer name passed through to `_createDataFrame`.
    years : iterable of str or int, or a single str, optional
        Years to load. Defaults to 2021 and 2022, the years that were
        previously hard-coded.

    Returns
    -------
    pd.DataFrame
        Whatever `_createDataFrame` returns for the loaded data.
    """
    def readCsv(file, usecols):
        return pd.read_csv(file, index_col = 'VAERS_ID', encoding = 'latin1', low_memory = False, usecols = usecols)

    def readVaersDescr(year):
        year = str(year)  # allow years to be given as ints as well
        folder = dataDir + "/" + year + "VAERSData/"
        return {
            'VAERSDATA': readCsv(folder + year + "VAERSDATA.csv", ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE']),
            'VAERSVAX': readCsv(folder + year + "VAERSVAX.csv", ['VAERS_ID', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'])
            }

    # A bare string such as "2021" would otherwise be iterated character by character.
    if isinstance(years, str):
        years = (years,)

    return _createDataFrame(
        [readVaersDescr(year) for year in years],
        manufacturer)

In [None]:
def createBatchCodeTable(df : pd.DataFrame):
    """Count reports per vaccine lot, in total and per serious outcome.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the columns 'VAX_LOT', 'DIED', 'DISABLE' and 'L_THREAT'.
        An outcome column counts for a row when its value equals 'Y'.

    Returns
    -------
    pd.DataFrame
        One row per 'VAX_LOT' value with the integer columns 'ADRs' (all
        rows), 'DEATHS', 'DISABILITIES' and 'LIFE THREATENING ILLNESSES'.
        Lots without any row for an outcome get 0 in that column.
    """
    def countPerLot(rows):
        return rows[['VAX_LOT']].value_counts()

    def countFlaggedPerLot(col):
        return countPerLot(df[df[col] == 'Y'])

    countsByOutcome = {
        'ADRs': countPerLot(df),
        'DEATHS': countFlaggedPerLot('DIED'),
        'DISABILITIES': countFlaggedPerLot('DISABLE'),
        'LIFE THREATENING ILLNESSES': countFlaggedPerLot('L_THREAT')
    }
    # Aligning the four count series introduces NaN for lots missing from an
    # outcome, which turns the columns into floats; fill with 0 and restore
    # the integer dtype that counts should have.
    return pd.concat(countsByOutcome, axis = 1).fillna(0).astype('int64')


In [None]:
import unittest

In [None]:
from pandas.testing import assert_frame_equal


class BatchCodeTableTest(unittest.TestCase):
    """Checks createBatchCodeTable against in-memory and on-disk VAERS fixtures."""

    def test_createBatchCodeTable2(self):
        """Build the input from hand-written DataFrames instead of CSV files."""
        # np.nan is used for missing values; the np.NaN alias was removed in NumPy 2.0.
        dataFrame = _createDataFrame(
            [
                {
                    'VAERSDATA': self.createDataFrame(
                        [              'DIED', 'L_THREAT', 'DISABLE'],
                        {
                            '0916600': ['Y',    np.nan,     np.nan],
                            '0916601': [np.nan, np.nan,     'Y']
                        }),
                    'VAERSVAX': self.createDataFrame(
                        [               'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],
                        {
                            '0916600': ['COVID19',  'MODERNA',  '037K20A'],
                            '0916601': ['COVID19',  'MODERNA',  '025L20A']
                        })
                },
                {
                    'VAERSDATA': self.createDataFrame(
                        [              'DIED', 'L_THREAT', 'DISABLE'],
                        {
                            '1996873': [np.nan, np.nan,     np.nan],
                            '1996874': [np.nan, np.nan,     'Y']
                        }),
                    'VAERSVAX': self.createDataFrame(
                        [               'VAX_TYPE', 'VAX_MANU',         'VAX_LOT'],
                        {
                            '1996873': ['HPV9',     'MERCK & CO. INC.', 'R017624'],
                            '1996874': ['COVID19',  'MODERNA',          '025L20A']
                        })
                }
            ],
            "MODERNA")

        self._test_createBatchCodeTable(dataFrame)

    def test_createBatchCodeTable(self):
        """Build the input from the CSV fixtures below test/VAERS."""
        # This is the module-level createDataFrame, not the helper method of the same name.
        self._test_createBatchCodeTable(createDataFrame("test/VAERS", "MODERNA"))

    def _test_createBatchCodeTable(self, dataFrame):
        """Assert that `dataFrame` produces the expected per-lot counts."""
        # When
        batchCodeTable=createBatchCodeTable(dataFrame)

        # Then
        batchCodeTableExpected=pd.DataFrame(
            data={
                'ADRs': [2, 1],
                'DEATHS': [0, 1],
                'DISABILITIES': [2, 0],
                'LIFE THREATENING ILLNESSES': [0, 0]
            },
            index=pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',)))
        # check_dtype=False: only the values are compared, not int vs. float column dtypes.
        assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype=False)

    def createDataFrame(self, columns, data):
        """Return a DataFrame whose rows are the items of `data`, keyed by index label."""
        return pd.DataFrame.from_dict(data, columns = columns, orient = 'index')


In [None]:
# Run the tests inside the notebook kernel. argv = [''] supplies a dummy program name so
# unittest does not parse the kernel's own command-line arguments, verbosity = 2 reports
# each test individually, and exit = False keeps unittest.main from calling sys.exit().
unittest.main(argv = [''], verbosity = 2, exit = False)

In [None]:
def saveBatchCodeTable(manufacturer, excelFile, dataDir = "VAERS"):
    """Build the batch code table for one manufacturer, display it and save it as Excel.

    Parameters
    ----------
    manufacturer : str
        Manufacturer name passed to `createDataFrame`.
    excelFile : str, path-like or anything `DataFrame.to_excel` accepts
        Destination of the Excel export. When it is a path, its parent
        directory is created first if it does not exist yet.
    dataDir : str, optional
        Directory handed to `createDataFrame`; defaults to "VAERS".
    """
    batchCodeTable = createBatchCodeTable(createDataFrame(dataDir, manufacturer))
    display(manufacturer, batchCodeTable)

    # to_excel does not create missing folders, so a fresh checkout without the
    # output directory would fail here. Only paths are inspected; writer
    # objects are passed through untouched.
    if isinstance(excelFile, (str, os.PathLike)):
        outputDir = os.path.dirname(os.fspath(excelFile))
        if outputDir:
            os.makedirs(outputDir, exist_ok = True)

    batchCodeTable.to_excel(excelFile)

In [None]:
# Export one Excel file per manufacturer.
saveBatchCodeTable("MODERNA", "results/moderna.xlsx")
# Raw string: the name contains a literal backslash, and "\B" is not a valid escape sequence.
saveBatchCodeTable(r"PFIZER\BIONTECH", "results/pfizer.xlsx")
saveBatchCodeTable("JANSSEN", "results/janssen.xlsx")