In [None]:
import numpy as np
import pandas as pd

# Notebook display settings: render up to 100 rows and do not cap the number
# of columns when a DataFrame is shown.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)

In [None]:
def createDataFrame(dataDir, manufacturer, years = ("2021", "2022")):
    """Load VAERS reports and keep one manufacturer's COVID-19 vaccinations.

    For every requested year the two files
    ``<dataDir>/<year>VAERSData/<year>VAERSDATA.csv`` and
    ``<dataDir>/<year>VAERSData/<year>VAERSVAX.csv`` are read, joined on their
    ``VAERS_ID`` index, and the yearly results are concatenated.

    Parameters
    ----------
    dataDir : str
        Directory that contains the ``<year>VAERSData`` folders.
    manufacturer : str
        Value of the ``VAX_MANU`` column to keep, e.g. ``"MODERNA"``.
    years : iterable of str or int, optional
        Years to load. Defaults to 2021 and 2022, which is what this function
        always loaded before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``VAERS_ID`` with the columns ``DIED``, ``L_THREAT``,
        ``DISABLE``, ``VAX_TYPE``, ``VAX_MANU`` and ``VAX_LOT``, restricted to
        rows where ``VAX_TYPE == "COVID19"`` and ``VAX_MANU == manufacturer``.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when ``years`` is empty.
    FileNotFoundError
        Raised by ``pd.read_csv`` when one of the expected files is missing.
    """
    def read_csv(file, usecols):
        # latin1 is the encoding this notebook has always used for these files;
        # only the listed columns are loaded, and VAERS_ID becomes the index.
        return pd.read_csv(file, index_col = 'VAERS_ID', encoding = 'latin1', low_memory = False, usecols = usecols)

    def createDataFrameForYear(year):
        year = str(year)  # allow callers to pass 2021 as well as "2021"
        folder = dataDir + "/" + year + "VAERSData/"
        # Index-on-index merge: a report that lists several vaccines produces
        # one output row per vaccine entry.
        return pd.merge(
            read_csv(folder + year + "VAERSDATA.csv", ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE']),
            read_csv(folder + year + "VAERSVAX.csv", ['VAERS_ID', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT']),
            left_index = True,
            right_index = True)

    df = pd.concat([createDataFrameForYear(year) for year in years])
    return df[(df["VAX_TYPE"] == "COVID19") & (df["VAX_MANU"] == manufacturer)]

In [None]:
def createPivotTable(df):
    """Count reports per vaccine lot, overall and per serious outcome.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``VAX_LOT``, ``DIED``, ``DISABLE`` and
        ``L_THREAT``; an outcome counts when its column holds ``'Y'``.

    Returns
    -------
    pandas.DataFrame
        One row per ``VAX_LOT`` with the columns ``ADRs`` (all rows of the
        lot), ``DEATHS``, ``DISABILITIES`` and ``LIFE THREATENING ILLNESSES``.
        Lots without any row for an outcome get 0 instead of NaN.
    """
    # Output column label -> input flag column, in output column order.
    flagColumnByLabel = {
        'DEATHS': 'DIED',
        'DISABILITIES': 'DISABLE',
        'LIFE THREATENING ILLNESSES': 'L_THREAT',
    }

    countsByLabel = {'ADRs': df[['VAX_LOT']].value_counts()}
    for label, flagColumn in flagColumnByLabel.items():
        flaggedRows = df[df[flagColumn] == 'Y']
        countsByLabel[label] = flaggedRows[['VAX_LOT']].value_counts()

    # Aligning the four count series on the lot index leaves NaN wherever a
    # lot has no flagged rows; those gaps are turned into zeros.
    combined = pd.concat(countsByLabel, axis=1)
    return combined.replace(to_replace=np.nan, value=0)


In [None]:
# Load the VAERS files found under the "VAERS" directory and keep only the
# COVID-19 rows whose manufacturer is MODERNA.
df_moderna = createDataFrame("VAERS", "MODERNA")

In [None]:
# Show the filtered frame as this cell's output.
df_moderna

In [None]:
# Aggregate the Moderna rows into per-lot counts.
pivotTable = createPivotTable(df_moderna)

In [None]:
# Show the per-lot pivot table as this cell's output.
pivotTable

In [None]:
import unittest

In [None]:
from pandas.testing import assert_frame_equal


class HowBadIsMyBatchTest(unittest.TestCase):
    """Checks the pivot table built from the files under ``test/VAERS``."""

    def test_createPivotTable(self):
        # Given: the per-lot counts the MODERNA rows are expected to produce.
        expectedIndex = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',))
        expectedCounts = {
            'ADRs': [2, 1],
            'DEATHS': [0, 1],
            'DISABILITIES': [2, 0],
            'LIFE THREATENING ILLNESSES': [0.0, 0.0]
        }
        pivotTableExpected = pd.DataFrame(expectedCounts, index = expectedIndex)

        # When: the pivot table is built from the test data directory.
        moderna = createDataFrame("test/VAERS", "MODERNA")
        pivotTable = createPivotTable(moderna)
        display("actual:", pivotTable)
        display("expected:", pivotTableExpected)

        # Then: both frames hold the same values (dtypes may differ, because
        # some count columns are floats and others are ints).
        assert_frame_equal(pivotTable, pivotTableExpected, check_dtype = False)


In [None]:
# Run the tests inside the notebook: argv=[''] stops unittest from parsing the
# kernel's own command-line arguments, and exit=False prevents the sys.exit()
# call that would otherwise follow the test run.
unittest.main(argv=[''], verbosity=2, exit=False)