In [9]:
import numpy as np
import pandas as pd

# Notebook display settings: render up to 100 rows of a DataFrame and
# place no limit on the number of columns shown.
pd.options.display.max_rows = 100
pd.options.display.max_columns = None

In [10]:
def createDataFrame(dataDir, manufacturer, years = ("2021", "2022")):
    """
    Load the VAERS reports of the given years and keep the COVID19 rows of one manufacturer.

    For every year two files are read from `<dataDir>/<year>VAERSData/`:
    `<year>VAERSDATA.csv` (columns DIED, L_THREAT, DISABLE) and
    `<year>VAERSVAX.csv` (columns VAX_TYPE, VAX_MANU, VAX_LOT). Both are indexed
    by VAERS_ID and joined on that index; the per-year results are concatenated.

    Parameters
    ----------
    dataDir : str
        Directory that contains the `<year>VAERSData` folders.
    manufacturer : str
        Value that VAX_MANU must equal, e.g. "MODERNA".
    years : iterable of str or int, or a single str/int
        Years to load. Defaults to ("2021", "2022").

    Returns
    -------
    pandas.DataFrame
        Rows with VAX_TYPE == "COVID19" and VAX_MANU == manufacturer, indexed by
        VAERS_ID. A VAERS_ID appears once per matching row of the VAX file.

    Raises
    ------
    ValueError
        If `years` is empty (raised by pd.concat).
    """
    def read_csv(file, usecols):
        return pd.read_csv(file, index_col = 'VAERS_ID', encoding = 'latin1', low_memory = False, usecols = usecols)

    def createDataFrameForYear(year):
        year = str(year)
        folder = dataDir + "/" + year + "VAERSData/"
        return pd.merge(
            read_csv(folder + year + "VAERSDATA.csv", ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE']),
            read_csv(folder + year + "VAERSVAX.csv", ['VAERS_ID', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT']),
            left_index = True,
            right_index = True)

    # A lone year such as "2022" would otherwise be iterated character by character.
    if isinstance(years, (str, int)):
        years = [years]

    df = pd.concat([createDataFrameForYear(year) for year in years])
    return df[(df["VAX_TYPE"] == "COVID19") & (df["VAX_MANU"] == manufacturer)]

In [11]:
def createBatchCodeTable(df):
    """
    Build a per-lot summary table from a reports frame.

    `df` must provide the columns VAX_LOT, DIED, DISABLE and L_THREAT. The result
    has one row per VAX_LOT value and four columns: 'ADRs' counts every row of
    the lot, while 'DEATHS', 'DISABILITIES' and 'LIFE THREATENING ILLNESSES'
    count only the rows whose DIED, DISABLE or L_THREAT column equals 'Y'.
    Lots without any flagged row get 0 in the respective column.
    """
    lots = df[['VAX_LOT']]

    def countFlagged(flagColumn):
        # Occurrences per lot among the rows where the flag column is 'Y'.
        isFlagged = df[flagColumn] == 'Y'
        return lots[isFlagged].value_counts()

    countsByColumn = {'ADRs': lots.value_counts()}
    flagColumnByLabel = (
        ('DEATHS', 'DIED'),
        ('DISABILITIES', 'DISABLE'),
        ('LIFE THREATENING ILLNESSES', 'L_THREAT'),
    )
    for label, flagColumn in flagColumnByLabel:
        countsByColumn[label] = countFlagged(flagColumn)

    # Aligning the series on the lot index leaves NaN where a lot has no
    # flagged rows; those gaps are turned into zero counts.
    table = pd.concat(countsByColumn, axis = 1)
    return table.replace(to_replace = np.nan, value = 0)


In [12]:
def saveBatchCodeTable(manufacturer, excelFile):
    """
    Build the batch code table of one manufacturer, show it and write it to Excel.

    The reports are read from the "VAERS" directory via createDataFrame and
    summarised with createBatchCodeTable. The table is shown with `display`,
    which is not imported in this file and is expected to be provided by the
    notebook environment.

    Parameters
    ----------
    manufacturer : str
        Manufacturer name passed to createDataFrame, e.g. "MODERNA".
    excelFile : str or path-like (or anything DataFrame.to_excel accepts)
        Target of the Excel export. For string/path targets the parent
        directory is created if it does not exist yet.
    """
    import os

    batchCodeTable = createBatchCodeTable(createDataFrame("VAERS", manufacturer))
    display(manufacturer, batchCodeTable)

    # to_excel does not create missing folders, so a fresh checkout without the
    # output directory would fail here. Non-path targets are passed through as is.
    if isinstance(excelFile, (str, os.PathLike)):
        targetDir = os.path.dirname(os.fspath(excelFile))
        if targetDir:
            os.makedirs(targetDir, exist_ok = True)

    batchCodeTable.to_excel(excelFile)

In [13]:
# One Excel sheet per manufacturer. The Pfizer name contains a literal
# backslash, so it is written as an escaped "\\" rather than relying on the
# invalid escape sequence "\B".
saveBatchCodeTable("MODERNA", "results/moderna.xlsx")
saveBatchCodeTable("PFIZER\\BIONTECH", "results/pfizer.xlsx")
saveBatchCodeTable("JANSSEN", "results/janssen.xlsx")

'MODERNA'

Unnamed: 0_level_0,ADRs,DEATHS,DISABILITIES,LIFE THREATENING ILLNESSES
VAX_LOT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
026L20A,4177,42.0,35.0,28.0
039K20A,4169,93.0,39.0,37.0
011J20A,3661,37.0,33.0,28.0
013L20A,3191,68.0,47.0,32.0
012L20A,3068,72.0,30.0,30.0
...,...,...,...,...
029L2VA,1,0.0,0.0,0.0
029L2oa,1,0.0,0.0,0.0
029L30A,1,0.0,0.0,0.0
029L420A,1,0.0,0.0,0.0




'PFIZER\\BIONTECH'

Unnamed: 0_level_0,ADRs,DEATHS,DISABILITIES,LIFE THREATENING ILLNESSES
VAX_LOT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
EK9231,3392,48.0,56.0,35.0
ER2613,3345,65.0,62.0,57.0
EN6201,2928,148.0,69.0,55.0
EN5318,2811,114.0,62.0,57.0
ER8732,2705,50.0,51.0,68.0
...,...,...,...,...
EN6203 UPC,1,0.0,0.0,0.0
"EN6203,",1,0.0,0.0,0.0
"EN6203, EL3247",1,0.0,0.0,0.0
"EN6203, EN6204",1,0.0,0.0,0.0




'JANSSEN'

Unnamed: 0_level_0,ADRs,DEATHS,DISABILITIES,LIFE THREATENING ILLNESSES
VAX_LOT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Unknown,7312,264.0,19.0,31.0
043A21A,2347,37.0,31.0,49.0
042A21A,2170,44.0,41.0,43.0
1805018,1756,35.0,36.0,49.0
202A21A,1726,25.0,21.0,25.0
...,...,...,...,...
180E018,1,0.0,0.0,0.0
180D068,1,0.0,0.0,0.0
180C068,1,0.0,0.0,0.0
180B982,1,0.0,0.0,0.0




In [14]:
import unittest

In [15]:
from pandas.testing import assert_frame_equal


class HowBadIsMyBatchTest(unittest.TestCase):
    """Checks createBatchCodeTable against the MODERNA rows of the fixture data in test/VAERS."""

    def test_createBatchCodeTable(self):
        # Given
        dataFrame = createDataFrame("test/VAERS", "MODERNA")
        display("dataFrame:", dataFrame)

        # When
        batchCodeTable = createBatchCodeTable(dataFrame)
        display("batchCodeTable:", batchCodeTable)

        # Then
        # One row per lot, in the column order produced by createBatchCodeTable.
        expectedIndex = pd.MultiIndex.from_arrays(
            [['025L20A', '037K20A']],
            names = ['VAX_LOT'])
        expectedColumns = ['ADRs', 'DEATHS', 'DISABILITIES', 'LIFE THREATENING ILLNESSES']
        batchCodeTableExpected = pd.DataFrame(
            data = [
                [2, 0, 2, 0],
                [1, 1, 0, 0],
            ],
            columns = expectedColumns,
            index = expectedIndex)
        display("batchCodeTableExpected:", batchCodeTableExpected)
        # The actual table holds floats where NaN was filled, so dtypes are not compared.
        assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)


In [16]:
# Run the test cases in-process. An explicit argv holding only a placeholder
# program name is passed instead of letting unittest read sys.argv, and
# exit = False keeps unittest from calling sys.exit() once the run is finished.
unittest.main(argv = [''], verbosity = 2, exit = False)

test_createBatchCodeTable (__main__.HowBadIsMyBatchTest) ... 

'dataFrame:'

Unnamed: 0_level_0,DIED,L_THREAT,DISABLE,VAX_TYPE,VAX_MANU,VAX_LOT
VAERS_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
916600,Y,,,COVID19,MODERNA,037K20A
916601,,,Y,COVID19,MODERNA,025L20A
1996874,,,Y,COVID19,MODERNA,025L20A


'batchCodeTable:'

Unnamed: 0_level_0,ADRs,DEATHS,DISABILITIES,LIFE THREATENING ILLNESSES
VAX_LOT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
025L20A,2,0.0,2.0,0.0
037K20A,1,1.0,0.0,0.0


'batchCodeTableExpected:'

Unnamed: 0_level_0,ADRs,DEATHS,DISABILITIES,LIFE THREATENING ILLNESSES
VAX_LOT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
025L20A,2,0,2,0
037K20A,1,1,0,0


ok

----------------------------------------------------------------------
Ran 1 test in 0.180s

OK


<unittest.main.TestProgram at 0x7fb670669c10>