In [None]:
import numpy as np
import pandas as pd

# Notebook-wide pandas display settings: render up to 100 rows and
# never truncate the column list (None = no limit) when a DataFrame is displayed.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)

In [None]:
import pandas as pd

class VaersDescrReader:
    """Reads VAERS CSV files from a directory into pandas DataFrames.

    A "VAERS descr" is a dict holding two DataFrames, both indexed by ``VAERS_ID``:
    ``'VAERSDATA'`` (read from ``<prefix>VAERSDATA.csv``) and
    ``'VAERSVAX'`` (read from ``<prefix>VAERSVAX.csv``).
    """

    def __init__(self, dataDir):
        """Remember the directory that contains the VAERS CSV files."""
        self.dataDir = dataDir

    def readVaersDescrs(self, years):
        """Return one VAERS descr per entry of ``years``, in the given order."""
        return [self.readVaersDescr(year) for year in years]

    def readVaersDescr(self, year):
        """Return the VAERS descr for ``year``.

        ``year`` is used as the file name prefix and may be a str (``"2021"``)
        or an int (``2021``).
        """
        return self._readVaersDescrHavingFilePrefix(year)

    def readNonDomesticVaersDescr(self):
        """Return the VAERS descr read from the ``NonDomestic*.csv`` files."""
        return self._readVaersDescrHavingFilePrefix("NonDomestic")

    def _readVaersDescrHavingFilePrefix(self, filePrefix):
        # f-strings format ints as well as strs, so an int year no longer
        # fails the way "dir/" + 2021 did with plain string concatenation.
        return {
            'VAERSDATA': self._readVAERSDATA(f"{self.dataDir}/{filePrefix}VAERSDATA.csv"),
            'VAERSVAX': self._readVAERSVAX(f"{self.dataDir}/{filePrefix}VAERSVAX.csv")
        }

    def _readVAERSDATA(self, file):
        """Read the report columns of a VAERSDATA file; ``RECVDATE`` becomes a datetime column."""
        dataFrame = self._read_csv(
            file = file,
            usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'SPLTTYPE'])
        # The dates are parsed after the read instead of via read_csv's
        # ``date_parser`` argument, which is deprecated since pandas 2.0.
        dataFrame['RECVDATE'] = pd.to_datetime(dataFrame['RECVDATE'], format = "%m/%d/%Y")
        return dataFrame

    def _readVAERSVAX(self, file):
        """Read the vaccine columns of a VAERSVAX file; ``VAX_DOSE_SERIES`` is read as "string" dtype."""
        return self._read_csv(
            file = file,
            usecols = ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],
            dtype = {"VAX_DOSE_SERIES": "string"})

    def _read_csv(self, file, **kwargs):
        """Read a latin1-encoded CSV file indexed by ``VAERS_ID``; ``kwargs`` go to ``pd.read_csv``."""
        return pd.read_csv(
            file,
            index_col = 'VAERS_ID',
            encoding = 'latin1',
            low_memory = False,
            **kwargs)


In [None]:
import pandas as pd

class VaersDescr2DataFrameConverter:
    """Turns VAERS descrs (dicts with 'VAERSDATA' and 'VAERSVAX' frames) into single DataFrames."""

    @staticmethod
    def createDataFrameFromDescr(vaersDescr):
        """Left-join the 'VAERSVAX' frame onto the 'VAERSDATA' frame via their indexes.

        The merge is validated as one-to-many, so a duplicated index value
        in the 'VAERSDATA' frame makes pandas raise.
        """
        reports = vaersDescr['VAERSDATA']
        vaccinations = vaersDescr['VAERSVAX']
        return reports.merge(
            vaccinations,
            how = 'left',
            left_index = True,
            right_index = True,
            validate = 'one_to_many')

    @staticmethod
    def createDataFrameFromDescrs(vaersDescrs):
        """Merge every descr on its own and stack the merged frames in the given order."""
        mergedFrames = []
        for descr in vaersDescrs:
            mergedFrames.append(VaersDescr2DataFrameConverter.createDataFrameFromDescr(descr))
        return pd.concat(mergedFrames)


In [None]:
class DataFrameNormalizer:
    """In-place normalization of a merged VAERS DataFrame."""

    @staticmethod
    def normalize(dataFrame):
        """Upper-case ``VAX_LOT`` and turn the 'Y'-flag columns into 1/0 columns.

        The frame is modified in place; nothing is returned.
        """
        yesFlagColumns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT']
        DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(dataFrame)
        DataFrameNormalizer._convertColumnsOfDataFrame_Y_to_1_else_0(dataFrame, yesFlagColumns)

    @staticmethod
    def convertVAX_LOTColumnToUpperCase(dataFrame):
        """Replace the ``VAX_LOT`` column of ``dataFrame`` by its upper-cased version (in place)."""
        upperCasedLots = dataFrame['VAX_LOT'].str.upper()
        dataFrame['VAX_LOT'] = upperCasedLots

    @staticmethod
    def _convertColumnsOfDataFrame_Y_to_1_else_0(dataFrame, columns):
        # apply the single-column conversion to each requested column in turn
        for columnName in columns:
            DataFrameNormalizer._convertColumnOfDataFrame_Y_to_1_else_0(dataFrame, columnName)

    @staticmethod
    def _convertColumnOfDataFrame_Y_to_1_else_0(dataFrame, column):
        # exactly 'Y' becomes 1, every other value (including missing ones) becomes 0
        isYes = dataFrame[column] == 'Y'
        dataFrame[column] = DataFrameNormalizer._where(
            condition = isYes,
            trueValue = 1,
            falseValue = 0)

    @staticmethod
    def _where(condition, trueValue, falseValue):
        # thin wrapper around numpy.where
        selected = np.where(condition, trueValue, falseValue)
        return selected
    

In [None]:
import pandas as pd

class DataFrameFilter:
    """Row filters for merged VAERS DataFrames; every filter returns a new DataFrame."""

    def filterByCovid19(self, dataFrame):
        """Keep the rows whose ``VAX_TYPE`` is exactly "COVID19"."""
        return dataFrame[self._isCovid19(dataFrame)]

    def filterByFlu(self, dataFrame):
        """Keep the rows whose ``VAX_TYPE`` starts with "FLU"."""
        return dataFrame[self._isFlu(dataFrame)]

    def filterByCountry(self, dataFrame, country, countryColumnName):
        """Keep the rows whose ``countryColumnName`` column equals ``country``."""
        return dataFrame[dataFrame[countryColumnName] == country]

    def filterBy(self, dataFrame, manufacturer = None, dose = None):
        """Keep the rows matching every filter that is given.

        Parameters:
            manufacturer: exact ``VAX_MANU`` value to keep, or None for no manufacturer filter.
            dose: pattern searched for in ``VAX_DOSE_SERIES`` via ``str.contains``
                (a regex substring match by pandas' default), or None for no dose filter.

        Returns:
            The filtered DataFrame; an unfiltered copy when neither filter is given.
        """
        if manufacturer is None and dose is None:
            # Both helper masks would be the plain bool True here, and
            # dataFrame[True & True] raises a KeyError instead of keeping all rows.
            return dataFrame.copy()
        return dataFrame[self._isManufacturer(dataFrame, manufacturer) & self._isDose(dataFrame, dose)]

    def _isCovid19(self, dataFrame):
        return dataFrame["VAX_TYPE"] == "COVID19"

    def _isFlu(self, dataFrame):
        # na = False: rows without a VAX_TYPE are no flu rows; without it the mask
        # contains NaN, which boolean indexing rejects for object columns.
        return dataFrame["VAX_TYPE"].str.startswith("FLU", na = False)

    def _isManufacturer(self, dataFrame, manufacturer):
        return dataFrame["VAX_MANU"] == manufacturer if manufacturer is not None else True

    def _isDose(self, dataFrame, dose):
        # na = False: rows without a dose never match (see _isFlu)
        return dataFrame["VAX_DOSE_SERIES"].str.contains(dose, na = False) if dose is not None else True


In [None]:
class SummationTableFactory:
    """Creates summation tables (one row per group) from grouped VAERS data."""

    @staticmethod
    def createSummationTable(
        groupBy,
        columnNameMappingsDict = {
                "DIED_size": "Total Number of Adverse Reaction Reports",
                "DIED_sum": "Deaths",
                "L_THREAT_sum": "Life Threatening Illnesses",
                "DISABLE_sum": "Disabilities",
                'HOSPITAL_sum': 'Hospitalisations',
                'ER_VISIT_sum': 'Emergency Room or Doctor Visits'
            }):
        """Aggregate ``groupBy`` and rename the aggregated columns.

        Per group, DIED is summed and counted, and L_THREAT, DISABLE, HOSPITAL,
        ER_VISIT and SEVERE are summed. The resulting two-level column labels
        are flattened to ``<column>_<aggregation>`` and then renamed according
        to ``columnNameMappingsDict``; labels without a mapping keep their
        flattened name.
        """
        summedOnlyColumns = ['L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'SEVERE']
        aggregationsByColumn = {'DIED': ['sum', 'size']}
        for columnName in summedOnlyColumns:
            aggregationsByColumn[columnName] = 'sum'

        aggregated = groupBy.agg(aggregationsByColumn)
        SummationTableFactory._flattenColumns(aggregated)
        return aggregated.rename(columns = columnNameMappingsDict)

    @staticmethod
    def createSummationTableHavingSevereReportsColumn(dataFrame):
        """Create a summation table with an additional 'Severe reports (%)' column.

        ``dataFrame`` is handed unchanged to ``createSummationTable`` as its
        ``groupBy`` argument. The percentage is the summed SEVERE value
        relative to the number of reports of the group.
        """
        totalColumn = 'Total Number of Adverse Reaction Reports'
        percentageColumn = 'Severe reports (%)'
        table = SummationTableFactory.createSummationTable(
            dataFrame,
            columnNameMappingsDict = {
                "DIED_size": "Total Number of Adverse Reaction Reports",
                "DIED_sum": "Deaths",
                "L_THREAT_sum": "Life Threatening Illnesses",
                "DISABLE_sum": "Disabilities",
                "SEVERE_sum": "Severities"
            })
        table[percentageColumn] = table['Severities'] / table[totalColumn] * 100
        return table[[totalColumn, 'Deaths', 'Disabilities', 'Life Threatening Illnesses', percentageColumn]]

    @staticmethod
    def _flattenColumns(dataFrame):
        # ('DIED', 'sum') -> 'DIED_sum'; the frame's columns are replaced in place
        flatLabels = list(map("_".join, dataFrame.columns.to_flat_index()))
        dataFrame.columns = flatLabels


In [None]:
import pandas as pd

class BatchCodeTableFactory:
    """Creates per-batch-code (``VAX_LOT``) summary tables of the COVID-19 reports."""

    @staticmethod
    def createBatchCodeTable(dataFrame : pd.DataFrame, dose, minADRsForLethality = None):
        """Create the batch code table of the COVID-19 reports for ``dose``.

        Parameters:
            dataFrame: merged VAERS DataFrame.
            dose: dose filter handed to ``DataFrameFilter.filterBy``.
            minADRsForLethality: when given, 'Lethality' is set to NaN for every
                batch code having fewer reports than this value.

        Returns:
            A DataFrame indexed by ``VAX_LOT``.
        """
        dataFrame = BatchCodeTableFactory._filterByCovid19AndDose(dataFrame, dose)
        # .copy(): the selection is written to below; assigning into a frame
        # derived by column selection would trigger pandas' SettingWithCopyWarning.
        batchCodeTable = BatchCodeTableFactory._createSummationTableByVAX_LOT(dataFrame)[
            [
                'Total Number of Adverse Reaction Reports',
                'Deaths',
                'Disabilities',
                'Life Threatening Illnesses',
                'Company',
                'Lethality'
            ]].copy()
        if minADRsForLethality is not None:
            hasTooFewReports = batchCodeTable['Total Number of Adverse Reaction Reports'] < minADRsForLethality
            batchCodeTable.loc[hasTooFewReports, 'Lethality'] = np.nan
        return batchCodeTable

    # create table from https://www.howbadismybatch.com/combined.html
    @staticmethod
    def createSevereEffectsBatchCodeTable(dataFrame : pd.DataFrame, dose):
        """Create the batch code table including hospitalisations and emergency room or doctor visits."""
        dataFrame = BatchCodeTableFactory._filterByCovid19AndDose(dataFrame, dose)
        return BatchCodeTableFactory._createSummationTableByVAX_LOT(dataFrame)[
            [
                'Total Number of Adverse Reaction Reports',
                'Deaths',
                'Disabilities',
                'Life Threatening Illnesses',
                'Hospitalisations',
                'Emergency Room or Doctor Visits',
                'Company'
            ]]

    @staticmethod
    def _filterByCovid19AndDose(dataFrame, dose):
        # shared first step of both public factory methods
        dataFrameFilter = DataFrameFilter()
        dataFrame = dataFrameFilter.filterByCovid19(dataFrame)
        return dataFrameFilter.filterBy(dataFrame, dose = dose)

    @staticmethod
    def _createSummationTableByVAX_LOT(dataFrame):
        # one row per VAX_LOT, ordered by descending number of reports, plus the 'Company' column
        batchCodeTable = SummationTableFactory.createSummationTable(dataFrame.groupby('VAX_LOT'))
        # percentage of the reports of a batch code that are deaths
        batchCodeTable['Lethality'] = batchCodeTable['Deaths'] / batchCodeTable['Total Number of Adverse Reaction Reports'] * 100
        batchCodeTable = batchCodeTable[
            [
                'Total Number of Adverse Reaction Reports',
                'Deaths',
                'Disabilities',
                'Life Threatening Illnesses',
                'Hospitalisations',
                'Emergency Room or Doctor Visits',
                'Lethality'
            ]]
        batchCodeTable = batchCodeTable.sort_values(by = 'Total Number of Adverse Reaction Reports', ascending = False)
        return BatchCodeTableFactory._addCompanyColumn(batchCodeTable, BatchCodeTableFactory._createCompanyByBatchCodeTable(dataFrame))

    @staticmethod
    def _addCompanyColumn(batchCodeTable, companyByBatchCodeTable):
        # left join on the VAX_LOT indexes; validated as one-to-one
        return pd.merge(
            batchCodeTable,
            companyByBatchCodeTable,
            how = 'left',
            left_index = True,
            right_index = True,
            validate = 'one_to_one')

    @staticmethod
    def _createCompanyByBatchCodeTable(dataFrame):
        return BatchCodeTableFactory._createManufacturerByBatchCodeTable(dataFrame).rename(columns = {"VAX_MANU": "Company"})

    @staticmethod
    def _createManufacturerByBatchCodeTable(dataFrame):
        # the first VAX_MANU seen for a VAX_LOT wins (drop_duplicates keeps the first occurrence)
        manufacturerByBatchCodeTable = dataFrame[['VAX_LOT', 'VAX_MANU']]
        manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])
        return manufacturerByBatchCodeTable.set_index('VAX_LOT')


In [None]:
class DoseTableFactory:
    """Creates summation tables of the COVID-19 reports grouped by dose."""

    @staticmethod
    def createDoseTable(dataFrame):
        """Summation table with the severe reports column, one row per ``VAX_DOSE_SERIES`` value ('Dose')."""
        covid19Reports = DataFrameFilter().filterByCovid19(dataFrame)
        doseKey = covid19Reports['VAX_DOSE_SERIES'].rename('Dose')
        reportsByDose = covid19Reports.groupby(doseKey)
        return SummationTableFactory.createSummationTableHavingSevereReportsColumn(reportsByDose)

    @staticmethod
    def createDoseByMonthTable(dataFrame):
        """Summation table with the severe reports column, one row per ('Year', 'Month', 'Dose').

        Year and month are taken from ``RECVDATE``.
        """
        covid19Reports = DataFrameFilter().filterByCovid19(dataFrame)
        yearKey = covid19Reports['RECVDATE'].dt.year.rename('Year')
        monthKey = covid19Reports['RECVDATE'].dt.month.rename('Month')
        doseKey = covid19Reports['VAX_DOSE_SERIES'].rename('Dose')
        reportsByMonthAndDose = covid19Reports.groupby([yearKey, monthKey, doseKey])
        return SummationTableFactory.createSummationTableHavingSevereReportsColumn(reportsByMonthAndDose)


In [None]:
import pycountry

class CountryColumnAdder:
    """Derives a country name column from the ``SPLTTYPE`` column."""

    @staticmethod
    def addCountryColumn(dataFrame, countryColumnName):
        """Add the column ``countryColumnName`` holding the country name of each row.

        The country is looked up from the first two characters of ``SPLTTYPE``;
        rows without a resolvable country get 'Unknown Country'.

        Note that the column is also added to the passed ``dataFrame`` itself.
        The returned DataFrame is a converted copy whose new column has
        "string" dtype.
        """
        # Only SPLTTYPE is needed, so map over that single column instead of
        # building a Series for every row with DataFrame.apply(axis = 'columns').
        dataFrame[countryColumnName] = dataFrame['SPLTTYPE'].map(
            lambda splttype:
                CountryColumnAdder._getCountryNameOfSplttypeOrDefault(
                    splttype = splttype,
                    default = 'Unknown Country'))
        return dataFrame.astype({countryColumnName: "string"})

    @staticmethod
    def _getCountryNameOfSplttypeOrDefault(splttype, default):
        # missing values arrive as non-str objects (e.g. NaN)
        if not isinstance(splttype, str):
            return default

        # the first two characters are looked up as an ISO 3166-1 alpha-2 country code
        country = pycountry.countries.get(alpha_2 = splttype[:2])
        return country.name if country is not None else default

In [None]:
import pycountry

class SevereColumnAdder:
    """Adds the ``SEVERE`` flag column to a normalized VAERS DataFrame."""

    @staticmethod
    def addSevereColumn(dataFrame):
        """Add the integer column ``SEVERE`` to ``dataFrame`` and return ``dataFrame``.

        ``SEVERE`` is 1 where the sum of ``DIED``, ``L_THREAT`` and ``DISABLE``
        is greater than 0, else 0. The passed frame is modified in place.
        """
        isSevere = (dataFrame['DIED'] + dataFrame['L_THREAT'] + dataFrame['DISABLE']) > 0
        # Assign the converted column directly: the former
        # dataFrame['SEVERE'].replace(..., inplace = True) updated a column
        # selection, which pandas warns about and which has no effect on the
        # frame once Copy-on-Write is active.
        dataFrame['SEVERE'] = isSevere.astype('int64')
        return dataFrame


In [None]:
class InternationalLotTableFactory:
    """Creates summation tables of the COVID-19 reports per country and per batch code of a country."""

    @staticmethod
    def createInternationalLotTable(dataFrame):
        """Summation table per country, ordered by descending 'Severe reports (%)'."""
        covid19Reports = DataFrameFilter().filterByCovid19(dataFrame)
        tableByCountry = InternationalLotTableFactory._createInternationalLotTable(covid19Reports)
        return tableByCountry.sort_values(by = 'Severe reports (%)', ascending = False)

    @staticmethod
    def createBatchCodeTableByCountry(dataFrame : pd.DataFrame, country):
        """Summation table per ``VAX_LOT`` of ``country``, ordered by descending 'Severe reports (%)'."""
        covid19Reports = DataFrameFilter().filterByCovid19(dataFrame)
        tableByBatchCode = InternationalLotTableFactory._createBatchCodeTableByCountry(covid19Reports, country)
        return tableByBatchCode.sort_values(by = 'Severe reports (%)', ascending = False)

    @staticmethod
    def _createInternationalLotTable(dataFrame):
        # group the reports by the country column added by CountryColumnAdder
        countryColumnName = 'Country'
        reportsWithCountry = CountryColumnAdder.addCountryColumn(dataFrame, countryColumnName = countryColumnName)
        reportsByCountry = reportsWithCountry.groupby(reportsWithCountry[countryColumnName])
        return SummationTableFactory.createSummationTableHavingSevereReportsColumn(reportsByCountry)

    @staticmethod
    def _createBatchCodeTableByCountry(dataFrame : pd.DataFrame, country):
        # keep only the reports of the given country, then group them by batch code
        countryColumnName = 'Country'
        reportsWithCountry = CountryColumnAdder.addCountryColumn(dataFrame, countryColumnName = countryColumnName)
        reportsOfCountry = DataFrameFilter().filterByCountry(
            reportsWithCountry,
            country = country,
            countryColumnName = countryColumnName)
        return SummationTableFactory.createSummationTableHavingSevereReportsColumn(reportsOfCountry.groupby('VAX_LOT'))


In [None]:
import os

class IOUtils:
    """Helpers for saving DataFrames; ``file`` is always a path without file extension."""

    @staticmethod
    def saveDataFrame(dataFrame, file):
        """Save ``dataFrame`` as ``file``.xlsx, ``file``.html and ``file``.json."""
        IOUtils.saveDataFrameAsExcelFile(dataFrame, file)
        IOUtils.saveDataFrameAsHtml(dataFrame, file)
        IOUtils.saveDataFrameAsJson(dataFrame, file)

    @staticmethod
    def saveDataFrameAsExcelFile(dataFrame, file):
        """Save ``dataFrame`` as ``file``.xlsx."""
        IOUtils.ensurePath(file)
        dataFrame.to_excel(file + '.xlsx')

    @staticmethod
    def saveDataFrameAsHtml(dataFrame, file):
        """Save ``dataFrame`` as ``file``.html; the index is written as regular column(s)."""
        IOUtils.ensurePath(file)
        dataFrame.reset_index().to_html(
            file + '.html',
            index = False,
            table_id = 'batchCodeTable',
            classes = 'display',
            justify = 'unset',
            border = 0)

    @staticmethod
    def saveDataFrameAsJson(dataFrame, file):
        """Save ``dataFrame`` as ``file``.json in "split" orientation; the index is written as regular column(s)."""
        IOUtils.ensurePath(file)
        dataFrame.reset_index().to_json(
            file + '.json',
            orient = "split",
            index = False)

    @staticmethod
    def ensurePath(file):
        """Create the directory of ``file`` (including parents) if it does not exist yet."""
        directory = os.path.dirname(file)
        # A bare file name has no directory part, and os.makedirs('') raises
        # FileNotFoundError - there is nothing to create in that case.
        if directory:
            # exist_ok avoids failing when the directory already exists or is
            # created concurrently between a check and the creation
            os.makedirs(directory, exist_ok = True)


In [None]:
import unittest

In [None]:
class TestHelper:
    """Factory helpers for building DataFrames in the unit tests."""

    @staticmethod
    def createDataFrame(index, columns, data, dtypes = {}):
        """Build a DataFrame from rows, then cast the columns named in ``dtypes`` via ``astype``."""
        untypedDataFrame = pd.DataFrame(data = data, index = index, columns = columns)
        return untypedDataFrame.astype(dtypes)


In [None]:
from pandas.testing import assert_frame_equal

class DataFrameNormalizerTest(unittest.TestCase):
    """Unit tests of DataFrameNormalizer."""

    def test_convertVAX_LOTColumnToUpperCase(self):
        """A lower-case letter within a VAX_LOT value is upper-cased; other values stay as they are."""
        # Given
        vaersIds = ["0916600", "0916601", "1996874"]
        dataFrame = TestHelper.createDataFrame(
            columns = ['VAX_LOT'],
            data = [['037K20A'], ['025l20A'], ['025L20A']],
            index = vaersIds)

        # When
        DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(dataFrame)

        # Then
        dataFrameExpected = TestHelper.createDataFrame(
            columns = ['VAX_LOT'],
            data = [['037K20A'], ['025L20A'], ['025L20A']],
            index = vaersIds)
        assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)


In [None]:
from pandas.testing import assert_frame_equal

class DataFrameFilterTest(unittest.TestCase):
    """Unit tests of DataFrameFilter."""

    def test_filterByFlu(self):
        """Only the rows whose VAX_TYPE starts with 'FLU' are kept."""
        # Given
        columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES']
        fluVaxTypes = [
            'FLU(H1N1)',
            'FLU3',
            'FLU4',
            'FLUA3',
            'FLUA4',
            'FLUC3',
            'FLUC4',
            'FLUN(H1N1)',
            'FLUN3',
            'FLUN4',
            'FLUR3',
            'FLUR4',
            'FLUX',
            'FLUX(H1N1)']
        # one row per flu VAX_TYPE, all other values being equal
        fluRows = [
            [0, 0, 0, vaxType, 'GLAXOSMITHKLINE BIOLOGICALS', '5R3J5', '1']
            for vaxType in fluVaxTypes]
        # "801410" .. "801423"
        fluIds = [str(801410 + offset) for offset in range(len(fluVaxTypes))]
        covid19Row = [0, 0, 1, 'COVID19', 'MODERNA', '025L20A', '1']
        dataFrame = TestHelper.createDataFrame(
            columns = columns,
            data = fluRows + [covid19Row],
            index = fluIds + ["801424"])
        dataFrameFilter = DataFrameFilter()

        # When
        dataFrameActual = dataFrameFilter.filterByFlu(dataFrame)

        # Then
        dataFrameExpected = TestHelper.createDataFrame(
            columns = columns,
            data = fluRows,
            index = fluIds)
        assert_frame_equal(dataFrameActual, dataFrameExpected, check_dtype = False)

    def test_filterByCovid19_filterBy(self):
        """The COVID19 filter combined with a manufacturer and dose filter drops the HPV9 row."""
        # Given
        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(
            [
                {
                    'VAERSDATA': TestHelper.createDataFrame(
                        columns = ['DIED', 'L_THREAT', 'DISABLE'],
                        data = [  [1,      0,          0],
                                  [0,      0,          1]],
                        index = [
                            "0916600",
                            "0916601"]),
                    'VAERSVAX': TestHelper.createDataFrame(
                        columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
                        data = [  ['COVID19',  'MODERNA',  '037K20A', '1'],
                                  ['COVID19',  'MODERNA',  '025L20A', '1']],
                        index = [
                            "0916600",
                            "0916601"],
                        dtypes = {'VAX_DOSE_SERIES': "string"})
                },
                {
                    'VAERSDATA': TestHelper.createDataFrame(
                        columns = ['DIED', 'L_THREAT', 'DISABLE'],
                        data = [  [0,       0,         0],
                                  [0,       0,         1]],
                        index = [
                            "1996873",
                            "1996874"]),
                    'VAERSVAX': TestHelper.createDataFrame(
                        columns = ['VAX_TYPE', 'VAX_MANU',         'VAX_LOT', 'VAX_DOSE_SERIES'],
                        data = [  ['HPV9',     'MERCK & CO. INC.', 'R017624', 'UNK'],
                                  ['COVID19',  'MODERNA',          '025L20A', '1']],
                        index = [
                            "1996873",
                            "1996874"],
                        dtypes = {'VAX_DOSE_SERIES': "string"})
                }
            ])
        dataFrameFilter = DataFrameFilter()

        # When
        dataFrame = dataFrameFilter.filterByCovid19(dataFrame)
        dataFrame = dataFrameFilter.filterBy(dataFrame, manufacturer = "MODERNA", dose = '1')

        # Then
        dataFrameExpected = TestHelper.createDataFrame(
            columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
            data = [  [1,       0,         0,         'COVID19',  'MODERNA',  '037K20A', '1'],
                      [0,       0,         1,         'COVID19',  'MODERNA',  '025L20A', '1'],
                      [0,       0,         1,         'COVID19',  'MODERNA',  '025L20A', '1']],
            index = [
                "0916600",
                "0916601",
                "1996874"],
            dtypes = {'VAX_DOSE_SERIES': "string"})
        assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)

    def test_filterByDose(self):
        """Filtering by dose only keeps the rows of all manufacturers."""
        # Given
        # raw strings: '\B' is no valid escape sequence in a regular string literal
        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(
            [
                {
                    'VAERSDATA': TestHelper.createDataFrame(
                        columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],
                        data = [  [1,      1,          0,         1,          1],
                                  [0,      0,          1,         0,          1]],
                        index = [
                            "0916600",
                            "0916601"]),
                    'VAERSVAX': TestHelper.createDataFrame(
                        columns = ['VAX_TYPE', 'VAX_MANU',         'VAX_LOT', 'VAX_DOSE_SERIES'],
                        data = [  ['COVID19',  'MODERNA',          '037K20A', '1'],
                                  ['COVID19',  r'PFIZER\BIONTECH', '025L20A', '1']],
                        index = [
                            "0916600",
                            "0916601"],
                        dtypes = {'VAX_DOSE_SERIES': "string"})
                }
            ])
        dataFrameFilter = DataFrameFilter()
        dataFrame = dataFrameFilter.filterByCovid19(dataFrame)

        # When
        dataFrame = dataFrameFilter.filterBy(dataFrame, dose = '1')

        # Then
        dataFrameExpected = TestHelper.createDataFrame(
            columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'VAX_TYPE', 'VAX_MANU',         'VAX_LOT', 'VAX_DOSE_SERIES'],
            data = [  [1,      1,          0,         1,          1,          'COVID19',  'MODERNA',          '037K20A', '1'],
                      [0,      0,          1,         0,          1,          'COVID19',  r'PFIZER\BIONTECH', '025L20A', '1']],
            index = [
                "0916600",
                "0916601"],
            dtypes = {'VAX_DOSE_SERIES': "string"})
        assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)

    def test_filterByFirstDose(self):
        """Of a report having a first and a second dose row, dose '1' keeps the first dose row only."""
        # Given
        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(
            [
                {
                    'VAERSDATA': TestHelper.createDataFrame(
                        columns = ['DIED', 'L_THREAT', 'DISABLE'],
                        data = [  [1,      0,          0]],
                        index = [
                            "1048786"]),
                    'VAERSVAX': TestHelper.createDataFrame(
                        columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
                        data = [  ['COVID19',  'MODERNA',  '016M20A', '2'],
                                  ['COVID19',  'MODERNA',  '030L20A', '1']],
                        index = [
                            "1048786",
                            "1048786"],
                        dtypes = {'VAX_DOSE_SERIES': "string"})
                }
            ])
        dataFrameFilter = DataFrameFilter()

        # When
        dataFrame = dataFrameFilter.filterByCovid19(dataFrame)
        dataFrame = dataFrameFilter.filterBy(dataFrame, manufacturer = "MODERNA", dose = '1')

        # Then
        dataFrameExpected = TestHelper.createDataFrame(
            columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
            data = [  [1,      0,          0,         'COVID19',  'MODERNA',  '030L20A', '1']],
            index = [
                "1048786"],
            dtypes = {'VAX_DOSE_SERIES': "string"})
        assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)

    def test_filterBySecondDose(self):
        """Of a report having a first and a second dose row, dose '2' keeps the second dose row only."""
        # Given
        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(
            [
                {
                    'VAERSDATA': TestHelper.createDataFrame(
                        columns = ['DIED', 'L_THREAT', 'DISABLE'],
                        data = [  [1,      0,          0]],
                        index = [
                            "1048786"]),
                    'VAERSVAX': TestHelper.createDataFrame(
                        columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
                        data = [  ['COVID19',  'MODERNA',  '016M20A',  '2'],
                                  ['COVID19',  'MODERNA',  '030L20A',  '1']],
                        index = [
                            "1048786",
                            "1048786"],
                        dtypes = {'VAX_DOSE_SERIES': "string"})
                }
            ])
        dataFrameFilter = DataFrameFilter()

        # When
        dataFrame = dataFrameFilter.filterByCovid19(dataFrame)
        dataFrame = dataFrameFilter.filterBy(dataFrame, manufacturer = "MODERNA", dose = '2')

        # Then
        dataFrameExpected = TestHelper.createDataFrame(
            columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
            data = [  [1,      0,          0,         'COVID19',  'MODERNA',  '016M20A', '2']],
            index = [
                "1048786"],
            dtypes = {'VAX_DOSE_SERIES': "string"})
        assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)


In [None]:
from pandas.testing import assert_frame_equal

class BatchCodeTableFactoryTest(unittest.TestCase):
    """Tests of the batch code tables created by BatchCodeTableFactory."""

    def test_createSevereEffectsBatchCodeTable(self):
        # Given
        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(
            [
                {
                    'VAERSDATA': TestHelper.createDataFrame(
                        columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],
                        data = [  [1,      1,          0,         1,          1],
                                  [0,      0,          1,         0,          1]],
                        index = [
                            "0916600",
                            "0916601"]),
                    'VAERSVAX': TestHelper.createDataFrame(
                        # '\\' is an escaped backslash: the manufacturer value is PFIZER\BIONTECH
                        columns = ['VAX_TYPE', 'VAX_MANU',         'VAX_LOT', 'VAX_DOSE_SERIES'],
                        data = [  ['COVID19',  'MODERNA',          '037K20A', '1'],
                                  ['COVID19',  'PFIZER\\BIONTECH', '025L20A', '1']],
                        index = [
                            "0916600",
                            "0916601"],
                        dtypes = {'VAX_DOSE_SERIES': "string"})
                }
            ])
        dataFrame = SevereColumnAdder.addSevereColumn(dataFrame)

        # When
        batchCodeTable = BatchCodeTableFactory.createSevereEffectsBatchCodeTable(dataFrame, '1')

        # Then
        batchCodeTableExpected = pd.DataFrame(
            data = {
                'Total Number of Adverse Reaction Reports': [1, 1],
                'Deaths': [0, 1],
                'Disabilities': [1, 0],
                'Life Threatening Illnesses': [0, 1],
                'Hospitalisations': [0, 1],
                'Emergency Room or Doctor Visits': [1, 1],
                'Company': ['PFIZER\\BIONTECH', 'MODERNA']
            },
            index = pd.Index(['025L20A', '037K20A'], name = 'VAX_LOT'))
        assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)

    def test_createBatchCodeTable(self):
        # Given
        dataFrame = self._createDataFrameOfTwoVaersDescrs()

        # When / Then
        self._test_createBatchCodeTable(dataFrame, '1')

    def test_createBatchCodeTable_minADRsForLethality(self):
        # Given
        dataFrame = self._createDataFrameOfTwoVaersDescrs()
        dataFrame = SevereColumnAdder.addSevereColumn(dataFrame)

        # When
        batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(dataFrame, dose = '1', minADRsForLethality = 2)

        # Then: lot 037K20A has only 1 report (< minADRsForLethality), so no lethality is expected for it
        batchCodeTableExpected = pd.DataFrame(
            data = {
                'Total Number of Adverse Reaction Reports': [2,                   1],
                'Deaths':                                   [0,                   1],
                'Disabilities':                             [2,                   0],
                'Life Threatening Illnesses':               [0,                   0],
                'Company':                                  ['MODERNA',           'MODERNA'],
                'Lethality':                                [0/2 * 100,           np.nan]
            },
            index = pd.Index(['025L20A', '037K20A'], name = 'VAX_LOT'))
        assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)

    def test_createBatchCodeTableFromFiles(self):
        # Given: the same scenario as in test_createBatchCodeTable, but read from the CSV files below test/VAERS
        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(
            VaersDescrReader(dataDir = "test/VAERS").readVaersDescrs(["2021", "2022"]))
        DataFrameNormalizer.normalize(dataFrame)

        # When / Then
        self._test_createBatchCodeTable(dataFrame, '1')

    def _test_createBatchCodeTable(self, dataFrame, dose):
        """Add the severe column to dataFrame, create the batch code table for dose and compare it with the expected table."""
        dataFrame = SevereColumnAdder.addSevereColumn(dataFrame)

        # When
        batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(dataFrame, dose)

        # Then
        batchCodeTableExpected = pd.DataFrame(
            data = {
                'Total Number of Adverse Reaction Reports': [2,                   1],
                'Deaths':                                   [0,                   1],
                'Disabilities':                             [2,                   0],
                'Life Threatening Illnesses':               [0,                   0],
                'Company':                                  ['MODERNA',           'MODERNA'],
                'Lethality':                                [0/2 * 100,           1/1 * 100]
            },
            index = pd.Index(['025L20A', '037K20A'], name = 'VAX_LOT'))
        assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)

    @staticmethod
    def _createDataFrameOfTwoVaersDescrs():
        """Create the fixture shared by the createBatchCodeTable tests.

        Two VAERS descriptions are merged. The first one holds one COVID19/MODERNA report
        for lot 037K20A (death) and one for lot 025L20A (disability). The second one holds
        an HPV9 report and one more disability report for lot 025L20A.
        """
        return VaersDescr2DataFrameConverter.createDataFrameFromDescrs(
            [
                {
                    'VAERSDATA': TestHelper.createDataFrame(
                        columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],
                        data = [  [1,      0,          0,         0,          0],
                                  [0,      0,          1,         0,          0]],
                        index = [
                            "0916600",
                            "0916601"]),
                    'VAERSVAX': TestHelper.createDataFrame(
                        columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
                        data = [  ['COVID19',  'MODERNA',  '037K20A', '1'],
                                  ['COVID19',  'MODERNA',  '025L20A', '1']],
                        index = [
                            "0916600",
                            "0916601"],
                        dtypes = {'VAX_DOSE_SERIES': "string"})
                },
                {
                    'VAERSDATA': TestHelper.createDataFrame(
                        columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],
                        data = [  [0,      0,          0,         0,          0],
                                  [0,      0,          1,         0,          0]],
                        index = [
                            "1996873",
                            "1996874"]),
                    'VAERSVAX': TestHelper.createDataFrame(
                        columns = ['VAX_TYPE', 'VAX_MANU',         'VAX_LOT', 'VAX_DOSE_SERIES'],
                        data = [  ['HPV9',     'MERCK & CO. INC.', 'R017624', 'UNK'],
                                  ['COVID19',  'MODERNA',          '025L20A', '1']],
                        index = [
                            "1996873",
                            "1996874"],
                        dtypes = {'VAX_DOSE_SERIES': "string"})
                }
            ])


In [None]:
from pandas.testing import assert_frame_equal

class DoseTableFactoryTest(unittest.TestCase):
    """Tests of the per-dose summary tables created by DoseTableFactory."""

    def test_createDoseTable(self):
        # Given: report 1048786 has a dose-2 and a dose-1 row, report 4711 has a dose-1 row
        reports = TestHelper.createDataFrame(
            columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'HOSPITAL', 'ER_VISIT'],
            data = [
                [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 0, 0],
                [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 0, 0],
                [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 0, 0]],
            index = ["1048786", "1048786", "4711"],
            dtypes = {'VAX_DOSE_SERIES': "string"})
        reportsWithSevereColumn = SevereColumnAdder.addSevereColumn(reports)

        # When
        doseTable = DoseTableFactory.createDoseTable(reportsWithSevereColumn)

        # Then: one row per dose
        doseTableExpected = pd.DataFrame(
            data = {
                'Total Number of Adverse Reaction Reports': [2, 1],
                'Deaths': [2, 1],
                'Disabilities': [1, 0],
                'Life Threatening Illnesses': [1, 0],
                'Severe reports (%)': [2/2 * 100, 1/1 * 100]
            },
            index = pd.Index(['1', '2'], dtype = "string", name = 'Dose'))
        assert_frame_equal(doseTable, doseTableExpected)

    def test_createDoseByMonthTable(self):
        # Given: the same three rows as in test_createDoseTable, all received on 01/01/2021
        receivedDate = pd.to_datetime('01/01/2021', format = "%m/%d/%Y")
        reports = TestHelper.createDataFrame(
            columns = ['RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'HOSPITAL', 'ER_VISIT'],
            data = [
                [receivedDate, 1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 0, 0],
                [receivedDate, 1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 0, 0],
                [receivedDate, 1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 0, 0]],
            index = ["1048786", "1048786", "4711"],
            dtypes = {'VAX_DOSE_SERIES': "string"})
        reportsWithSevereColumn = SevereColumnAdder.addSevereColumn(reports)

        # When
        doseByMonthTable = DoseTableFactory.createDoseByMonthTable(reportsWithSevereColumn)

        # Then: one row per (year, month, dose)
        yearMonthDoseIndex = pd.MultiIndex.from_tuples(
            [
                (2021, 1, '1'),
                (2021, 1, '2'),
            ],
            names = ('Year', 'Month', 'Dose'))
        doseByMonthTableExpected = pd.DataFrame(
            data = {
                'Total Number of Adverse Reaction Reports': [2, 1],
                'Deaths': [2, 1],
                'Disabilities': [1, 0],
                'Life Threatening Illnesses': [1, 0],
                'Severe reports (%)': [2/2 * 100, 1/1 * 100]
            },
            index = yearMonthDoseIndex)
        assert_frame_equal(doseByMonthTable, doseByMonthTableExpected, check_index_type = False)


In [None]:
from pandas.testing import assert_frame_equal

class InternationalLotTableFactoryTest(unittest.TestCase):
    """Tests of the per-country tables created by InternationalLotTableFactory."""

    def test_createInternationalLotTable(self):
        # Given
        # The SPLTTYPE values 'dummy' and 123 belong to the two reports that the expected table
        # lists under 'Unknown Country'.
        # NOTE(review): the last two rows have no HOSPITAL / ER_VISIT values (8 instead of 10 entries);
        # presumably they end up as missing values in the frame - confirm that this is intended.
        reports = TestHelper.createDataFrame(
            columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE',                  'HOSPITAL', 'ER_VISIT'],
            data = [  [1,      0,          0,         'COVID19',  'MODERNA',  '016M20A', '2',               'GBPFIZER INC2020486806',    0,          0],
                      [1,      0,          0,         'COVID19',  'MODERNA',  '030L20A', '1',               'FRMODERNATX, INC.MOD20224', 0,          0],
                      [1,      1,          1,         'COVID19',  'MODERNA',  '030L20B', '1',               'FRMODERNATX, INC.MOD20224', 0,          0],
                      [0,      0,          0,         'COVID19',  'MODERNA',  '030L20B', '1',               'dummy'],
                      [0,      0,          0,         'COVID19',  'MODERNA',  '030L20B', '1',               123]],
            index = ["1048786", "1048786", "4711", "0815", "0816"])
        reportsWithSevereColumn = SevereColumnAdder.addSevereColumn(reports)

        # When
        internationalLotTable = InternationalLotTableFactory.createInternationalLotTable(reportsWithSevereColumn)

        # Then: one row per country
        countryIndex = pd.Index(
            ['France', 'United Kingdom', 'Unknown Country'],
            dtype = "string",
            name = 'Country')
        internationalLotTableExpected = TestHelper.createDataFrame(
            columns = ['Total Number of Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Severe reports (%)'],
            data = [
                [2, 2, 1, 1, 2/2 * 100],
                [1, 1, 0, 0, 1/1 * 100],
                [2, 0, 0, 0, 0/2 * 100]],
            index = countryIndex)
        assert_frame_equal(internationalLotTable, internationalLotTableExpected)

    def test_createBatchCodeTableByCountry(self):
        # Given: three rows whose SPLTTYPE starts with 'FR' and one whose SPLTTYPE starts with 'GB'
        reports = TestHelper.createDataFrame(
            columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE',                  'HOSPITAL', 'ER_VISIT'],
            data = [  [1,      0,          0,         'COVID19',  'MODERNA',  '016M20A', '2',               'GBPFIZER INC2020486806',    0,          0],
                      [0,      0,          0,         'COVID19',  'MODERNA',  '030L20A', '1',               'FRMODERNATX, INC.MOD20224', 0,          0],
                      [1,      1,          1,         'COVID19',  'MODERNA',  '030L20B', '1',               'FRMODERNATX, INC.MOD20224', 0,          0],
                      [0,      1,          1,         'COVID19',  'MODERNA',  '030L20B', '1',               'FRMODERNATX, INC.MOD20224', 0,          0]],
            index = ["1048786", "1048786", "4711", "0815"])
        reportsWithSevereColumn = SevereColumnAdder.addSevereColumn(reports)

        # When
        batchCodeTable = InternationalLotTableFactory.createBatchCodeTableByCountry(reportsWithSevereColumn, 'France')

        # Then: one row per lot of the requested country
        batchCodeTableExpected = TestHelper.createDataFrame(
            columns = ['Total Number of Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Severe reports (%)'],
            data = [
                [2, 1, 2, 2, 2/2 * 100],
                [1, 0, 0, 0, 0/1 * 100]],
            index = pd.Index(['030L20B', '030L20A'], name = 'VAX_LOT'))
        assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)


In [None]:
# Run the unittest.TestCase classes defined in the cells above inside the notebook kernel.
# argv = [''] stops unittest from parsing the kernel's own command line arguments,
# exit = False stops it from calling sys.exit() after the test run.
unittest.main(argv = [''], verbosity = 2, exit = False)

In [None]:
def getVaers(vaersDescrsReaderFunc):
    """Read VAERS descriptions and turn them into one normalized data frame with the severe column added.

    vaersDescrsReaderFunc is called without arguments and has to return the list of
    VAERS descriptions that is handed to VaersDescr2DataFrameConverter.
    """
    vaersFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrsReaderFunc())
    # normalize() works on the passed frame; its return value is not used
    DataFrameNormalizer.normalize(vaersFrame)
    return SevereColumnAdder.addSevereColumn(vaersFrame)

def getVaersDescrReader():
    """Create the reader for the VAERS CSV files located in the directory "VAERS"."""
    vaersDataDir = "VAERS"
    return VaersDescrReader(dataDir = vaersDataDir)

def getAllVaers():
    """Return the prepared data frame (see getVaers) of the VAERS files of the years 2021 and 2022."""
    def readVaersDescrsOfAllYears():
        return getVaersDescrReader().readVaersDescrs(["2021", "2022"])

    return getVaers(readVaersDescrsOfAllYears)

def getNonDomesticVaers():
    """Return the prepared data frame (see getVaers) of the non-domestic VAERS files."""
    def readNonDomesticVaersDescrs():
        # getVaers expects a list of descriptions, so the single description gets wrapped
        return [getVaersDescrReader().readNonDomesticVaersDescr()]

    return getVaers(readNonDomesticVaersDescrs)

def getInternationalVaers():
    """Return the rows of getAllVaers() followed by the rows of getNonDomesticVaers() in one data frame."""
    allVaers = getAllVaers()
    nonDomesticVaers = getNonDomesticVaers()
    return pd.concat([allVaers, nonDomesticVaers])

def getVaersForYear(year):
    """Return the prepared data frame (see getVaers) of the VAERS files of a single year.

    year is passed on to VaersDescrReader.readVaersDescr, which builds the CSV file names
    from it, so it has to be a string such as "2021".
    """
    def readVaersDescrsOfYear():
        # getVaers expects a list of descriptions, so the single description gets wrapped
        return [getVaersDescrReader().readVaersDescr(year)]

    return getVaers(readVaersDescrsOfYear)

In [None]:
# Prepared VAERS reports of the years 2021 and 2022 (see getAllVaers).
vaers = getAllVaers()

In [None]:
# Prepared reports of the NonDomesticVAERS*.csv files (see getNonDomesticVaers).
nonDomesticVaers = getNonDomesticVaers()

In [None]:
# Combine the frames loaded in the two cells above. This yields the same rows as
# getInternationalVaers() without reading and normalizing all CSV files a second time.
internationalVaers = pd.concat([vaers, nonDomesticVaers])

### Batch codes

In [None]:
def saveBatchCodeTable(vaers, file, dose = '1', minADRsForLethality = 100):
    """Create the batch code table for one dose, display it and save it.

    vaers: prepared VAERS data frame (as returned by getVaers).
    file: target passed on to IOUtils.saveDataFrame.
    dose: dose the table is created for, defaults to '1'.
    minADRsForLethality: passed on to BatchCodeTableFactory.createBatchCodeTable, defaults to 100.
    """
    batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(
        vaers,
        dose = dose,
        minADRsForLethality = minADRsForLethality)
    # the index of the table is shown and saved under the name "Batch"
    batchCodeTable.index.set_names("Batch", inplace = True)
    display(batchCodeTable)
    IOUtils.saveDataFrame(batchCodeTable, file)

In [None]:
# https://www.howbadismybatch.com/moderna.html
# Create, display and save the batch code table of the combined yearly and non-domestic reports.
saveBatchCodeTable(internationalVaers, "../data/batchCodeTable")

### Short-list of 2000 batches having severe effects

In [None]:
def saveSevereEffectsBatchCodeTable(vaers, file):
    """Create the severe effects batch code table for dose '1', display it and save it.

    vaers: prepared VAERS data frame (as returned by getVaers).
    file: target passed on to IOUtils.saveDataFrame.
    """
    table = BatchCodeTableFactory.createSevereEffectsBatchCodeTable(vaers, dose = '1')
    display(table)
    IOUtils.saveDataFrame(table, file)

In [None]:
# Create, display and save the severe effects table of the 2021 and 2022 reports.
saveSevereEffectsBatchCodeTable(vaers, 'results/severeEffects')

### Variation in Effect of First and Second Doses

In [None]:
# https://www.howbadismybatch.com/firstsecond.html
# The table is the last expression of the cell, so the notebook displays it.
DoseTableFactory.createDoseTable(vaers)

In [None]:
# Create the dose-by-month table, save it and display it as the result of the cell.
doseByMonthTable = DoseTableFactory.createDoseByMonthTable(vaers)
IOUtils.saveDataFrame(doseByMonthTable, 'results/firstsecond/doseByMonthTable')
doseByMonthTable

### International Deadly Lots

In [None]:
# https://www.howbadismybatch.com/international.html

In [None]:
# Per-country summary of the non-domestic reports.
internationalLotTable = InternationalLotTableFactory.createInternationalLotTable(nonDomesticVaers)

In [None]:
# Keep only the countries with more than 50 adverse reaction reports, then save and display the table.
# The name internationalLotTable is rebound here, so the unfiltered table is no longer available afterwards.
internationalLotTable = internationalLotTable[internationalLotTable['Total Number of Adverse Reaction Reports'] > 50]
IOUtils.saveDataFrame(internationalLotTable, 'results/international/International_Deadly_Lots')
internationalLotTable

In [None]:
def createAndSaveAndDisplayBatchCodeTableByCountry(nonDomesticVaers, country):
    """Create the batch code table of one country, save it and display it.

    Only the batches with more than 50 adverse reaction reports are kept.
    The table is saved under 'results/international/' followed by the country name.
    """
    countryTable = InternationalLotTableFactory.createBatchCodeTableByCountry(nonDomesticVaers, country)
    hasMoreThan50Reports = countryTable['Total Number of Adverse Reaction Reports'] > 50
    frequentBatches = countryTable[hasMoreThan50Reports]
    IOUtils.saveDataFrame(frequentBatches, 'results/international/' + country)
    display(country + ":", frequentBatches)

def createAndSaveAndDisplayBatchCodeTablesByCountry(nonDomesticVaers, countries):
    """Create, save and display the batch code table of every country in countries, in the given order."""
    for countryName in countries:
        createAndSaveAndDisplayBatchCodeTableByCountry(nonDomesticVaers, countryName)

In [None]:
# One saved and displayed batch code table per listed country.
createAndSaveAndDisplayBatchCodeTablesByCountry(
    nonDomesticVaers,
    [
        'United Kingdom',
        'France',
        'Germany',
        'Japan',
        'Italy',
        'Austria',
        'Netherlands',
        'Spain',
        'Belgium',
        'Sweden',
        'Portugal',
        'Australia'
    ])

### Batch Clusters

#### Pfizer Batches

See https://www.howbadismybatch.com/clusters.html

In [None]:
def createADRsByVAX_LOTTable(vaers, manufacturer):
    """Return the number of adverse reaction reports per VAX_LOT for the COVID-19 reports of one manufacturer.

    The result keeps only the column 'Total Number of Adverse Reaction Reports' of the
    summation table; reset_index() moves the index of that table into a regular column.
    """
    covid19Reports = DataFrameFilter().filterByCovid19(vaers)
    manufacturerReports = DataFrameFilter().filterBy(covid19Reports, manufacturer = manufacturer)
    summationTable = BatchCodeTableFactory._createSummationTableByVAX_LOT(manufacturerReports)
    return summationTable[['Total Number of Adverse Reaction Reports']].reset_index()

def filterColumnOfDataFrameWithRegexp(dataFrame, column, regexp):
    """Return the rows of dataFrame whose value in column matches regexp.

    dataFrame: the data frame to filter (it is not modified).
    column: name of the column whose values are tested.
    regexp: compiled regular expression; it is applied with match(), i.e. anchored at the start of the value.

    Values that are not strings (e.g. missing values) never match instead of raising a TypeError.
    """
    def matchesRegexp(columnValue):
        return isinstance(columnValue, str) and regexp.match(columnValue) is not None

    # astype(bool) guarantees a boolean mask even when the column is empty;
    # an empty object-dtype mask would not be guaranteed to select rows
    isMatch = dataFrame[column].apply(matchesRegexp).astype(bool)
    return dataFrame[isMatch]


In [None]:
import re

# "\\" is an escaped backslash: the manufacturer value is PFIZER\BIONTECH.
# (The unescaped "\B" is an invalid escape sequence that newer Python versions warn about.)
batchCodeTable = createADRsByVAX_LOTTable(vaers, "PFIZER\\BIONTECH")
# the first two characters of the lot code form the prefix the batches are grouped by
batchCodeTable['VAX_LOT_PREFIX'] = batchCodeTable['VAX_LOT'].str[:2]
batchCodeTable = batchCodeTable.sort_values(by = 'VAX_LOT_PREFIX', ascending = True)
twoLetterPrefix = re.compile(r'^[a-zA-Z]{2}')
batchCodeTable = filterColumnOfDataFrameWithRegexp(dataFrame = batchCodeTable, column = 'VAX_LOT_PREFIX', regexp = twoLetterPrefix)
batchCodeTable = batchCodeTable[batchCodeTable['VAX_LOT_PREFIX'].isin(['EN', 'EP', 'ER', 'EW', 'FA', 'FC', 'FD', 'FE', 'FH'])]
# only batches with more than 400 adverse reaction reports are plotted below
batchCodeTable = batchCodeTable[batchCodeTable['Total Number of Adverse Reaction Reports'] > 400]
batchCodeTable

In [None]:
import seaborn as sns

# Default seaborn theme with a figure size of 11.7 x 8.27 inches (A4 landscape).
sns.set_theme(rc = {'figure.figsize': (11.7, 8.27)})
# one dot per batch, grouped by the two-letter lot prefix
chart = sns.stripplot(
    data = batchCodeTable,
    x = "VAX_LOT_PREFIX",
    y = "Total Number of Adverse Reaction Reports")

In [None]:
# Mean number of adverse reaction reports per lot prefix.
sns.pointplot(x = "VAX_LOT_PREFIX", y = "Total Number of Adverse Reaction Reports", data = batchCodeTable, estimator = np.mean)

In [None]:
import seaborn as sns
# Switch the theme for the box plot; the theme stays active for all following plots.
sns.set_theme(style = "ticks", palette = "pastel")

# Distribution of the number of adverse reaction reports per lot prefix.
sns.boxplot(x = "VAX_LOT_PREFIX", y = "Total Number of Adverse Reaction Reports", data = batchCodeTable)

#### Moderna Batches

In [None]:
import re

batchCodeTable = createADRsByVAX_LOTTable(vaers, "MODERNA")
# keep only the lot codes that start with three digits followed by a letter, e.g. 037K20A
modernaBatchCodePrefix = re.compile(r'^[0-9]{3}[a-zA-Z]')
batchCodeTable = filterColumnOfDataFrameWithRegexp(dataFrame = batchCodeTable, column = 'VAX_LOT', regexp = modernaBatchCodePrefix)
# CONCENTRATION is the letter at the fourth position of the lot code.
# assign() returns a new frame; setting the column directly on the filtered frame
# can trigger pandas' SettingWithCopyWarning.
batchCodeTable = batchCodeTable.assign(CONCENTRATION = batchCodeTable['VAX_LOT'].str[3])
batchCodeTable = batchCodeTable.sort_values(by = 'CONCENTRATION', ascending = True)
# only batches with more than 400 adverse reaction reports are plotted below
batchCodeTable = batchCodeTable[batchCodeTable['Total Number of Adverse Reaction Reports'] > 400]
batchCodeTable

In [None]:
import seaborn as sns

# Order of the CONCENTRATION letters on the x axis of the Moderna plots.
order = ['J', 'K', 'L', 'M', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']

# Default seaborn theme with a figure size of 11.7 x 8.27 inches (A4 landscape).
sns.set_theme(rc = {'figure.figsize': (11.7, 8.27)})
# one dot per batch, grouped by the CONCENTRATION letter
chart = sns.stripplot(
    data = batchCodeTable,
    x = "CONCENTRATION",
    y = "Total Number of Adverse Reaction Reports",
    order = order)

In [None]:
# Mean number of adverse reaction reports per CONCENTRATION letter.
sns.pointplot(x = "CONCENTRATION", y = "Total Number of Adverse Reaction Reports", data = batchCodeTable, estimator = np.mean, order = order)

In [None]:
import seaborn as sns
# Switch the theme for the box plot; the theme stays active for all following plots.
sns.set_theme(style = "ticks", palette = "pastel")

# Distribution of the number of adverse reaction reports per CONCENTRATION letter.
sns.boxplot(x = "CONCENTRATION", y = "Total Number of Adverse Reaction Reports", data = batchCodeTable, order = order)

### COVID-19 Vaccines vs. Flu Vaccines

See https://www.bitchute.com/video/4HlIyBmOEJeY/ and https://www.bitchute.com/video/8wJYP2NpGwN2/