Files
HowBadIsMyBatch/HowBadIsMyBatch.ipynb
frankknoll ac2deaef81 refactoring
2022-02-01 08:51:55 +01:00

1261 lines
47 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "9de5907f-18f5-4cb1-903e-26028ff1fa03",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# render at most 100 rows and never cut off columns when a frame is displayed\n",
"pd.options.display.max_rows = 100\n",
"pd.options.display.max_columns = None"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a271254b",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"class VaersDescrReader:\n",
"    \"\"\"Reads the per-year VAERS CSV files located below a data directory.\"\"\"\n",
"\n",
"    def __init__(self, dataDir):\n",
"        \"\"\"Store dataDir, the directory that contains the <year>VAERSData folders.\"\"\"\n",
"        self.dataDir = dataDir\n",
"\n",
"    def readAllVaersDescrs(self):\n",
"        \"\"\"Return the descriptions for the years 2021 and 2022, in that order.\"\"\"\n",
"        return self.readVaersDescrs([\"2021\", \"2022\"])\n",
"\n",
"    def readVaersDescrs(self, years):\n",
"        \"\"\"Return a list holding one description (see readVaersDescr) per entry of years.\"\"\"\n",
"        return [self.readVaersDescr(year) for year in years]\n",
"\n",
"    def readVaersDescr(self, year):\n",
"        \"\"\"Read <year>VAERSDATA.csv and <year>VAERSVAX.csv for the given year string.\n",
"\n",
"        Returns a dict with the keys 'VAERSDATA' and 'VAERSVAX'. Each value is a\n",
"        DataFrame indexed by VAERS_ID that contains only the columns listed below.\n",
"        \"\"\"\n",
"        folder = self.dataDir + \"/\" + year + \"VAERSData/\"\n",
"        return {\n",
"            'VAERSDATA':\n",
"                self._read_csv(\n",
"                    folder + year + \"VAERSDATA.csv\",\n",
"                    ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT']),\n",
"            'VAERSVAX':\n",
"                self._read_csv(\n",
"                    folder + year + \"VAERSVAX.csv\",\n",
"                    ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n",
"                    # the dose series is read as text, it holds values such as 'UNK'\n",
"                    dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n",
"        }\n",
"\n",
"    def _read_csv(self, file, usecols, dtype = None):\n",
"        \"\"\"Load the columns usecols of a latin1 encoded CSV file, indexed by VAERS_ID.\n",
"\n",
"        dtype is handed to pandas.read_csv unchanged; None (the default) lets\n",
"        pandas infer the column types. The default is None rather than {} so\n",
"        that no mutable object is shared between calls.\n",
"        \"\"\"\n",
"        return pd.read_csv(\n",
"            file,\n",
"            index_col = 'VAERS_ID',\n",
"            encoding = 'latin1',\n",
"            low_memory = False,\n",
"            usecols = usecols,\n",
"            dtype = dtype)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "7b5d6df0",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"class VaersDescr2DataFrameConverter:\n",
"    \"\"\"Turns VAERS descriptions (dicts holding a 'VAERSDATA' and a 'VAERSVAX' frame) into one DataFrame.\"\"\"\n",
"\n",
"    @staticmethod\n",
"    def createDataFrameFromDescr(vaersDescr):\n",
"        \"\"\"Left-join the VAERSVAX rows onto the VAERSDATA rows via their shared index.\"\"\"\n",
"        reports = vaersDescr['VAERSDATA']\n",
"        vaccinations = vaersDescr['VAERSVAX']\n",
"        # validate: an index value may repeat in VAERSVAX but must be unique in VAERSDATA\n",
"        return reports.merge(\n",
"            vaccinations,\n",
"            how = 'left',\n",
"            left_index = True,\n",
"            right_index = True,\n",
"            validate = 'one_to_many')\n",
"\n",
"    @staticmethod\n",
"    def createDataFrameFromDescrs(vaersDescrs):\n",
"        \"\"\"Convert every description and stack the resulting frames in input order.\"\"\"\n",
"        convert = VaersDescr2DataFrameConverter.createDataFrameFromDescr\n",
"        return pd.concat(list(map(convert, vaersDescrs)))\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "3ebcba86",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"class DataFrameFilter:\n",
"    \"\"\"Row filters for the data frame that merges the VAERSDATA and VAERSVAX columns.\"\"\"\n",
"\n",
"    @staticmethod\n",
"    def filterDataFrame(df, manufacturer = None, dose = None):\n",
"        \"\"\"Keep the COVID19 rows of df, optionally narrowed by manufacturer and dose.\n",
"\n",
"        manufacturer: exact VAX_MANU value to keep; None keeps every manufacturer.\n",
"        dose: pattern looked up in VAX_DOSE_SERIES via str.contains (so regular\n",
"            expression syntax applies); None keeps every dose.\n",
"        Returns the matching rows of df.\n",
"        \"\"\"\n",
"        isCovid19 = df[\"VAX_TYPE\"] == \"COVID19\"\n",
"        isManufacturer = df[\"VAX_MANU\"] == manufacturer if manufacturer is not None else True\n",
"        # na = False: a row without a dose entry is a plain non-match, so the mask\n",
"        # stays free of missing values whatever dtype the column has\n",
"        isDose = df[\"VAX_DOSE_SERIES\"].str.contains(dose, na = False) if dose is not None else True\n",
"        return df[isCovid19 & isManufacturer & isDose]\n",
"\n",
"    @staticmethod\n",
"    def filterDataFrameForSevereEffects(df, dose):\n",
"        \"\"\"Keep the COVID19 rows of the given dose, across all manufacturers.\"\"\"\n",
"        return DataFrameFilter.filterDataFrame(df, dose = dose)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "99945ca8",
"metadata": {},
"outputs": [],
"source": [
"def createBatchCodeTable(df : pd.DataFrame):\n",
"    \"\"\"Count, per batch code (VAX_LOT), all rows of df and the rows flagged with 'Y'.\n",
"\n",
"    Returns a DataFrame with the integer columns 'ADRs' (all rows), 'DEATHS'\n",
"    (DIED == 'Y'), 'DISABILITIES' (DISABLE == 'Y') and 'LIFE THREATENING ILLNESSES'\n",
"    (L_THREAT == 'Y'). A batch code without any flagged row gets the count 0.\n",
"    \"\"\"\n",
"    def countBatchCodes(col = None):\n",
"        # count all rows, or only those whose column col equals 'Y'\n",
"        rows = df if col is None else df[df[col] == 'Y']\n",
"        return rows[['VAX_LOT']].value_counts()\n",
"\n",
"    batchCodeTableDict = {\n",
"        'ADRs': countBatchCodes(),\n",
"        'DEATHS': countBatchCodes('DIED'),\n",
"        'DISABILITIES': countBatchCodes('DISABLE'),\n",
"        'LIFE THREATENING ILLNESSES': countBatchCodes('L_THREAT')\n",
"    }\n",
"    # aligning the counts leaves NaN (and thereby a float column) wherever a batch\n",
"    # code has no flagged row: NaN means zero, and counts are whole numbers\n",
"    return pd.concat(batchCodeTableDict, axis = 'columns').fillna(0).astype(int)\n",
"\n",
"def createManufacturerByBatchCodeTable(df):\n",
"    \"\"\"Map every batch code (VAX_LOT) to the VAX_MANU of the first row of df that carries it.\"\"\"\n",
"    return (\n",
"        df[['VAX_LOT', 'VAX_MANU']]\n",
"        .drop_duplicates(subset = ['VAX_LOT'])\n",
"        .set_index('VAX_LOT'))\n",
"\n",
"def createCompanyByBatchCodeTable(df):\n",
"    \"\"\"Return the manufacturer-by-batch-code lookup with its column named COMPANY.\"\"\"\n",
"    manufacturerByBatchCodeTable = createManufacturerByBatchCodeTable(df)\n",
"    return manufacturerByBatchCodeTable.rename(columns = {\"VAX_MANU\": \"COMPANY\"})\n",
"\n",
"# create table from https://www.howbadismybatch.com/combined.html\n",
"# FK-TODO: DRY with createBatchCodeTable()\n",
"def createSevereEffectsBatchCodeTable(df):\n",
"    \"\"\"Count, per batch code (VAX_LOT), all rows of df and the rows flagged with 'Y', and add the company.\n",
"\n",
"    Returns a DataFrame indexed by VAX_LOT with the integer columns 'ADRs' (all\n",
"    rows), 'DEATHS' (DIED), 'DISABILITIES' (DISABLE), 'LIFE THREATENING ILLNESSES'\n",
"    (L_THREAT), 'HOSPITALISATIONS' (HOSPITAL) and 'EMERGENCY ROOM OR DOCTOR VISITS'\n",
"    (ER_VISIT), followed by the column 'COMPANY' taken from\n",
"    createCompanyByBatchCodeTable(df).\n",
"    \"\"\"\n",
"    def countBatchCodes(col = None):\n",
"        # count all rows, or only those whose column col equals 'Y'\n",
"        rows = df if col is None else df[df[col] == 'Y']\n",
"        return rows['VAX_LOT'].value_counts()\n",
"\n",
"    batchCodeTableDict = {\n",
"        'ADRs': countBatchCodes(),\n",
"        'DEATHS': countBatchCodes('DIED'),\n",
"        'DISABILITIES': countBatchCodes('DISABLE'),\n",
"        'LIFE THREATENING ILLNESSES': countBatchCodes('L_THREAT'),\n",
"        'HOSPITALISATIONS': countBatchCodes('HOSPITAL'),\n",
"        'EMERGENCY ROOM OR DOCTOR VISITS': countBatchCodes('ER_VISIT')\n",
"    }\n",
"    # zero-fill and cast before the company is merged in: only the counts are\n",
"    # touched, and they stay integers instead of turning into floats\n",
"    batchCodeTable = pd.concat(batchCodeTableDict, axis = 'columns').fillna(0).astype(int)\n",
"    batchCodeTable.index.name = 'VAX_LOT'\n",
"    # add Company column:\n",
"    return pd.merge(\n",
"        batchCodeTable,\n",
"        createCompanyByBatchCodeTable(df),\n",
"        how = 'left',\n",
"        left_index = True,\n",
"        right_index = True,\n",
"        validate = 'one_to_one')\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3dacedfd",
"metadata": {},
"outputs": [],
"source": [
"import unittest"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "e59a1825",
"metadata": {},
"outputs": [],
"source": [
"from pandas.testing import assert_frame_equal\n",
"\n",
"class CreateAndFilterDataFrameTest(unittest.TestCase):\n",
"    \"\"\"Checks VaersDescr2DataFrameConverter together with DataFrameFilter on small hand-made frames.\"\"\"\n",
"\n",
"    def test_createAndFilterDataFrameFromDescrs(self):\n",
"        # Given\n",
"        vaersDescrs = [\n",
"            {\n",
"                'VAERSDATA': self.createDataFrame(\n",
"                    columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
"                    data = [ ['Y', np.nan, np.nan],\n",
"                             [np.nan, np.nan, 'Y']],\n",
"                    index = [\n",
"                        \"0916600\",\n",
"                        \"0916601\"]),\n",
"                'VAERSVAX': self.createDataFrame(\n",
"                    columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
"                    data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n",
"                             ['COVID19', 'MODERNA', '025L20A', '1']],\n",
"                    index = [\n",
"                        \"0916600\",\n",
"                        \"0916601\"],\n",
"                    dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
"            },\n",
"            {\n",
"                'VAERSDATA': self.createDataFrame(\n",
"                    columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
"                    data = [ [np.nan, np.nan, np.nan],\n",
"                             [np.nan, np.nan, 'Y']],\n",
"                    index = [\n",
"                        \"1996873\",\n",
"                        \"1996874\"]),\n",
"                'VAERSVAX': self.createDataFrame(\n",
"                    columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
"                    data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n",
"                             ['COVID19', 'MODERNA', '025L20A', '1']],\n",
"                    index = [\n",
"                        \"1996873\",\n",
"                        \"1996874\"],\n",
"                    dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
"            }\n",
"        ]\n",
"        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
"\n",
"        # When\n",
"        dataFrame = DataFrameFilter.filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n",
"\n",
"        # Then\n",
"        dataFrameExpected = self.createDataFrame(\n",
"            columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
"            data = [ ['Y', np.nan, np.nan, 'COVID19', 'MODERNA', '037K20A', '1'],\n",
"                     [np.nan, np.nan, 'Y', 'COVID19', 'MODERNA', '025L20A', '1'],\n",
"                     [np.nan, np.nan, 'Y', 'COVID19', 'MODERNA', '025L20A', '1']],\n",
"            index = [\n",
"                \"0916600\",\n",
"                \"0916601\",\n",
"                \"1996874\"],\n",
"            dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
"        assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n",
"    def test_createDataFrameFromForSevereEffects(self):\n",
"        # Given\n",
"        vaersDescrs = [\n",
"            {\n",
"                'VAERSDATA': self.createDataFrame(\n",
"                    columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
"                    data = [ ['Y', 'Y', np.nan, 'Y', 'Y'],\n",
"                             [np.nan, np.nan, 'Y', np.nan, 'Y']],\n",
"                    index = [\n",
"                        \"0916600\",\n",
"                        \"0916601\"]),\n",
"                'VAERSVAX': self.createDataFrame(\n",
"                    columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
"                    data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n",
"                             ['COVID19', 'PFIZER\\\\BIONTECH', '025L20A', '1']],\n",
"                    index = [\n",
"                        \"0916600\",\n",
"                        \"0916601\"],\n",
"                    dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
"            }\n",
"        ]\n",
"        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
"\n",
"        # When\n",
"        dataFrame = DataFrameFilter.filterDataFrameForSevereEffects(dataFrame, dose = '1')\n",
"\n",
"        # Then\n",
"        dataFrameExpected = self.createDataFrame(\n",
"            columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
"            data = [ ['Y', 'Y', np.nan, 'Y', 'Y', 'COVID19', 'MODERNA', '037K20A', '1'],\n",
"                     [np.nan, np.nan, 'Y', np.nan, 'Y', 'COVID19', 'PFIZER\\\\BIONTECH', '025L20A', '1']],\n",
"            index = [\n",
"                \"0916600\",\n",
"                \"0916601\"],\n",
"            dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
"        assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n",
"    def test_createAndFilterDataFrameFromDescrsWithFirstDose(self):\n",
"        # Given\n",
"        vaersDescrs = [\n",
"            {\n",
"                'VAERSDATA': self.createDataFrame(\n",
"                    columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
"                    data = [ ['Y', np.nan, np.nan]],\n",
"                    index = [\n",
"                        \"1048786\"]),\n",
"                'VAERSVAX': self.createDataFrame(\n",
"                    columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
"                    data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n",
"                             ['COVID19', 'MODERNA', '030L20A', '1']],\n",
"                    index = [\n",
"                        \"1048786\",\n",
"                        \"1048786\"],\n",
"                    dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
"            }\n",
"        ]\n",
"        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
"\n",
"        # When\n",
"        dataFrame = DataFrameFilter.filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n",
"\n",
"        # Then\n",
"        dataFrameExpected = self.createDataFrame(\n",
"            columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
"            data = [ ['Y', np.nan, np.nan, 'COVID19', 'MODERNA', '030L20A', '1']],\n",
"            index = [\n",
"                \"1048786\"],\n",
"            dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
"        assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n",
"    def test_createAndFilterDataFrameFromDescrsWithSecondDose(self):\n",
"        # Given\n",
"        vaersDescrs = [\n",
"            {\n",
"                'VAERSDATA': self.createDataFrame(\n",
"                    columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
"                    data = [ ['Y', np.nan, np.nan]],\n",
"                    index = [\n",
"                        \"1048786\"]),\n",
"                'VAERSVAX': self.createDataFrame(\n",
"                    columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
"                    data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n",
"                             ['COVID19', 'MODERNA', '030L20A', '1']],\n",
"                    index = [\n",
"                        \"1048786\",\n",
"                        \"1048786\"],\n",
"                    dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
"            }\n",
"        ]\n",
"        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
"\n",
"        # When\n",
"        dataFrame = DataFrameFilter.filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '2')\n",
"\n",
"        # Then\n",
"        dataFrameExpected = self.createDataFrame(\n",
"            columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
"            data = [ ['Y', np.nan, np.nan, 'COVID19', 'MODERNA', '016M20A', '2']],\n",
"            index = [\n",
"                \"1048786\"],\n",
"            dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
"        assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n",
"    def createDataFrame(self, index, columns, data, dtypes = None):\n",
"        \"\"\"Build a DataFrame from rows of data; dtypes optionally maps column names to the dtype to cast to.\"\"\"\n",
"        dataFrame = pd.DataFrame(index = index, columns = columns, data = data)\n",
"        return dataFrame if dtypes is None else dataFrame.astype(dtypes)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "e14465d7",
"metadata": {},
"outputs": [],
"source": [
"from pandas.testing import assert_frame_equal\n",
"\n",
"class BatchCodeTableTest(unittest.TestCase):\n",
"    \"\"\"Checks createBatchCodeTable() once on in-memory frames and once on the CSV files below test/VAERS.\"\"\"\n",
"\n",
"    def test_createBatchCodeTable2(self):\n",
"        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n",
"            [\n",
"                {\n",
"                    'VAERSDATA': self.createDataFrame(\n",
"                        columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
"                        data = [ ['Y', np.nan, np.nan],\n",
"                                 [np.nan, np.nan, 'Y']],\n",
"                        index = [\n",
"                            \"0916600\",\n",
"                            \"0916601\"]),\n",
"                    'VAERSVAX': self.createDataFrame(\n",
"                        columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
"                        data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n",
"                                 ['COVID19', 'MODERNA', '025L20A', '1']],\n",
"                        index = [\n",
"                            \"0916600\",\n",
"                            \"0916601\"],\n",
"                        dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
"                },\n",
"                {\n",
"                    'VAERSDATA': self.createDataFrame(\n",
"                        columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
"                        data = [ [np.nan, np.nan, np.nan],\n",
"                                 [np.nan, np.nan, 'Y']],\n",
"                        index = [\n",
"                            \"1996873\",\n",
"                            \"1996874\"]),\n",
"                    'VAERSVAX': self.createDataFrame(\n",
"                        columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
"                        data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n",
"                                 ['COVID19', 'MODERNA', '025L20A', '1']],\n",
"                        index = [\n",
"                            \"1996873\",\n",
"                            \"1996874\"],\n",
"                        dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
"                }\n",
"            ])\n",
"        dataFrame = DataFrameFilter.filterDataFrame(dataFrame, manufacturer = \"MODERNA\", dose = '1')\n",
"        self._test_createBatchCodeTable(dataFrame)\n",
"\n",
"    def test_createBatchCodeTable(self):\n",
"        # same expectation as above, this time with the data read from the CSV files below test/VAERS\n",
"        self._test_createBatchCodeTable(\n",
"            DataFrameFilter.filterDataFrame(\n",
"                VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n",
"                    VaersDescrReader(\"test/VAERS\").readAllVaersDescrs()),\n",
"                manufacturer = \"MODERNA\",\n",
"                dose = '1'))\n",
"\n",
"    def _test_createBatchCodeTable(self, dataFrame):\n",
"        \"\"\"Assert that createBatchCodeTable(dataFrame) yields the table both tests expect.\"\"\"\n",
"        # When\n",
"        batchCodeTable = createBatchCodeTable(dataFrame)\n",
"\n",
"        # Then\n",
"        batchCodeTableExpected = pd.DataFrame(\n",
"            data = {\n",
"                'ADRs': [2, 1],\n",
"                'DEATHS': [0, 1],\n",
"                'DISABILITIES': [2, 0],\n",
"                'LIFE THREATENING ILLNESSES': [0, 0]\n",
"            },\n",
"            index = pd.MultiIndex.from_arrays([['025L20A', '037K20A']], names = ('VAX_LOT',)))\n",
"        assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)\n",
"\n",
"    def createDataFrame(self, index, columns, data, dtypes = None):\n",
"        \"\"\"Build a DataFrame from rows of data; dtypes optionally maps column names to the dtype to cast to.\"\"\"\n",
"        dataFrame = pd.DataFrame(index = index, columns = columns, data = data)\n",
"        return dataFrame if dtypes is None else dataFrame.astype(dtypes)\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "ded70c87",
"metadata": {},
"outputs": [],
"source": [
"from pandas.testing import assert_frame_equal\n",
"\n",
"class SevereEffectsBatchCodeTableTest(unittest.TestCase):\n",
"    \"\"\"Checks createSevereEffectsBatchCodeTable() on a small in-memory frame.\"\"\"\n",
"\n",
"    def test_createSevereEffectsBatchCodeTable(self):\n",
"        # Given\n",
"        dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n",
"            [\n",
"                {\n",
"                    'VAERSDATA': self.createDataFrame(\n",
"                        columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
"                        data = [ ['Y', 'Y', np.nan, 'Y', 'Y'],\n",
"                                 [np.nan, np.nan, 'Y', np.nan, 'Y']],\n",
"                        index = [\n",
"                            \"0916600\",\n",
"                            \"0916601\"]),\n",
"                    'VAERSVAX': self.createDataFrame(\n",
"                        columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
"                        data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n",
"                                 ['COVID19', 'PFIZER\\\\BIONTECH', '025L20A', '1']],\n",
"                        index = [\n",
"                            \"0916600\",\n",
"                            \"0916601\"],\n",
"                        dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
"                }\n",
"            ]\n",
"        )\n",
"        dataFrame = DataFrameFilter.filterDataFrameForSevereEffects(dataFrame, dose = '1')\n",
"\n",
"        # When\n",
"        batchCodeTable = createSevereEffectsBatchCodeTable(dataFrame)\n",
"\n",
"        # Then\n",
"        batchCodeTableExpected = pd.DataFrame(\n",
"            data = {\n",
"                'ADRs': [1, 1],\n",
"                'DEATHS': [1, 0],\n",
"                'DISABILITIES': [0, 1],\n",
"                'LIFE THREATENING ILLNESSES': [1, 0],\n",
"                'HOSPITALISATIONS': [1, 0],\n",
"                'EMERGENCY ROOM OR DOCTOR VISITS': [1, 1],\n",
"                'COMPANY': ['MODERNA', 'PFIZER\\\\BIONTECH']\n",
"            },\n",
"            index = pd.Index(['037K20A', '025L20A'], name='VAX_LOT'))\n",
"        assert_frame_equal(batchCodeTable, batchCodeTableExpected, check_dtype = False)\n",
"\n",
"    def createDataFrame(self, index, columns, data, dtypes = None):\n",
"        \"\"\"Build a DataFrame from rows of data; dtypes optionally maps column names to the dtype to cast to.\"\"\"\n",
"        dataFrame = pd.DataFrame(index = index, columns = columns, data = data)\n",
"        return dataFrame if dtypes is None else dataFrame.astype(dtypes)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "5a8bff1b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"test_createBatchCodeTable (__main__.BatchCodeTableTest) ... ok\n",
"test_createBatchCodeTable2 (__main__.BatchCodeTableTest) ... ok\n",
"test_createAndFilterDataFrameFromDescrs (__main__.CreateAndFilterDataFrameTest) ... ok\n",
"test_createAndFilterDataFrameFromDescrsWithFirstDose (__main__.CreateAndFilterDataFrameTest) ... ok\n",
"test_createAndFilterDataFrameFromDescrsWithSecondDose (__main__.CreateAndFilterDataFrameTest) ... ok\n",
"test_createDataFrameFromForSevereEffects (__main__.CreateAndFilterDataFrameTest) ... ok\n",
"test_createSevereEffectsBatchCodeTable (__main__.SevereEffectsBatchCodeTableTest) ... ok\n",
"\n",
"----------------------------------------------------------------------\n",
"Ran 7 tests in 0.156s\n",
"\n",
"OK\n"
]
},
{
"data": {
"text/plain": [
"<unittest.main.TestProgram at 0x7f8aa7824220>"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# argv = [''] keeps unittest from parsing the kernel's own command line and\n",
"# exit = False keeps it from calling sys.exit() after the run; the trailing\n",
"# semicolon hides the TestProgram repr, whose memory address differs on every run\n",
"unittest.main(argv = [''], verbosity = 2, exit = False);"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "86e0e4f2",
"metadata": {},
"outputs": [],
"source": [
"def saveBatchCodeTable(manufacturer, excelFile, dataDir = \"VAERS\", dose = '1'):\n",
"    \"\"\"Build the batch code table of one manufacturer, display it and write it to excelFile.\n",
"\n",
"    manufacturer: VAX_MANU value whose rows are counted.\n",
"    excelFile: target handed to DataFrame.to_excel().\n",
"    dataDir: directory the VAERS CSV files are read from (default \"VAERS\").\n",
"    dose: dose pattern handed to DataFrameFilter.filterDataFrame() (default '1').\n",
"    \"\"\"\n",
"    batchCodeTable = createBatchCodeTable(\n",
"        DataFrameFilter.filterDataFrame(\n",
"            VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n",
"                VaersDescrReader(dataDir).readAllVaersDescrs()),\n",
"            manufacturer = manufacturer,\n",
"            dose = dose))\n",
"    display(manufacturer, batchCodeTable)\n",
"    batchCodeTable.to_excel(excelFile)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "ab170c16",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'MODERNA'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ADRs</th>\n",
" <th>DEATHS</th>\n",
" <th>DISABILITIES</th>\n",
" <th>LIFE THREATENING ILLNESSES</th>\n",
" </tr>\n",
" <tr>\n",
" <th>VAX_LOT</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>039K20A</th>\n",
" <td>3330</td>\n",
" <td>67.0</td>\n",
" <td>22.0</td>\n",
" <td>29.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>026L20A</th>\n",
" <td>3327</td>\n",
" <td>25.0</td>\n",
" <td>24.0</td>\n",
" <td>21.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>011J20A</th>\n",
" <td>3072</td>\n",
" <td>33.0</td>\n",
" <td>27.0</td>\n",
" <td>26.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>025L20A</th>\n",
" <td>2391</td>\n",
" <td>42.0</td>\n",
" <td>16.0</td>\n",
" <td>26.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>012L20A</th>\n",
" <td>2352</td>\n",
" <td>57.0</td>\n",
" <td>28.0</td>\n",
" <td>22.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>028-20A</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>028(L or Z?)20A</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>028 L20A</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>028 L 20A</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>xx3A21A</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10248 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n",
"VAX_LOT \n",
"039K20A 3330 67.0 22.0 29.0\n",
"026L20A 3327 25.0 24.0 21.0\n",
"011J20A 3072 33.0 27.0 26.0\n",
"025L20A 2391 42.0 16.0 26.0\n",
"012L20A 2352 57.0 28.0 22.0\n",
"... ... ... ... ...\n",
"028-20A 1 0.0 0.0 0.0\n",
"028(L or Z?)20A 1 0.0 0.0 0.0\n",
"028 L20A 1 0.0 0.0 0.0\n",
"028 L 20A 1 0.0 0.0 0.0\n",
"xx3A21A 1 0.0 0.0 0.0\n",
"\n",
"[10248 rows x 4 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'PFIZER\\\\BIONTECH'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ADRs</th>\n",
" <th>DEATHS</th>\n",
" <th>DISABILITIES</th>\n",
" <th>LIFE THREATENING ILLNESSES</th>\n",
" </tr>\n",
" <tr>\n",
" <th>VAX_LOT</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>EK5730</th>\n",
" <td>1741</td>\n",
" <td>18.0</td>\n",
" <td>25.0</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>EK9231</th>\n",
" <td>1631</td>\n",
" <td>37.0</td>\n",
" <td>19.0</td>\n",
" <td>21.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>EH9899</th>\n",
" <td>1570</td>\n",
" <td>14.0</td>\n",
" <td>36.0</td>\n",
" <td>18.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>EN6205</th>\n",
" <td>1487</td>\n",
" <td>25.0</td>\n",
" <td>37.0</td>\n",
" <td>31.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>EN6208</th>\n",
" <td>1475</td>\n",
" <td>33.0</td>\n",
" <td>27.0</td>\n",
" <td>25.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>EN6266</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>EN626207</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>EN6262</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>EN6251</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>zw0151</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>6765 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n",
"VAX_LOT \n",
"EK5730 1741 18.0 25.0 16.0\n",
"EK9231 1631 37.0 19.0 21.0\n",
"EH9899 1570 14.0 36.0 18.0\n",
"EN6205 1487 25.0 37.0 31.0\n",
"EN6208 1475 33.0 27.0 25.0\n",
"... ... ... ... ...\n",
"EN6266 1 0.0 0.0 0.0\n",
"EN626207 1 0.0 0.0 0.0\n",
"EN6262 1 0.0 0.0 0.0\n",
"EN6251 1 0.0 0.0 0.0\n",
"zw0151 1 0.0 1.0 0.0\n",
"\n",
"[6765 rows x 4 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'JANSSEN'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ADRs</th>\n",
" <th>DEATHS</th>\n",
" <th>DISABILITIES</th>\n",
" <th>LIFE THREATENING ILLNESSES</th>\n",
" </tr>\n",
" <tr>\n",
" <th>VAX_LOT</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>043A21A</th>\n",
" <td>1692</td>\n",
" <td>28.0</td>\n",
" <td>29.0</td>\n",
" <td>37.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>042A21A</th>\n",
" <td>1461</td>\n",
" <td>43.0</td>\n",
" <td>33.0</td>\n",
" <td>35.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>202A21A</th>\n",
" <td>1159</td>\n",
" <td>22.0</td>\n",
" <td>16.0</td>\n",
" <td>21.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1805018</th>\n",
" <td>1129</td>\n",
" <td>32.0</td>\n",
" <td>30.0</td>\n",
" <td>39.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>201A21A</th>\n",
" <td>1129</td>\n",
" <td>15.0</td>\n",
" <td>27.0</td>\n",
" <td>24.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1808998</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1808996</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1808992</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>180898le</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>z03az1a</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1797 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES\n",
"VAX_LOT \n",
"043A21A 1692 28.0 29.0 37.0\n",
"042A21A 1461 43.0 33.0 35.0\n",
"202A21A 1159 22.0 16.0 21.0\n",
"1805018 1129 32.0 30.0 39.0\n",
"201A21A 1129 15.0 27.0 24.0\n",
"... ... ... ... ...\n",
"1808998 1 0.0 0.0 0.0\n",
"1808996 1 0.0 0.0 0.0\n",
"1808992 1 0.0 0.0 0.0\n",
"180898le 1 0.0 0.0 0.0\n",
"z03az1a 1 0.0 0.0 0.0\n",
"\n",
"[1797 rows x 4 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"saveBatchCodeTable(\"MODERNA\", \"results/moderna.xlsx\")\n",
"# the manufacturer name contains a literal backslash, which has to be escaped in the string literal\n",
"saveBatchCodeTable(\"PFIZER\\\\BIONTECH\", \"results/pfizer.xlsx\")\n",
"saveBatchCodeTable(\"JANSSEN\", \"results/janssen.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "bc56831d",
"metadata": {},
"outputs": [],
"source": [
"def saveSevereEffectsBatchCodeTable(excelFile, dataDir = \"VAERS\", dose = '1'):\n",
"    \"\"\"Build the severe effects table across all manufacturers, display it and write it to excelFile.\n",
"\n",
"    excelFile: target handed to DataFrame.to_excel().\n",
"    dataDir: directory the VAERS CSV files are read from (default \"VAERS\").\n",
"    dose: dose pattern handed to DataFrameFilter.filterDataFrameForSevereEffects() (default '1').\n",
"    \"\"\"\n",
"    severeEffectsBatchCodeTable = createSevereEffectsBatchCodeTable(\n",
"        DataFrameFilter.filterDataFrameForSevereEffects(\n",
"            VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n",
"                VaersDescrReader(dataDir).readAllVaersDescrs()),\n",
"            dose = dose))\n",
"    display('severeEffectsBatchCodeTable', severeEffectsBatchCodeTable)\n",
"    severeEffectsBatchCodeTable.to_excel(excelFile)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "ace3fed9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'severeEffectsBatchCodeTable'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ADRs</th>\n",
" <th>DEATHS</th>\n",
" <th>DISABILITIES</th>\n",
" <th>LIFE THREATENING ILLNESSES</th>\n",
" <th>HOSPITALISATIONS</th>\n",
" <th>EMERGENCY ROOM OR DOCTOR VISITS</th>\n",
" <th>COMPANY</th>\n",
" </tr>\n",
" <tr>\n",
" <th>VAX_LOT</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>026L20A</th>\n",
" <td>3335</td>\n",
" <td>25.0</td>\n",
" <td>24.0</td>\n",
" <td>21.0</td>\n",
" <td>125.0</td>\n",
" <td>0.0</td>\n",
" <td>MODERNA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>039K20A</th>\n",
" <td>3335</td>\n",
" <td>67.0</td>\n",
" <td>22.0</td>\n",
" <td>29.0</td>\n",
" <td>134.0</td>\n",
" <td>0.0</td>\n",
" <td>MODERNA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>011J20A</th>\n",
" <td>3078</td>\n",
" <td>34.0</td>\n",
" <td>27.0</td>\n",
" <td>26.0</td>\n",
" <td>106.0</td>\n",
" <td>0.0</td>\n",
" <td>MODERNA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>025L20A</th>\n",
" <td>2397</td>\n",
" <td>42.0</td>\n",
" <td>16.0</td>\n",
" <td>26.0</td>\n",
" <td>83.0</td>\n",
" <td>0.0</td>\n",
" <td>MODERNA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>012L20A</th>\n",
" <td>2357</td>\n",
" <td>57.0</td>\n",
" <td>29.0</td>\n",
" <td>22.0</td>\n",
" <td>135.0</td>\n",
" <td>1.0</td>\n",
" <td>MODERNA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>EN6207-</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>PFIZER\\BIONTECH</td>\n",
" </tr>\n",
" <tr>\n",
" <th>039820A</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>MODERNA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Blue-218</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>MODERNA</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1808973</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>UNKNOWN MANUFACTURER</td>\n",
" </tr>\n",
" <tr>\n",
" <th>FJ1161</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>PFIZER\\BIONTECH</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>18428 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" ADRs DEATHS DISABILITIES LIFE THREATENING ILLNESSES \\\n",
"VAX_LOT \n",
"026L20A 3335 25.0 24.0 21.0 \n",
"039K20A 3335 67.0 22.0 29.0 \n",
"011J20A 3078 34.0 27.0 26.0 \n",
"025L20A 2397 42.0 16.0 26.0 \n",
"012L20A 2357 57.0 29.0 22.0 \n",
"... ... ... ... ... \n",
"EN6207- 1 0.0 0.0 0.0 \n",
"039820A 1 0.0 0.0 0.0 \n",
"Blue-218 1 0.0 0.0 0.0 \n",
"1808973 1 0.0 0.0 0.0 \n",
"FJ1161 1 0.0 0.0 0.0 \n",
"\n",
" HOSPITALISATIONS EMERGENCY ROOM OR DOCTOR VISITS \\\n",
"VAX_LOT \n",
"026L20A 125.0 0.0 \n",
"039K20A 134.0 0.0 \n",
"011J20A 106.0 0.0 \n",
"025L20A 83.0 0.0 \n",
"012L20A 135.0 1.0 \n",
"... ... ... \n",
"EN6207- 0.0 0.0 \n",
"039820A 0.0 0.0 \n",
"Blue-218 0.0 0.0 \n",
"1808973 0.0 0.0 \n",
"FJ1161 0.0 0.0 \n",
"\n",
" COMPANY \n",
"VAX_LOT \n",
"026L20A MODERNA \n",
"039K20A MODERNA \n",
"011J20A MODERNA \n",
"025L20A MODERNA \n",
"012L20A MODERNA \n",
"... ... \n",
"EN6207- PFIZER\\BIONTECH \n",
"039820A MODERNA \n",
"Blue-218 MODERNA \n",
"1808973 UNKNOWN MANUFACTURER \n",
"FJ1161 PFIZER\\BIONTECH \n",
"\n",
"[18428 rows x 7 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# write the table covering all manufacturers to the results folder\n",
"saveSevereEffectsBatchCodeTable(excelFile = 'results/severeEffects.xlsx')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}