diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index 7e0034e032d..d298a0f5a2b 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -21,34 +21,41 @@ "metadata": {}, "outputs": [], "source": [ - "def read_csv(file, usecols, dtype = {}):\n", - " return pd.read_csv(\n", - " file,\n", - " index_col = 'VAERS_ID',\n", - " encoding = 'latin1',\n", - " low_memory = False,\n", - " usecols = usecols,\n", - " dtype = dtype)\n", + "import pandas as pd\n", "\n", - "def readVaersDescr(dataDir, year):\n", - " folder = dataDir + \"/\" + year + \"VAERSData/\"\n", - " return {\n", - " 'VAERSDATA':\n", - " read_csv(\n", - " folder + year + \"VAERSDATA.csv\",\n", - " ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT']),\n", - " 'VAERSVAX':\n", - " read_csv(\n", - " folder + year + \"VAERSVAX.csv\",\n", - " ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n", - " dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n", - " }\n", + "class VaersDescrReader:\n", + " \n", + " def __init__(self, dataDir):\n", + " self.dataDir = dataDir \n", "\n", - "def readVaersDescrs(dataDir, years):\n", - " return [readVaersDescr(dataDir, year) for year in years]\n", + " def readAllVaersDescrs(self):\n", + " return self.readVaersDescrs([\"2021\", \"2022\"])\n", + " \n", + " def readVaersDescrs(self, years):\n", + " return [self.readVaersDescr(year) for year in years]\n", "\n", - "def readAllVaersDescrs(dataDir):\n", - " return readVaersDescrs(dataDir, [\"2021\", \"2022\"])" + " def readVaersDescr(self, year):\n", + " folder = self.dataDir + \"/\" + year + \"VAERSData/\"\n", + " return {\n", + " 'VAERSDATA':\n", + " self._read_csv(\n", + " folder + year + \"VAERSDATA.csv\",\n", + " ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT']),\n", + " 'VAERSVAX':\n", + " self._read_csv(\n", + " folder + year + \"VAERSVAX.csv\",\n", + " ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n", + " dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n", + " }\n", + "\n", + " def _read_csv(self, file, usecols, dtype = {}):\n", + " return pd.read_csv(\n", + " file,\n", + " index_col = 'VAERS_ID',\n", + " encoding = 'latin1',\n", + " low_memory = False,\n", + " usecols = usecols,\n", + " dtype = dtype)\n" ] }, { @@ -383,7 +390,7 @@ " self._test_createBatchCodeTable(\n", " filterDataFrame(\n", " createDataFrameFromDescrs(\n", - " readAllVaersDescrs(\"test/VAERS\")),\n", + " VaersDescrReader(\"test/VAERS\").readAllVaersDescrs()),\n", " manufacturer = \"MODERNA\",\n", " dose = '1'))\n", "\n", @@ -484,7 +491,7 @@ " batchCodeTable = createBatchCodeTable(\n", " filterDataFrame(\n", " createDataFrameFromDescrs(\n", - " readAllVaersDescrs(\"VAERS\")),\n", + " VaersDescrReader(\"VAERS\").readAllVaersDescrs()),\n", " manufacturer = manufacturer,\n", " dose = '1'))\n", " display(manufacturer, batchCodeTable)\n", @@ -514,7 +521,7 @@ " severeEffectsBatchCodeTable = createSevereEffectsBatchCodeTable(\n", " filterDataFrameForSevereEffects(\n", " createDataFrameFromDescrs(\n", - " readAllVaersDescrs(\"VAERS\")),\n", + " VaersDescrReader(\"VAERS\").readAllVaersDescrs()),\n", " dose = '1'))\n", " display('severeEffectsBatchCodeTable', severeEffectsBatchCodeTable)\n", " severeEffectsBatchCodeTable.to_excel(excelFile)"