diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index a942762e787..bc8d2b2da96 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -14,30 +14,6 @@ "pd.set_option('display.max_columns', None)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "b136967b", - "metadata": {}, - "outputs": [], - "source": [ - "class DataFrameNormalizer:\n", - " \n", - " @staticmethod\n", - " def convertVAX_LOTColumnToUpperCase(dataFrame):\n", - " dataFrame['VAX_LOT'] = dataFrame['VAX_LOT'].str.upper()\n", - "\n", - " @staticmethod\n", - " def convertColumnsOfDataFrameToNumerics(dataFrame, columns):\n", - " for column in columns:\n", - " DataFrameNormalizer._convertColumnOfDataFrameToNumeric(dataFrame, column)\n", - "\n", - " @staticmethod\n", - " def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n", - " dataFrame[column] = np.where(dataFrame[column] == 'Y', 1, 0)\n", - " " - ] - }, { "cell_type": "code", "execution_count": null, @@ -66,23 +42,17 @@ " }\n", "\n", " def _readVAERSDATA(self, file):\n", - " VAERSDATA = self._read_csv(\n", + " return self._read_csv(\n", " file = file,\n", " usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", " parse_dates = ['RECVDATE'],\n", " date_parser = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\"))\n", - " DataFrameNormalizer.convertColumnsOfDataFrameToNumerics(\n", - " VAERSDATA,\n", - " ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n", - " return VAERSDATA\n", "\n", " def _readVAERSVAX(self, file):\n", - " VAERSVAX = self._read_csv(\n", + " return self._read_csv(\n", " file = file,\n", " usecols = ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n", " dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n", - " DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX)\n", - " return VAERSVAX\n", "\n", " def _read_csv(self, file, **kwargs):\n", " return pd.read_csv(\n", @@ -120,6 +90,37 @@ " return pd.concat(dataFrames)\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b639196", + "metadata": {}, + "outputs": [], + "source": [ + "class DataFrameNormalizer:\n", + " \n", + " @staticmethod\n", + " def normalize(dataFrame):\n", + " DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(dataFrame)\n", + " DataFrameNormalizer.convertColumnsOfDataFrameToNumerics(\n", + " dataFrame,\n", + " ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n", + "\n", + " @staticmethod\n", + " def convertVAX_LOTColumnToUpperCase(dataFrame):\n", + " dataFrame['VAX_LOT'] = dataFrame['VAX_LOT'].str.upper()\n", + "\n", + " @staticmethod\n", + " def convertColumnsOfDataFrameToNumerics(dataFrame, columns):\n", + " for column in columns:\n", + " DataFrameNormalizer._convertColumnOfDataFrameToNumeric(dataFrame, column)\n", + "\n", + " @staticmethod\n", + " def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n", + " dataFrame[column] = np.where(dataFrame[column] == 'Y', 1, 0)\n", + " " + ] + }, { "cell_type": "code", "execution_count": null, @@ -179,7 +180,7 @@ "class BatchCodeTableHelper:\n", " \n", " def __init__(self, dataFrame : pd.DataFrame):\n", - " self.dataFrame = dataFrame \n", + " self.dataFrame = dataFrame\n", "\n", " def createBatchCodeTable(self):\n", " batchCodeTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n", @@ -244,8 +245,8 @@ " @staticmethod\n", " def createBatchCodeTable(dataFrame : pd.DataFrame, manufacturer, dose):\n", " dataFrame = DataFrameFilter().filterByCovid19(dataFrame)\n", - " filteredDataFrame = DataFrameFilter().filterBy(dataFrame, manufacturer = manufacturer, dose = dose)\n", - " return BatchCodeTableHelper(filteredDataFrame).createBatchCodeTable()\n", + " dataFrame = DataFrameFilter().filterBy(dataFrame, manufacturer = manufacturer, dose = dose)\n", + " return BatchCodeTableHelper(dataFrame).createBatchCodeTable()\n", "\n", " # create table from https://www.howbadismybatch.com/combined.html\n", " @staticmethod\n", @@ -630,6 +631,7 @@ " def test_createBatchCodeTable(self):\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", " VaersDescrReader(dataDir = \"test/VAERS\").readAllVaersDescrs())\n", + " DataFrameNormalizer.normalize(dataFrame)\n", " self._test_createBatchCodeTable(dataFrame, \"MODERNA\", '1')\n", "\n", " def _test_createBatchCodeTable(self, dataFrame, manufacturer, dose):\n", @@ -751,6 +753,7 @@ "def saveBatchCodeTable(manufacturer, excelFile):\n", " vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", + " DataFrameNormalizer.normalize(dataFrame)\n", " batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(dataFrame, manufacturer = manufacturer, dose = '1')\n", " display(batchCodeTable)\n", " batchCodeTable.to_excel(excelFile)" @@ -831,6 +834,7 @@ "def saveSevereEffectsBatchCodeTable(excelFile):\n", " vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", + " DataFrameNormalizer.normalize(dataFrame)\n", " severeEffectsBatchCodeTable = BatchCodeTableFactory.createSevereEffectsBatchCodeTable(dataFrame, dose = '1')\n", " display(severeEffectsBatchCodeTable)\n", " severeEffectsBatchCodeTable.to_excel(excelFile)" @@ -866,11 +870,13 @@ "def getDoseTable():\n", " vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", + " DataFrameNormalizer.normalize(dataFrame)\n", " return DoseAnalysis.getDoseTable(dataFrame)\n", "\n", "def getDoseByMonthTable():\n", " vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", + " DataFrameNormalizer.normalize(dataFrame)\n", " return DoseAnalysis.getDoseByMonthTable(dataFrame)" ] },