diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index 6636ef642eb..49c75820686 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -191,7 +191,8 @@ " 'L_THREAT': 'sum',\n", " 'DISABLE': 'sum',\n", " 'HOSPITAL': 'sum',\n", - " 'ER_VISIT': 'sum'\n", + " 'ER_VISIT': 'sum',\n", + " 'SEVERE': 'sum'\n", " })\n", " SummationTableFactory._flattenColumns(summationTable)\n", " return summationTable.rename(columns = columnNameMappingsDict)\n", @@ -204,9 +205,10 @@ " \"DIED_size\": \"Total Number of Adverse Reaction Reports\",\n", " \"DIED_sum\": \"Deaths\",\n", " \"L_THREAT_sum\": \"Life Threatening Illnesses\",\n", - " \"DISABLE_sum\": \"Disabilities\"\n", + " \"DISABLE_sum\": \"Disabilities\",\n", + " \"SEVERE_sum\": \"Severities\"\n", " })\n", - " summationTable['Severe reports (%)'] = (summationTable['Deaths'] + summationTable['Disabilities'] + summationTable['Life Threatening Illnesses']) / summationTable['Total Number of Adverse Reaction Reports'] * 100\n", + " summationTable['Severe reports (%)'] = summationTable['Severities'] / summationTable['Total Number of Adverse Reaction Reports'] * 100\n", " summationTable = summationTable[['Total Number of Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Severe reports (%)']]\n", " return summationTable\n", "\n", @@ -352,6 +354,25 @@ " return country.name if country is not None else default" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "3abe3384", + "metadata": {}, + "outputs": [], + "source": [ + "import pycountry\n", + "\n", + "class SevereColumnAdder:\n", + " \n", + " @staticmethod\n", + " def addSevereColumn(dataFrame):\n", + " dataFrame['SEVERE'] = dataFrame.apply(\n", + " lambda row: 1 if (row['DIED'] + row['L_THREAT'] + row['DISABLE']) > 0 else 0,\n", + " axis = 'columns')\n", + " return dataFrame\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -688,7 +709,7 @@ "\n", "class BatchCodeTableFactoryTest(unittest.TestCase):\n", "\n", - " def testcreateSummationTable(self):\n", + " def test_createSummationTable(self):\n", " # Given\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n", " [\n", @@ -710,6 +731,7 @@ " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " }\n", " ])\n", + " dataFrame = SevereColumnAdder.addSevereColumn(dataFrame)\n", "\n", " # When\n", " batchCodeTable = BatchCodeTableFactory.createSevereEffectsBatchCodeTable(dataFrame, '1')\n", @@ -776,6 +798,7 @@ "\n", " def _test_createBatchCodeTable(self, dataFrame, dose):\n", " # When\n", + " dataFrame = SevereColumnAdder.addSevereColumn(dataFrame)\n", " batchCodeTable = BatchCodeTableFactory.createBatchCodeTable(dataFrame, dose)\n", "\n", " # Then\n", @@ -815,6 +838,7 @@ " \"1048786\",\n", " \"4711\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " dataFrame = SevereColumnAdder.addSevereColumn(dataFrame)\n", " \n", " # When\n", " doseTable = DoseTableFactory.createDoseTable(dataFrame)\n", @@ -824,11 +848,11 @@ " doseTable,\n", " pd.DataFrame(\n", " data = {\n", - " 'Total Number of Adverse Reaction Reports': [2, 1],\n", - " 'Deaths': [2, 1],\n", - " 'Disabilities': [1, 0],\n", - " 'Life Threatening Illnesses': [1, 0],\n", - " 'Severe reports (%)': [(2 + 1 + 1)/2 * 100, (1 + 0 + 0)/1 * 100]\n", + " 'Total Number of Adverse Reaction Reports': [2, 1],\n", + " 'Deaths': [2, 1],\n", + " 'Disabilities': [1, 0],\n", + " 'Life Threatening Illnesses': [1, 0],\n", + " 'Severe reports (%)': [2/2 * 100, 1/1 * 100]\n", " },\n", " index = pd.Index(['1', '2'], dtype = \"string\", name = 'Dose')))\n", " \n", @@ -845,6 +869,7 @@ " \"1048786\",\n", " \"4711\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", + " dataFrame = SevereColumnAdder.addSevereColumn(dataFrame)\n", " \n", " # When\n", " doseByMonthTable = DoseTableFactory.createDoseByMonthTable(dataFrame)\n", @@ -854,11 +879,11 @@ " doseByMonthTable,\n", " pd.DataFrame(\n", " data = {\n", - " 'Total Number of Adverse Reaction Reports': [2, 1],\n", - " 'Deaths': [2, 1],\n", - " 'Disabilities': [1, 0],\n", - " 'Life Threatening Illnesses': [1, 0],\n", - " 'Severe reports (%)': [(2 + 1 + 1)/2 * 100, (1 + 0 + 0)/1 * 100]\n", + " 'Total Number of Adverse Reaction Reports': [2, 1],\n", + " 'Deaths': [2, 1],\n", + " 'Disabilities': [1, 0],\n", + " 'Life Threatening Illnesses': [1, 0],\n", + " 'Severe reports (%)': [2/2 * 100, 1/1 * 100]\n", " },\n", " index = pd.MultiIndex.from_tuples(\n", " [\n", @@ -895,6 +920,7 @@ " \"4711\",\n", " \"0815\",\n", " \"0816\"])\n", + " dataFrame = SevereColumnAdder.addSevereColumn(dataFrame)\n", " \n", " # When\n", " internationalLotTable = InternationalLotTableFactory.createInternationalLotTable(dataFrame)\n", @@ -904,9 +930,9 @@ " internationalLotTable,\n", " TestHelper.createDataFrame(\n", " columns = ['Total Number of Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Severe reports (%)'],\n", - " data = [ [2, 2, 1, 1, (2 + 1 + 1) / 2 * 100],\n", - " [1, 1, 0, 0, (1 + 0 + 0) / 1 * 100],\n", - " [2, 0, 0, 0, (0 + 0 + 0) / 2 * 100]],\n", + " data = [ [2, 2, 1, 1, 2/2 * 100],\n", + " [1, 1, 0, 0, 1/1 * 100],\n", + " [2, 0, 0, 0, 0/2 * 100]],\n", " index = pd.Index(\n", " [\n", " 'France',\n", @@ -921,7 +947,7 @@ " dataFrame = TestHelper.createDataFrame(\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE', 'HOSPITAL', 'ER_VISIT'],\n", " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 'GBPFIZER INC2020486806', 0, 0],\n", - " [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n", + " [0, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n", " [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n", " [0, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0]],\n", " index = [\n", @@ -929,6 +955,7 @@ " \"1048786\",\n", " \"4711\",\n", " \"0815\"])\n", + " dataFrame = SevereColumnAdder.addSevereColumn(dataFrame)\n", " \n", " # When\n", " batchCodeTable = InternationalLotTableFactory.createBatchCodeTableByCountry(dataFrame, 'France')\n", @@ -938,8 +965,8 @@ " batchCodeTable,\n", " TestHelper.createDataFrame(\n", " columns = ['Total Number of Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Severe reports (%)'],\n", - " data = [ [2, 1, 2, 2, (1 + 2 + 2) / 2 * 100],\n", - " [1, 1, 0, 0, (1 + 0 + 0) / 1 * 100]],\n", + " data = [ [2, 1, 2, 2, 2/2 * 100],\n", + " [1, 0, 0, 0, 0/1 * 100]],\n", " index = pd.Index(\n", " [\n", " '030L20B',\n", @@ -970,6 +997,7 @@ " vaersDescrs = vaersDescrsReaderFunc()\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " DataFrameNormalizer.normalize(dataFrame)\n", + " dataFrame = SevereColumnAdder.addSevereColumn(dataFrame)\n", " return dataFrame\n", " \n", "def getAllVaers():\n", @@ -1359,6 +1387,14 @@ "\n", "sns.boxplot(x = \"CONCENTRATION\", y = \"Total Number of Adverse Reaction Reports\", data = batchCodeTable, order = order)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c355356a", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {