diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb
index 8e4dba5ebe7..4d7fa23b888 100644
--- a/HowBadIsMyBatch.ipynb
+++ b/HowBadIsMyBatch.ipynb
@@ -1176,7 +1176,11 @@
     "    dataFrame = DataFrameFilter().filterByCovid19(vaers)\n",
     "    dataFrame = DataFrameFilter().filterBy(dataFrame, manufacturer = manufacturer)\n",
     "    batchCodeTable = BatchCodeTableFactory._createSummationTableByVAX_LOT(dataFrame)[['ADRs']].reset_index()\n",
-    "    return batchCodeTable\n"
+    "    return batchCodeTable\n",
+    "\n",
+    "def filterColumnOfDataFrameWithRegexp(dataFrame, column, regexp):\n",
+    "    # Keep the rows whose `column` value matches the compiled pattern `regexp` (re.match semantics: anchored at the start of the string).\n",
+    "    return dataFrame[dataFrame[column].apply(lambda columnValue: bool(regexp.match(columnValue)))]\n"
    ]
   },
   {
@@ -1193,9 +1197,8 @@
     "batchCodeTable = createADRsByVAX_LOTTable(vaers, \"PFIZER\\BIONTECH\")\n",
     "batchCodeTable['VAX_LOT_PREFIX'] = batchCodeTable['VAX_LOT'].str[:2]\n",
     "batchCodeTable = batchCodeTable.sort_values(by = 'VAX_LOT_PREFIX', ascending = True)\n",
-    "\n",
     "twoLetters = re.compile(r'^[a-zA-Z]{2}')\n",
-    "batchCodeTable = batchCodeTable[batchCodeTable['VAX_LOT_PREFIX'].apply(lambda vax_lot_prefix: bool(twoLetters.match(vax_lot_prefix)))]\n",
+    "batchCodeTable = filterColumnOfDataFrameWithRegexp(dataFrame = batchCodeTable, column = 'VAX_LOT_PREFIX', regexp = twoLetters)\n",
     "batchCodeTable = batchCodeTable[batchCodeTable['VAX_LOT_PREFIX'].isin(['EN', 'EP', 'ER', 'EW', 'FA', 'FC', 'FD', 'FE', 'FH'])]\n",
     "batchCodeTable = batchCodeTable[batchCodeTable['ADRs'] > 400]\n",
     "batchCodeTable"
@@ -1212,8 +1215,7 @@
     "\n",
     "sns.set(rc = {'figure.figsize': (11.7, 8.27)})\n",
     "sns.set_theme()\n",
-    "chart = sns.stripplot(x = \"VAX_LOT_PREFIX\", y = \"ADRs\", data = batchCodeTable)\n",
-    "# _ = chart.set_xticklabels(chart.get_xticklabels(), rotation = 90)"
+    "chart = sns.stripplot(x = \"VAX_LOT_PREFIX\", y = \"ADRs\", data = batchCodeTable)"
    ]
   },
   {
@@ -1258,7 +1260,7 @@
     "\n",
     "batchCodeTable = createADRsByVAX_LOTTable(vaers, \"MODERNA\")\n",
     "modernaBatchCodePrefix = re.compile(r'^[0-9]{3}[a-zA-Z]')\n",
-    "batchCodeTable = batchCodeTable[batchCodeTable['VAX_LOT'].apply(lambda vax_lot: bool(modernaBatchCodePrefix.match(vax_lot)))]\n",
+    "batchCodeTable = filterColumnOfDataFrameWithRegexp(dataFrame = batchCodeTable, column = 'VAX_LOT', regexp = modernaBatchCodePrefix)\n",
     "batchCodeTable['CONCENTRATION'] = batchCodeTable['VAX_LOT'].str[3]\n",
     "batchCodeTable = batchCodeTable.sort_values(by = 'CONCENTRATION', ascending = True)\n",
     "batchCodeTable = batchCodeTable[batchCodeTable['ADRs'] > 400]\n",