diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb
index a117fcd1477..d99a9d62502 100644
--- a/HowBadIsMyBatch.ipynb
+++ b/HowBadIsMyBatch.ipynb
@@ -1148,6 +1148,83 @@
     " 'Australia'\n",
     " ])"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ba02139d",
+   "metadata": {},
+   "source": [
+    "### Batch Clusters"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9649a32d",
+   "metadata": {},
+   "source": [
+    "#### Pfizer Batches"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "020b0d90",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# https://www.howbadismybatch.com/clusters.html\n",
+    "\n",
+    "# Lot prefixes to compare, and the ADR count a batch must exceed to be kept.\n",
+    "PFIZER_LOT_PREFIXES = ['EN', 'EP', 'ER', 'EW', 'FA', 'FC', 'FD', 'FE', 'FH']\n",
+    "MIN_ADRS = 400\n",
+    "\n",
+    "dataFrame = DataFrameFilter().filterByCovid19(vaers)\n",
+    "# Raw string: the manufacturer name contains a literal backslash.\n",
+    "dataFrame = DataFrameFilter().filterBy(dataFrame, manufacturer = r\"PFIZER\\BIONTECH\")\n",
+    "batchCodeTable = BatchCodeTableFactory._createSummationTableByVAX_LOT(dataFrame)[['ADRs']].reset_index()\n",
+    "batchCodeTable['VAX_LOT_PREFIX'] = batchCodeTable['VAX_LOT'].str[:2]\n",
+    "batchCodeTable = batchCodeTable.sort_values(by = 'VAX_LOT_PREFIX', ascending = True)\n",
+    "\n",
+    "# Every listed prefix is two letters, so no separate two-letter check is needed.\n",
+    "batchCodeTable = batchCodeTable[batchCodeTable['VAX_LOT_PREFIX'].isin(PFIZER_LOT_PREFIXES)]\n",
+    "batchCodeTable = batchCodeTable[batchCodeTable['ADRs'] > MIN_ADRS]\n",
+    "batchCodeTable"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "02201726",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import seaborn as sns\n",
+    "\n",
+    "sns.set_theme(rc = {'figure.figsize': (11.7, 8.27)})\n",
+    "sns.stripplot(x = \"VAX_LOT_PREFIX\", y = \"ADRs\", data = batchCodeTable);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d6000b48",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.pointplot(x = \"VAX_LOT_PREFIX\", y = \"ADRs\", data = batchCodeTable, estimator = np.mean);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cf53c8c8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sns.set_theme(style = \"ticks\", palette = \"pastel\")\n",
+    "\n",
+    "sns.boxplot(x = \"VAX_LOT_PREFIX\", y = \"ADRs\", data = batchCodeTable);"
+   ]
   }
  ],
  "metadata": {
diff --git a/help.txt b/help.txt
index 95b610d5197..60ae04122a3 100644
--- a/help.txt
+++ b/help.txt
@@ -4,7 +4,6 @@ get VAERS data:
 - download data (e.g. 2022VAERSData.zip) from https://vaers.hhs.gov/data/datasets.html and save and unzip in VAERS folder
 
 FK-TODO:
-- https://www.howbadismybatch.com/international.html nachprogrammieren
 - https://www.howbadismybatch.com/geography.html nachprogrammieren
 - handle VAX_DOSE_SERIES = 'UNK' or 'N/A' like '1'?
 - Format des jeweiligen Herstellers berücksichtigen und "verschmutzte" Einträge säubern, denn sie stellen alle dieselbe Charge dar: