{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "md-title",
   "metadata": {},
   "source": [
    "# VAERS batch code pipeline\n",
    "\n",
    "Intended to be run top to bottom on a fresh kernel (**Restart Kernel -> Run All**):\n",
    "\n",
    "1. print the update status reported by `DateProvider`,\n",
    "2. call `updateVAERSFiles` for every year from 2020 to the current year,\n",
    "3. build the symptom-by-batch-code tables (work in progress) and export samples to `tmp/`,\n",
    "4. call `updateBatchCodeTableHtmlFile` for `../docs/batchCodeTable.html` and `createAndSaveBatchCodeTables`.\n",
    "\n",
    "Exploration snippets that cannot run in this notebook are archived, unexecuted, in the appendix at the end."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9de5907f-18f5-4cb1-903e-26028ff1fa03",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from datetime import datetime\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from BatchCodeTableHtmlUpdater import updateBatchCodeTableHtmlFile\n",
    "from BatchCodeTablePersister import createAndSaveBatchCodeTables\n",
    "from DateProvider import DateProvider\n",
    "from DictByBatchcodeTable2JsonConverter import DictByBatchcodeTable2JsonConverter\n",
    "from HistogramTable2DictTableConverter import HistogramTable2DictTableConverter\n",
    "from InternationalVaersCovid19Provider import getInternationalVaersCovid19, get_international_VAERSVAX_VAERSSYMPTOMS_Covid19\n",
    "from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n",
    "from SymptomHistogramByBatchcodeTableFactory import SymptomHistogramByBatchcodeTableFactory\n",
    "from TableByBatchcodeFilter import TableByBatchcodeFilter\n",
    "from TestHelper import TestHelper\n",
    "from VAERSFileDownloader import updateVAERSFiles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cfg-options",
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.set_option('display.max_rows', 100)\n",
    "pd.set_option('display.max_columns', None)\n",
    "\n",
    "# Batch code inspected by the filter and JSON conversion cells below.\n",
    "BATCHCODE = 'FE6208'\n",
    "\n",
    "# The pickle and Excel exports below are written to tmp/; create it up front\n",
    "# so that a fresh checkout does not fail on the first export.\n",
    "os.makedirs('tmp', exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-status",
   "metadata": {},
   "source": [
    "## Update status\n",
    "\n",
    "`needsUpdate` is only printed here; the cells below run regardless of its value."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d1e4fa9e",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(datetime.now().strftime(\"%d.%m.%Y, %H:%M:%S Uhr\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ffad1c04",
   "metadata": {},
   "outputs": [],
   "source": [
    "dateProvider = DateProvider()\n",
    "print(' lastUpdated:', dateProvider.getLastUpdated())\n",
    "print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource())\n",
    "needsUpdate = dateProvider.needsUpdate()\n",
    "print('needsUpdate:', needsUpdate)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-download",
   "metadata": {},
   "source": [
    "## Update the VAERS files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "194b7357",
   "metadata": {},
   "outputs": [],
   "source": [
    "years_from_2020_to_present = list(range(2020, datetime.now().year + 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a793dff0",
   "metadata": {},
   "outputs": [],
   "source": [
    "updateVAERSFiles(\n",
    "    years = years_from_2020_to_present,\n",
    "    workingDirectory = os.getcwd())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-symptoms",
   "metadata": {},
   "source": [
    "## Symptoms by batch code (work in progress)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58333a19",
   "metadata": {},
   "outputs": [],
   "source": [
    "international_VAERSVAX_Covid19, international_VAERSSYMPTOMS = get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(\n",
    "    years = years_from_2020_to_present)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f10b558f",
   "metadata": {},
   "outputs": [],
   "source": [
    "international_VAERSVAX_Covid19"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4119f1a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "international_VAERSSYMPTOMS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "204af94d",
   "metadata": {},
   "outputs": [],
   "source": [
    "symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(\n",
    "    international_VAERSVAX_Covid19,\n",
    "    international_VAERSSYMPTOMS)\n",
    "symptomByBatchcodeTable"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-checkpoint",
   "metadata": {},
   "source": [
    "### Checkpoint and sample export\n",
    "\n",
    "The table is pickled to `tmp/` and read back from there. Only unpickle files that this notebook wrote itself: loading a pickle from an untrusted source can execute arbitrary code."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "817525c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "symptomByBatchcodeTable.to_pickle('tmp/symptomByBatchcodeTable.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5a70fa0",
   "metadata": {},
   "outputs": [],
   "source": [
    "symptomByBatchcodeTable = pd.read_pickle('tmp/symptomByBatchcodeTable.pkl')\n",
    "symptomByBatchcodeTable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b4df095",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Only the first 1000 rows are exported.\n",
    "symptomByBatchcodeTable.iloc[:1000].to_excel('tmp/symptomByBatchcodeTable.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9268d60d",
   "metadata": {},
   "outputs": [],
   "source": [
    "symptomHistogramByBatchcodeTable = SymptomHistogramByBatchcodeTableFactory.createSymptomHistogramByBatchcodeTable(\n",
    "    symptomByBatchcodeTable)\n",
    "symptomHistogramByBatchcodeTable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5170efad",
   "metadata": {},
   "outputs": [],
   "source": [
    "dictByBatchcodeTable = HistogramTable2DictTableConverter.convertHistogramTable2DictTable(\n",
    "    symptomHistogramByBatchcodeTable)\n",
    "dictByBatchcodeTable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba9f665a",
   "metadata": {},
   "outputs": [],
   "source": [
    "filteredTable = TableByBatchcodeFilter.filterTableByBatchcode(BATCHCODE, dictByBatchcodeTable)\n",
    "filteredTable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7027164",
   "metadata": {},
   "outputs": [],
   "source": [
    "# FK-TODO: call convertDictByBatchcodeTable2Json() for every batch code and save each result to its own file <batchcode>.json.\n",
    "jsonActual = DictByBatchcodeTable2JsonConverter.convertDictByBatchcodeTable2Json(filteredTable, BATCHCODE)\n",
    "print(json.dumps(json.loads(jsonActual), indent=2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1d5a7864",
   "metadata": {},
   "outputs": [],
   "source": [
    "filteredTable.to_excel('tmp/filteredTable.xlsx')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-example",
   "metadata": {},
   "source": [
    "### `to_json` on a hand-built example table\n",
    "\n",
    "The example uses its own variable names so that `symptomHistogramByBatchcodeTable` computed above is not overwritten."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0daf7fdc",
   "metadata": {},
   "outputs": [],
   "source": [
    "exampleBatchcode = '1808982'\n",
    "exampleHistogramTable = TestHelper.createDataFrame(\n",
    "    columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],\n",
    "    data = [ ['{\"Blood pressure orthostatic abnormal\":5,\"Chest discomfort\":1}'],\n",
    "             ['{\"Chest discomfort\":2}']],\n",
    "    index = pd.MultiIndex.from_tuples(\n",
    "        names = ['VAX_LOT1', 'VAX_LOT2'],\n",
    "        tuples = [[exampleBatchcode, 'EW0175'],\n",
    "                  ['015M20A', exampleBatchcode]]))\n",
    "exampleHistogramTable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e63a507",
   "metadata": {},
   "outputs": [],
   "source": [
    "json.loads(exampleHistogramTable.to_json())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-scratch",
   "metadata": {},
   "source": [
    "## Scratch: one histogram per group (synthetic data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6429c3d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "type(pd.NA)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3ebf8ada",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fixed seed so the demo figures are the same on every run.\n",
    "np.random.seed(42)\n",
    "\n",
    "letters = ['A']*300 + ['B']*400 + ['C']*300\n",
    "lettersTable = pd.DataFrame({'Letter': letters, 'N': np.random.randn(1000)})\n",
    "\n",
    "for letter, group in lettersTable.groupby('Letter'):\n",
    "    display(group)\n",
    "    plt.figure()\n",
    "    plt.title(letter)\n",
    "    plt.hist(group.N)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-batchcode-tables",
   "metadata": {},
   "source": [
    "## Batch code tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "781ac80e",
   "metadata": {},
   "outputs": [],
   "source": [
    "internationalVaersCovid19 = getInternationalVaersCovid19(years = years_from_2020_to_present)\n",
    "internationalVaersCovid19"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d6507ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "updateBatchCodeTableHtmlFile(internationalVaersCovid19, batchCodeTableHtmlFile=\"../docs/batchCodeTable.html\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0915aa5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "createAndSaveBatchCodeTables(internationalVaersCovid19, minADRsForLethality=100)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-appendix",
   "metadata": {},
   "source": [
    "## Appendix: archived exploration snippets\n",
    "\n",
    "Kept for reference only; these are **not** executed. As code cells they referenced `jsonTable`, `table` and `df` (with `VAX_LOT*`/`SYMPTOMS` columns), none of which is assigned anywhere in this notebook before use, so **Run All** stopped with a `NameError` before the batch code tables were written. The two `symptomHistogramByBatchcodeTable` snippets ran after that name had been rebound to the two-level example table. Define the missing tables before turning a snippet back into a code cell.\n",
    "\n",
    "Profiling the table factory:\n",
    "\n",
    "```python\n",
    "import cProfile\n",
    "cProfile.run('SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(international_VAERSVAX_Covid19, international_VAERSSYMPTOMS)')\n",
    "```\n",
    "\n",
    "`jsonTable` (never assigned):\n",
    "\n",
    "```python\n",
    "jsonTable.to_excel('tmp/jsonTable.xlsx')\n",
    "\n",
    "res = jsonTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')]['SYMPTOM_COUNT_BY_VAX_LOT']\n",
    "res.to_json()\n",
    "```\n",
    "\n",
    "`symptomHistogramByBatchcodeTable` with a 10-element index key:\n",
    "\n",
    "```python\n",
    "symptomHistogramByBatchcodeTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')].plot(kind='bar')\n",
    "symptomHistogramByBatchcodeTable.sort_values(by='SYMPTOM_COUNT_BY_VAX_LOT', ascending=False)\n",
    "```\n",
    "\n",
    "`table` (never assigned):\n",
    "\n",
    "```python\n",
    "table.loc[('!D0181', 'nan')]\n",
    "table.loc[('!D0181', 'nan')][:30].plot(kind='bar')\n",
    "```\n",
    "\n",
    "`df` with `VAX_LOT1` ... `VAX_LOT8` and `SYMPTOMS` columns (never assigned in that shape):\n",
    "\n",
    "```python\n",
    "df.groupby('VAX_LOT1')['SYMPTOMS']\n",
    "\n",
    "df[(df['VAX_LOT1'] != 'nan') & (df['VAX_LOT2'] != 'nan') & (df['VAX_LOT3'] != 'nan') & (df['VAX_LOT4'] != 'nan')& (df['VAX_LOT5'] != 'nan') & (df['VAX_LOT6'] != 'nan') & (df['VAX_LOT7'] != 'nan') & (df['VAX_LOT8'] != 'nan')]\n",
    "\n",
    "df[df['VAX_LOT3'] == 'EN6201']\n",
    "\n",
    "# EN6201, FE6208\n",
    "df[df['VAX_LOT2'] == 'EN6201'].hist(by=['VAX_LOT2'], column='SYMPTOMS', figsize=(200, 60))\n",
    "# plt.savefig('EN6201.png')\n",
    "\n",
    "df['SYMPTOMS'].hist(by=df['VAX_LOT1'])\n",
    "\n",
    "df.hist(by=['VAX_LOT1'], column='SYMPTOMS')\n",
    "\n",
    "grouped = df.groupby(['VAX_LOT1'])\n",
    "\n",
    "for index, group in grouped:\n",
    "    display(index, group)\n",
    "    #plt.figure(figsize=(20, 10), edgecolor='green')\n",
    "    #plt.title(index)\n",
    "    #plt.hist(group['SYMPTOMS'], align='left')\n",
    "    #plt.show()\n",
    "```"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "howbadismybatch-venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8 (main, Nov 24 2022, 14:13:03) [GCC 11.2.0]"
  },
  "vscode": {
   "interpreter": {
    "hash": "1bce2b9b19ce5f16d695ff75ac05095b3e564c169ff454b58b87cb796c0695b8"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}