520 lines
13 KiB
Plaintext
520 lines
13 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9de5907f-18f5-4cb1-903e-26028ff1fa03",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"pd.set_option('display.max_rows', 100)\n",
|
|
"pd.set_option('display.max_columns', None)\n",
|
|
"\n",
|
|
"import os\n",
|
|
"from VAERSFileDownloader import updateVAERSFiles\n",
|
|
"from datetime import datetime\n",
|
|
"from DateProvider import DateProvider\n",
|
|
"from InternationalVaersCovid19Provider import getInternationalVaersCovid19,get_international_VAERSVAX_VAERSSYMPTOMS_Covid19\n",
|
|
"from BatchCodeTableHtmlUpdater import updateBatchCodeTableHtmlFile\n",
|
|
"from BatchCodeTablePersister import createAndSaveBatchCodeTables"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d1e4fa9e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(datetime.now().strftime(\"%d.%m.%Y, %H:%M:%S Uhr\"))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ffad1c04",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"dateProvider = DateProvider()\n",
|
|
"print(' lastUpdated:', dateProvider.getLastUpdated())\n",
|
|
"print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource())\n",
|
|
"needsUpdate = dateProvider.needsUpdate()\n",
|
|
"print('needsUpdate:', needsUpdate)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "194b7357",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"years_from_2020_to_present = list(range(2020, datetime.now().year + 1))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a793dff0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"updateVAERSFiles(\n",
|
|
" years = years_from_2020_to_present,\n",
|
|
" workingDirectory = os.getcwd())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "58333a19",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"international_VAERSVAX_Covid19, international_VAERSSYMPTOMS = get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years = years_from_2020_to_present)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f10b558f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"international_VAERSVAX_Covid19"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4119f1a3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"international_VAERSSYMPTOMS"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "204af94d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n",
|
|
"symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(international_VAERSVAX_Covid19, international_VAERSSYMPTOMS)\n",
|
|
"symptomByBatchcodeTable"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "817525c1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Create the scratch directory first: to_pickle() does not create missing parent directories.\n",
|
|
"os.makedirs('tmp', exist_ok=True)\n",
|
|
"symptomByBatchcodeTable.to_pickle('tmp/symptomByBatchcodeTable.pkl')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b5a70fa0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"symptomByBatchcodeTable = pd.read_pickle('tmp/symptomByBatchcodeTable.pkl')\n",
|
|
"symptomByBatchcodeTable"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d3b45ee3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# import cProfile\n",
|
|
"# from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n",
|
|
"# cProfile.run('SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(international_VAERSVAX_Covid19, international_VAERSSYMPTOMS)')\n",
|
|
"# # symptomByBatchcodeTable"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9268d60d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from SymptomHistogramByBatchcodeTableFactory import SymptomHistogramByBatchcodeTableFactory\n",
|
|
"\n",
|
|
"symptomHistogramByBatchcodeTable = SymptomHistogramByBatchcodeTableFactory.createSymptomHistogramByBatchcodeTable(symptomByBatchcodeTable)\n",
|
|
"symptomHistogramByBatchcodeTable"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5170efad",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from HistogramTable2DictTableConverter import HistogramTable2DictTableConverter\n",
|
|
"dictByBatchcodeTable = HistogramTable2DictTableConverter.convertHistogramTable2DictTable(symptomHistogramByBatchcodeTable)\n",
|
|
"dictByBatchcodeTable"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5c77a3d4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"dictByBatchcodeTable"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ba9f665a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from TableByBatchcodeFilter import TableByBatchcodeFilter\n",
|
|
"filteredTable = TableByBatchcodeFilter.filterTableByBatchcode('FE6208', dictByBatchcodeTable)\n",
|
|
"filteredTable"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c7027164",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from DictByBatchcodeTable2JsonConverter import DictByBatchcodeTable2JsonConverter\n",
|
|
"import json\n",
|
|
"# FK-TODO: call convertDictByBatchcodeTable2Json() for all batch codes and save each result in a file batchcode.json.\n",
|
|
"jsonActual = DictByBatchcodeTable2JsonConverter.convertDictByBatchcodeTable2Json(filteredTable, 'FE6208')\n",
|
|
"print(json.dumps(json.loads(jsonActual), indent=2))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "97e2a4b6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(json.dumps(json.loads(jsonActual), indent=2))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "25a0b41d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"jsonTable.to_excel('tmp/jsonTable.xlsx')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1d5a7864",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"filteredTable.to_excel('tmp/filteredTable.xlsx')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0daf7fdc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from TestHelper import TestHelper\n",
|
|
"batchcode = '1808982'\n",
|
|
"symptomHistogramByBatchcodeTable = TestHelper.createDataFrame(\n",
|
|
" columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],\n",
|
|
" data = [ ['{\"Blood pressure orthostatic abnormal\":5,\"Chest discomfort\":1}'],\n",
|
|
" ['{\"Chest discomfort\":2}']],\n",
|
|
" index = pd.MultiIndex.from_tuples(\n",
|
|
" names = ['VAX_LOT1', 'VAX_LOT2'],\n",
|
|
" tuples = [[batchcode, 'EW0175'],\n",
|
|
" ['015M20A', batchcode]]))\n",
|
|
"symptomHistogramByBatchcodeTable"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8e63a507",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import json\n",
|
|
"result= symptomHistogramByBatchcodeTable.to_json()\n",
|
|
"parsed = json.loads(result)\n",
|
|
"parsed\n",
|
|
"#print(json.dumps(parsed, indent=4))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "e55301c8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"res = jsonTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')]['SYMPTOM_COUNT_BY_VAX_LOT']\n",
|
|
"res"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b5710177",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"res.to_json()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6b2fc717",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"symptomHistogramByBatchcodeTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')].plot(kind='bar')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "07b1b418",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"symptomHistogramByBatchcodeTable.sort_values(by='SYMPTOM_COUNT_BY_VAX_LOT', ascending=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9acd49bc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"table.loc[('!D0181', 'nan')]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d9a7eb51",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"table.loc[('!D0181', 'nan')][:30].plot(kind='bar')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9e3f86a0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.groupby('VAX_LOT1')['SYMPTOMS']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6ab1c3f7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Keep only the rows in which every one of VAX_LOT1 .. VAX_LOT8 differs from the string 'nan'.\n",
|
|
"df[(df[[f'VAX_LOT{lotNumber}' for lotNumber in range(1, 9)]] != 'nan').all(axis=1)]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4753dd56",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df[df['VAX_LOT3'] == 'EN6201']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ce3d6dfd",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import matplotlib.pyplot as plt\n",
|
|
"# EN6201, FE6208\n",
|
|
"df[df['VAX_LOT2'] == 'EN6201'].hist(by=['VAX_LOT2'], column='SYMPTOMS', figsize=(200, 60))\n",
|
|
"# plt.savefig('EN6201.png')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "cc962341",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df['SYMPTOMS'].hist(by=df['VAX_LOT1'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "63fa4111",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.hist(by=['VAX_LOT1'], column='SYMPTOMS')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1e97b5e3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from pandas import DataFrame\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"\n",
|
|
"grouped = df.groupby(['VAX_LOT1'])\n",
|
|
"\n",
|
|
"for index, group in grouped:\n",
|
|
" display(index, group)\n",
|
|
" #plt.figure(figsize=(20, 10), edgecolor='green')\n",
|
|
" #plt.title(index)\n",
|
|
" #plt.hist(group['SYMPTOMS'], align='left')\n",
|
|
" #plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6429c3d9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"type(pd.NA)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3ebf8ada",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from pandas import DataFrame\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"\n",
|
|
"# Synthetic demo: draw one histogram per group of a grouped DataFrame.\n",
|
|
"# A seeded generator keeps the sampled values (and therefore the plots) identical on every run.\n",
|
|
"rng = np.random.default_rng(42)\n",
|
|
"x = ['A']*300 + ['B']*400 + ['C']*300\n",
|
|
"# One standard-normal sample per label, so both columns always have the same length.\n",
|
|
"y = rng.standard_normal(len(x))\n",
|
|
"df = DataFrame({'Letter':x, 'N':y})\n",
|
|
"grouped = df.groupby('Letter')\n",
|
|
"\n",
|
|
"for index, group in grouped:\n",
|
|
"    display(group)\n",
|
|
"    # A new figure per group, titled with the group key.\n",
|
|
"    plt.figure()\n",
|
|
"    plt.title(index)\n",
|
|
"    plt.hist(group.N)\n",
|
|
"\n",
|
|
"# plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5b4df095",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"symptomByBatchcodeTable.iloc[:1000].to_excel('tmp/symptomByBatchcodeTable.xlsx')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "781ac80e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"internationalVaersCovid19 = getInternationalVaersCovid19(years = years_from_2020_to_present)\n",
|
|
"internationalVaersCovid19"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8d6507ca",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"updateBatchCodeTableHtmlFile(internationalVaersCovid19, batchCodeTableHtmlFile=\"../docs/batchCodeTable.html\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0915aa5a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"createAndSaveBatchCodeTables(internationalVaersCovid19, minADRsForLethality=100)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "howbadismybatch-venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.8 (main, Nov 24 2022, 14:13:03) [GCC 11.2.0]"
|
|
},
|
|
"vscode": {
|
|
"interpreter": {
|
|
"hash": "1bce2b9b19ce5f16d695ff75ac05095b3e564c169ff454b58b87cb796c0695b8"
|
|
}
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|