This commit is contained in:
frankknoll
2023-01-26 08:42:29 +01:00
parent 7c520c8f64
commit 30f66576c8

View File

@@ -16,7 +16,7 @@
"from VAERSFileDownloader import updateVAERSFiles\n",
"from datetime import datetime\n",
"from DateProvider import DateProvider\n",
"from InternationalVaersCovid19Provider import getInternationalVaersCovid19\n",
"from InternationalVaersCovid19Provider import getInternationalVaersCovid19,get_international_VAERSVAX_VAERSSYMPTOMS_Covid19\n",
"from BatchCodeTableHtmlUpdater import updateBatchCodeTableHtmlFile\n",
"from BatchCodeTablePersister import createAndSaveBatchCodeTables"
]
@@ -67,6 +67,364 @@
" workingDirectory = os.getcwd())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58333a19",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSVAX_Covid19, international_VAERSSYMPTOMS = get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years = years_from_2020_to_present)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f10b558f",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSVAX_Covid19"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4119f1a3",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSSYMPTOMS"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "204af94d",
"metadata": {},
"outputs": [],
"source": [
"from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n",
"symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(international_VAERSVAX_Covid19, international_VAERSSYMPTOMS)\n",
"symptomByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "817525c1",
"metadata": {},
"outputs": [],
"source": [
"symptomByBatchcodeTable.to_pickle('tmp/symptomByBatchcodeTable.pkl')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5a70fa0",
"metadata": {},
"outputs": [],
"source": [
"symptomByBatchcodeTable = pd.read_pickle('tmp/symptomByBatchcodeTable.pkl')\n",
"symptomByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d3b45ee3",
"metadata": {},
"outputs": [],
"source": [
"# import cProfile\n",
"# from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n",
"# cProfile.run('SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(international_VAERSVAX_Covid19, international_VAERSSYMPTOMS)')\n",
"# # symptomByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9268d60d",
"metadata": {},
"outputs": [],
"source": [
"from SymptomHistogramByBatchcodeTableFactory import SymptomHistogramByBatchcodeTableFactory\n",
"\n",
"symptomHistogramByBatchcodeTable = SymptomHistogramByBatchcodeTableFactory.createSymptomHistogramByBatchcodeTable(symptomByBatchcodeTable)\n",
"symptomHistogramByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5170efad",
"metadata": {},
"outputs": [],
"source": [
"from HistogramTable2JsonTableConverter import HistogramTable2JsonTableConverter\n",
"jsonTable = HistogramTable2JsonTableConverter.convertHistogramTable2JsonTable(symptomHistogramByBatchcodeTable)\n",
"jsonTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "25a0b41d",
"metadata": {},
"outputs": [],
"source": [
"jsonTable.to_excel('tmp/jsonTable.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e55301c8",
"metadata": {},
"outputs": [],
"source": [
"res = jsonTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')]['SYMPTOM_COUNT_BY_VAX_LOT']\n",
"res"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5710177",
"metadata": {},
"outputs": [],
"source": [
"res.to_json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2e59589",
"metadata": {},
"outputs": [],
"source": [
"df = symptomHistogramByBatchcodeTable[:].reset_index()\n",
"vax_lot = '094F21A'\n",
"df[(df['VAX_LOT1'] == vax_lot) | (df['VAX_LOT2'] == vax_lot) | (df['VAX_LOT3'] == vax_lot) | (df['VAX_LOT4'] == vax_lot) | (df['VAX_LOT5'] == vax_lot) | (df['VAX_LOT6'] == vax_lot) | (df['VAX_LOT7'] == vax_lot) | (df['VAX_LOT8'] == vax_lot) | (df['VAX_LOT9'] == vax_lot) | (df['VAX_LOT10'] == vax_lot)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b2fc717",
"metadata": {},
"outputs": [],
"source": [
"symptomHistogramByBatchcodeTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')].plot(kind='bar')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "07b1b418",
"metadata": {},
"outputs": [],
"source": [
"symptomHistogramByBatchcodeTable.sort_values(by='SYMPTOM_COUNT_BY_VAX_LOT', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "206980b7",
"metadata": {},
"outputs": [],
"source": [
"df = symptomByBatchcodeTable[:].reset_index()\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "690197ca",
"metadata": {},
"outputs": [],
"source": [
"table = df.groupby(['VAX_LOT1', 'VAX_LOT2'])['SYMPTOM'].value_counts()\n",
"table.name = 'SYMPTOM_count'\n",
"# table = table.reset_index(level = table.index.names.difference(['VAX_LOT1', 'VAX_LOT2']))\n",
"table"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6e2c82e",
"metadata": {},
"outputs": [],
"source": [
"table2 = table.reset_index(level=2)\n",
"table2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cb7bf7e4",
"metadata": {},
"outputs": [],
"source": [
"table2.index"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9acd49bc",
"metadata": {},
"outputs": [],
"source": [
"table.loc[('!D0181', 'nan')]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9a7eb51",
"metadata": {},
"outputs": [],
"source": [
"table.loc[('!D0181', 'nan')][:30].plot(kind='bar')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9e3f86a0",
"metadata": {},
"outputs": [],
"source": [
"df.groupby('VAX_LOT1')['SYMPTOMS']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6ab1c3f7",
"metadata": {},
"outputs": [],
"source": [
"df[(df['VAX_LOT1'] != 'nan') & (df['VAX_LOT2'] != 'nan') & (df['VAX_LOT3'] != 'nan') & (df['VAX_LOT4'] != 'nan')& (df['VAX_LOT5'] != 'nan') & (df['VAX_LOT6'] != 'nan') & (df['VAX_LOT7'] != 'nan') & (df['VAX_LOT8'] != 'nan')]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4753dd56",
"metadata": {},
"outputs": [],
"source": [
"df[df['VAX_LOT3'] == 'EN6201']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce3d6dfd",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"# EN6201, FE6208\n",
"df[df['VAX_LOT2'] == 'EN6201'].hist(by=['VAX_LOT2'], column='SYMPTOMS', figsize=(200, 60))\n",
"# plt.savefig('EN6201.png')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc962341",
"metadata": {},
"outputs": [],
"source": [
"df['SYMPTOMS'].hist(by=df['VAX_LOT1'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "63fa4111",
"metadata": {},
"outputs": [],
"source": [
"df.hist(by=['VAX_LOT1'], column='SYMPTOMS')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1e97b5e3",
"metadata": {},
"outputs": [],
"source": [
"from pandas import DataFrame\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"grouped = df.groupby(['VAX_LOT1'])\n",
"\n",
"for index, group in grouped:\n",
" display(index, group)\n",
" #plt.figure(figsize=(20, 10), edgecolor='green')\n",
" #plt.title(index)\n",
" #plt.hist(group['SYMPTOMS'], align='left')\n",
" #plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6429c3d9",
"metadata": {},
"outputs": [],
"source": [
"type(pd.NA)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ebf8ada",
"metadata": {},
"outputs": [],
"source": [
"from pandas import DataFrame\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"x = ['A']*300 + ['B']*400 + ['C']*300\n",
"y = np.random.randn(1000)\n",
"df = DataFrame({'Letter':x, 'N':y})\n",
"grouped = df.groupby('Letter')\n",
"\n",
"for index, group in grouped:\n",
" display(group)\n",
" plt.figure()\n",
" plt.title(index)\n",
" plt.hist(group.N)\n",
"\n",
"# plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b4df095",
"metadata": {},
"outputs": [],
"source": [
"symptomByBatchcodeTable.iloc[:1000].to_excel('tmp/symptomByBatchcodeTable.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -101,7 +459,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.8 ('howbadismybatch-venv')",
"display_name": "howbadismybatch-venv",
"language": "python",
"name": "python3"
},