This commit is contained in:
frankknoll
2023-01-26 08:42:29 +01:00
parent 7c520c8f64
commit 30f66576c8

View File

@@ -16,7 +16,7 @@
"from VAERSFileDownloader import updateVAERSFiles\n", "from VAERSFileDownloader import updateVAERSFiles\n",
"from datetime import datetime\n", "from datetime import datetime\n",
"from DateProvider import DateProvider\n", "from DateProvider import DateProvider\n",
"from InternationalVaersCovid19Provider import getInternationalVaersCovid19\n", "from InternationalVaersCovid19Provider import getInternationalVaersCovid19,get_international_VAERSVAX_VAERSSYMPTOMS_Covid19\n",
"from BatchCodeTableHtmlUpdater import updateBatchCodeTableHtmlFile\n", "from BatchCodeTableHtmlUpdater import updateBatchCodeTableHtmlFile\n",
"from BatchCodeTablePersister import createAndSaveBatchCodeTables" "from BatchCodeTablePersister import createAndSaveBatchCodeTables"
] ]
@@ -67,6 +67,364 @@
" workingDirectory = os.getcwd())" " workingDirectory = os.getcwd())"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "58333a19",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSVAX_Covid19, international_VAERSSYMPTOMS = get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years = years_from_2020_to_present)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f10b558f",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSVAX_Covid19"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4119f1a3",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSSYMPTOMS"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "204af94d",
"metadata": {},
"outputs": [],
"source": [
"from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n",
"symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(international_VAERSVAX_Covid19, international_VAERSSYMPTOMS)\n",
"symptomByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "817525c1",
"metadata": {},
"outputs": [],
"source": [
"symptomByBatchcodeTable.to_pickle('tmp/symptomByBatchcodeTable.pkl')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5a70fa0",
"metadata": {},
"outputs": [],
"source": [
"symptomByBatchcodeTable = pd.read_pickle('tmp/symptomByBatchcodeTable.pkl')\n",
"symptomByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d3b45ee3",
"metadata": {},
"outputs": [],
"source": [
"# import cProfile\n",
"# from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n",
"# cProfile.run('SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(international_VAERSVAX_Covid19, international_VAERSSYMPTOMS)')\n",
"# # symptomByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9268d60d",
"metadata": {},
"outputs": [],
"source": [
"from SymptomHistogramByBatchcodeTableFactory import SymptomHistogramByBatchcodeTableFactory\n",
"\n",
"symptomHistogramByBatchcodeTable = SymptomHistogramByBatchcodeTableFactory.createSymptomHistogramByBatchcodeTable(symptomByBatchcodeTable)\n",
"symptomHistogramByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5170efad",
"metadata": {},
"outputs": [],
"source": [
"from HistogramTable2JsonTableConverter import HistogramTable2JsonTableConverter\n",
"jsonTable = HistogramTable2JsonTableConverter.convertHistogramTable2JsonTable(symptomHistogramByBatchcodeTable)\n",
"jsonTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "25a0b41d",
"metadata": {},
"outputs": [],
"source": [
"jsonTable.to_excel('tmp/jsonTable.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e55301c8",
"metadata": {},
"outputs": [],
"source": [
"res = jsonTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')]['SYMPTOM_COUNT_BY_VAX_LOT']\n",
"res"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5710177",
"metadata": {},
"outputs": [],
"source": [
"res.to_json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2e59589",
"metadata": {},
"outputs": [],
"source": [
"# Show every histogram row in which the given batch code occurs in any of the ten VAX_LOT columns.\n",
"df = symptomHistogramByBatchcodeTable[:].reset_index()\n",
"vax_lot = '094F21A'\n",
"vax_lot_columns = [f'VAX_LOT{i}' for i in range(1, 11)]\n",
"# one vectorised comparison over all lot columns instead of ten hand-written `==` terms joined by `|`\n",
"df[df[vax_lot_columns].eq(vax_lot).any(axis=1)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b2fc717",
"metadata": {},
"outputs": [],
"source": [
"symptomHistogramByBatchcodeTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')].plot(kind='bar')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "07b1b418",
"metadata": {},
"outputs": [],
"source": [
"symptomHistogramByBatchcodeTable.sort_values(by='SYMPTOM_COUNT_BY_VAX_LOT', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "206980b7",
"metadata": {},
"outputs": [],
"source": [
"df = symptomByBatchcodeTable[:].reset_index()\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "690197ca",
"metadata": {},
"outputs": [],
"source": [
"# Count how often each SYMPTOM value occurs for every (VAX_LOT1, VAX_LOT2) combination.\n",
"table = (\n",
"    df.groupby(['VAX_LOT1', 'VAX_LOT2'])['SYMPTOM']\n",
"    .value_counts()\n",
"    .rename('SYMPTOM_count')\n",
")\n",
"# table = table.reset_index(level = table.index.names.difference(['VAX_LOT1', 'VAX_LOT2']))\n",
"table"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6e2c82e",
"metadata": {},
"outputs": [],
"source": [
"table2 = table.reset_index(level=2)\n",
"table2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cb7bf7e4",
"metadata": {},
"outputs": [],
"source": [
"table2.index"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9acd49bc",
"metadata": {},
"outputs": [],
"source": [
"table.loc[('!D0181', 'nan')]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9a7eb51",
"metadata": {},
"outputs": [],
"source": [
"table.loc[('!D0181', 'nan')][:30].plot(kind='bar')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9e3f86a0",
"metadata": {},
"outputs": [],
"source": [
"df.groupby('VAX_LOT1')['SYMPTOMS']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6ab1c3f7",
"metadata": {},
"outputs": [],
"source": [
"# Keep only rows whose first eight VAX_LOT columns all differ from the string 'nan'.\n",
"first_eight_lot_columns = [f'VAX_LOT{i}' for i in range(1, 9)]\n",
"# one vectorised comparison instead of eight hand-written `!=` terms joined by `&`\n",
"df[df[first_eight_lot_columns].ne('nan').all(axis=1)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4753dd56",
"metadata": {},
"outputs": [],
"source": [
"df[df['VAX_LOT3'] == 'EN6201']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce3d6dfd",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"# EN6201, FE6208\n",
"df[df['VAX_LOT2'] == 'EN6201'].hist(by=['VAX_LOT2'], column='SYMPTOMS', figsize=(200, 60))\n",
"# plt.savefig('EN6201.png')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc962341",
"metadata": {},
"outputs": [],
"source": [
"df['SYMPTOMS'].hist(by=df['VAX_LOT1'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "63fa4111",
"metadata": {},
"outputs": [],
"source": [
"df.hist(by=['VAX_LOT1'], column='SYMPTOMS')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1e97b5e3",
"metadata": {},
"outputs": [],
"source": [
"from pandas import DataFrame\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Group by the column name itself rather than a one-element list: with a list,\n",
"# pandas 1.5 warns and pandas 2.x yields 1-tuples as group keys, so `index`\n",
"# would no longer be the plain batch code.\n",
"grouped = df.groupby('VAX_LOT1')\n",
"\n",
"# display each batch code together with the rows that belong to it\n",
"for index, group in grouped:\n",
"    display(index, group)\n",
"    #plt.figure(figsize=(20, 10), edgecolor='green')\n",
"    #plt.title(index)\n",
"    #plt.hist(group['SYMPTOMS'], align='left')\n",
"    #plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6429c3d9",
"metadata": {},
"outputs": [],
"source": [
"type(pd.NA)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ebf8ada",
"metadata": {},
"outputs": [],
"source": [
"from pandas import DataFrame\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Self-contained demo: one histogram per group, drawn from synthetic data.\n",
"# The frame gets its own name so this cell no longer overwrites the notebook-wide `df`\n",
"# that the batch code cells read.\n",
"x = ['A']*300 + ['B']*400 + ['C']*300\n",
"# seeded generator so the demo histograms are identical on every run\n",
"y = np.random.default_rng(0).standard_normal(1000)\n",
"demo_df = DataFrame({'Letter':x, 'N':y})\n",
"grouped = demo_df.groupby('Letter')\n",
"\n",
"for index, group in grouped:\n",
"    display(group)\n",
"    plt.figure()\n",
"    plt.title(index)\n",
"    plt.hist(group.N)\n",
"\n",
"# plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b4df095",
"metadata": {},
"outputs": [],
"source": [
"symptomByBatchcodeTable.iloc[:1000].to_excel('tmp/symptomByBatchcodeTable.xlsx')"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -101,7 +459,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3.10.8 ('howbadismybatch-venv')", "display_name": "howbadismybatch-venv",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },