diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index f81388a2c8e..4c36f3838d6 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -16,7 +16,7 @@ "from VAERSFileDownloader import updateVAERSFiles\n", "from datetime import datetime\n", "from DateProvider import DateProvider\n", - "from InternationalVaersCovid19Provider import getInternationalVaersCovid19\n", + "from InternationalVaersCovid19Provider import getInternationalVaersCovid19,get_international_VAERSVAX_VAERSSYMPTOMS_Covid19\n", "from BatchCodeTableHtmlUpdater import updateBatchCodeTableHtmlFile\n", "from BatchCodeTablePersister import createAndSaveBatchCodeTables" ] @@ -67,6 +67,364 @@ " workingDirectory = os.getcwd())" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "58333a19", + "metadata": {}, + "outputs": [], + "source": [ + "international_VAERSVAX_Covid19, international_VAERSSYMPTOMS = get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years = years_from_2020_to_present)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f10b558f", + "metadata": {}, + "outputs": [], + "source": [ + "international_VAERSVAX_Covid19" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4119f1a3", + "metadata": {}, + "outputs": [], + "source": [ + "international_VAERSSYMPTOMS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "204af94d", + "metadata": {}, + "outputs": [], + "source": [ + "from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n", + "symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(international_VAERSVAX_Covid19, international_VAERSSYMPTOMS)\n", + "symptomByBatchcodeTable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "817525c1", + "metadata": {}, + "outputs": [], + "source": [ + "symptomByBatchcodeTable.to_pickle('tmp/symptomByBatchcodeTable.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5a70fa0", + "metadata": {}, + "outputs": [], + "source": [ + "symptomByBatchcodeTable = pd.read_pickle('tmp/symptomByBatchcodeTable.pkl')\n", + "symptomByBatchcodeTable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d3b45ee3", + "metadata": {}, + "outputs": [], + "source": [ + "# import cProfile\n", + "# from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n", + "# cProfile.run('SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(international_VAERSVAX_Covid19, international_VAERSSYMPTOMS)')\n", + "# # symptomByBatchcodeTable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9268d60d", + "metadata": {}, + "outputs": [], + "source": [ + "from SymptomHistogramByBatchcodeTableFactory import SymptomHistogramByBatchcodeTableFactory\n", + "\n", + "symptomHistogramByBatchcodeTable = SymptomHistogramByBatchcodeTableFactory.createSymptomHistogramByBatchcodeTable(symptomByBatchcodeTable)\n", + "symptomHistogramByBatchcodeTable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5170efad", + "metadata": {}, + "outputs": [], + "source": [ + "from HistogramTable2JsonTableConverter import HistogramTable2JsonTableConverter\n", + "jsonTable = HistogramTable2JsonTableConverter.convertHistogramTable2JsonTable(symptomHistogramByBatchcodeTable)\n", + "jsonTable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25a0b41d", + "metadata": {}, + "outputs": [], + "source": [ + "jsonTable.to_excel('tmp/jsonTable.xlsx')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e55301c8", + "metadata": {}, + "outputs": [], + "source": [ + "res = jsonTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')]['SYMPTOM_COUNT_BY_VAX_LOT']\n", + "res" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5710177", + "metadata": {}, + "outputs": [], + "source": [ + "res.to_json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2e59589", + "metadata": {}, + "outputs": [], + "source": [ + "df = symptomHistogramByBatchcodeTable[:].reset_index()\n", + "vax_lot = '094F21A'\n", + "df[(df['VAX_LOT1'] == vax_lot) | (df['VAX_LOT2'] == vax_lot) | (df['VAX_LOT3'] == vax_lot) | (df['VAX_LOT4'] == vax_lot) | (df['VAX_LOT5'] == vax_lot) | (df['VAX_LOT6'] == vax_lot) | (df['VAX_LOT7'] == vax_lot) | (df['VAX_LOT8'] == vax_lot) | (df['VAX_LOT9'] == vax_lot) | (df['VAX_LOT10'] == vax_lot)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b2fc717", + "metadata": {}, + "outputs": [], + "source": [ + "symptomHistogramByBatchcodeTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')].plot(kind='bar')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "07b1b418", + "metadata": {}, + "outputs": [], + "source": [ + "symptomHistogramByBatchcodeTable.sort_values(by='SYMPTOM_COUNT_BY_VAX_LOT', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "206980b7", + "metadata": {}, + "outputs": [], + "source": [ + "df = symptomByBatchcodeTable[:].reset_index()\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "690197ca", + "metadata": {}, + "outputs": [], + "source": [ + "table = df.groupby(['VAX_LOT1', 'VAX_LOT2'])['SYMPTOM'].value_counts()\n", + "table.name = 'SYMPTOM_count'\n", + "# table = table.reset_index(level = table.index.names.difference(['VAX_LOT1', 'VAX_LOT2']))\n", + "table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6e2c82e", + "metadata": {}, + "outputs": [], + "source": [ + "table2 = table.reset_index(level=2)\n", + "table2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb7bf7e4", + "metadata": {}, + "outputs": [], + "source": [ + "table2.index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9acd49bc", + "metadata": {}, + "outputs": [], + "source": [ + "table.loc[('!D0181', 'nan')]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d9a7eb51", + "metadata": {}, + "outputs": [], + "source": [ + "table.loc[('!D0181', 'nan')][:30].plot(kind='bar')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e3f86a0", + "metadata": {}, + "outputs": [], + "source": [ + "df.groupby('VAX_LOT1')['SYMPTOMS']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ab1c3f7", + "metadata": {}, + "outputs": [], + "source": [ + "df[(df['VAX_LOT1'] != 'nan') & (df['VAX_LOT2'] != 'nan') & (df['VAX_LOT3'] != 'nan') & (df['VAX_LOT4'] != 'nan')& (df['VAX_LOT5'] != 'nan') & (df['VAX_LOT6'] != 'nan') & (df['VAX_LOT7'] != 'nan') & (df['VAX_LOT8'] != 'nan')]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4753dd56", + "metadata": {}, + "outputs": [], + "source": [ + "df[df['VAX_LOT3'] == 'EN6201']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce3d6dfd", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "# EN6201, FE6208\n", + "df[df['VAX_LOT2'] == 'EN6201'].hist(by=['VAX_LOT2'], column='SYMPTOMS', figsize=(200, 60))\n", + "# plt.savefig('EN6201.png')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc962341", + "metadata": {}, + "outputs": [], + "source": [ + "df['SYMPTOMS'].hist(by=df['VAX_LOT1'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63fa4111", + "metadata": {}, + "outputs": [], + "source": [ + "df.hist(by=['VAX_LOT1'], column='SYMPTOMS')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e97b5e3", + "metadata": {}, + "outputs": [], + "source": [ + "from pandas import DataFrame\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "grouped = df.groupby(['VAX_LOT1'])\n", + "\n", + "for index, group in grouped:\n", + " display(index, group)\n", + " #plt.figure(figsize=(20, 10), edgecolor='green')\n", + " #plt.title(index)\n", + " #plt.hist(group['SYMPTOMS'], align='left')\n", + " #plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6429c3d9", + "metadata": {}, + "outputs": [], + "source": [ + "type(pd.NA)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ebf8ada", + "metadata": {}, + "outputs": [], + "source": [ + "from pandas import DataFrame\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "x = ['A']*300 + ['B']*400 + ['C']*300\n", + "y = np.random.randn(1000)\n", + "df = DataFrame({'Letter':x, 'N':y})\n", + "grouped = df.groupby('Letter')\n", + "\n", + "for index, group in grouped:\n", + " display(group)\n", + " plt.figure()\n", + " plt.title(index)\n", + " plt.hist(group.N)\n", + "\n", + "# plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b4df095", + "metadata": {}, + "outputs": [], + "source": [ + "symptomByBatchcodeTable.iloc[:1000].to_excel('tmp/symptomByBatchcodeTable.xlsx')" + ] + }, { "cell_type": "code", "execution_count": null, @@ -101,7 +459,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.10.8 ('howbadismybatch-venv')", + "display_name": "howbadismybatch-venv", "language": "python", "name": "python3" },