diff --git a/src/DictByBatchcodeTable2DictConverter.py b/src/DictByBatchcodeTable2DictConverter.py index 95d7cc11a45..2c69451f295 100644 --- a/src/DictByBatchcodeTable2DictConverter.py +++ b/src/DictByBatchcodeTable2DictConverter.py @@ -1,6 +1,3 @@ -import json - - class DictByBatchcodeTable2DictConverter: @staticmethod diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index 65881e7904f..a57d66c7f38 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -87,6 +87,16 @@ "international_VAERSVAX_Covid19" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e0908fe", + "metadata": {}, + "outputs": [], + "source": [ + "international_VAERSSYMPTOMS" + ] + }, { "cell_type": "code", "execution_count": null, @@ -98,16 +108,6 @@ "batchcodes" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "4119f1a3", - "metadata": {}, - "outputs": [], - "source": [ - "international_VAERSSYMPTOMS" - ] - }, { "cell_type": "code", "execution_count": null, @@ -116,20 +116,12 @@ "outputs": [], "source": [ "from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n", + "\n", "symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(international_VAERSVAX_Covid19, international_VAERSSYMPTOMS)\n", + "symptomByBatchcodeTable.to_pickle('tmp/symptomByBatchcodeTable.pkl')\n", "symptomByBatchcodeTable" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "817525c1", - "metadata": {}, - "outputs": [], - "source": [ - "symptomByBatchcodeTable.to_pickle('tmp/symptomByBatchcodeTable.pkl')" - ] - }, { "cell_type": "code", "execution_count": null, @@ -175,6 +167,7 @@ "outputs": [], "source": [ "from HistogramTable2DictTableConverter import HistogramTable2DictTableConverter\n", + "\n", "dictByBatchcodeTable = HistogramTable2DictTableConverter.convertHistogramTable2DictTable(symptomHistogramByBatchcodeTable)\n", "dictByBatchcodeTable" ] @@ -190,96 +183,11 @@ "\n", "histogramDescriptionPersister = HistogramDescriptionPersister('../docs/data/histograms')\n", "histogramDescriptionPersister.saveHistogramDescriptionsForBatchcodes(\n", - " batchcodes[:100],\n", + " batchcodes,\n", " dictByBatchcodeTable,\n", " progress = lambda count, size, batchcode: print(f'{count}/{size}: {batchcode}'))" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "97e2a4b6", - "metadata": {}, - "outputs": [], - "source": [ - "print(json.dumps(json.loads(jsonActual), indent=2))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "25a0b41d", - "metadata": {}, - "outputs": [], - "source": [ - "jsonTable.to_excel('tmp/jsonTable.xlsx')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1d5a7864", - "metadata": {}, - "outputs": [], - "source": [ - "filteredTable.to_excel('tmp/filteredTable.xlsx')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0daf7fdc", - "metadata": {}, - "outputs": [], - "source": [ - "from TestHelper import TestHelper\n", - "batchcode = '1808982'\n", - "symptomHistogramByBatchcodeTable = TestHelper.createDataFrame(\n", - " columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],\n", - " data = [ ['{\"Blood pressure orthostatic abnormal\":5,\"Chest discomfort\":1}'],\n", - " ['{\"Chest discomfort\":2}']],\n", - " index = pd.MultiIndex.from_tuples(\n", - " names = ['VAX_LOT1', 'VAX_LOT2'],\n", - " tuples = [[batchcode, 'EW0175'],\n", - " ['015M20A', batchcode]]))\n", - "symptomHistogramByBatchcodeTable" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8e63a507", - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "result= symptomHistogramByBatchcodeTable.to_json()\n", - "parsed = json.loads(result)\n", - "parsed\n", - "#print(json.dumps(parsed, indent=4))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e55301c8", - "metadata": {}, - "outputs": [], - "source": [ - "res = jsonTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')]['SYMPTOM_COUNT_BY_VAX_LOT']\n", - "res" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b5710177", - "metadata": {}, - "outputs": [], - "source": [ - "res.to_json()" - ] - }, { "cell_type": "code", "execution_count": null, @@ -287,166 +195,8 @@ "metadata": {}, "outputs": [], "source": [ - "symptomHistogramByBatchcodeTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')].plot(kind='bar')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "07b1b418", - "metadata": {}, - "outputs": [], - "source": [ - "symptomHistogramByBatchcodeTable.sort_values(by='SYMPTOM_COUNT_BY_VAX_LOT', ascending=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9acd49bc", - "metadata": {}, - "outputs": [], - "source": [ - "table.loc[('!D0181', 'nan')]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d9a7eb51", - "metadata": {}, - "outputs": [], - "source": [ - "table.loc[('!D0181', 'nan')][:30].plot(kind='bar')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9e3f86a0", - "metadata": {}, - "outputs": [], - "source": [ - "df.groupby('VAX_LOT1')['SYMPTOMS']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ab1c3f7", - "metadata": {}, - "outputs": [], - "source": [ - "df[(df['VAX_LOT1'] != 'nan') & (df['VAX_LOT2'] != 'nan') & (df['VAX_LOT3'] != 'nan') & (df['VAX_LOT4'] != 'nan')& (df['VAX_LOT5'] != 'nan') & (df['VAX_LOT6'] != 'nan') & (df['VAX_LOT7'] != 'nan') & (df['VAX_LOT8'] != 'nan')]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4753dd56", - "metadata": {}, - "outputs": [], - "source": [ - "df[df['VAX_LOT3'] == 'EN6201']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ce3d6dfd", - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "# EN6201, FE6208\n", - "df[df['VAX_LOT2'] == 'EN6201'].hist(by=['VAX_LOT2'], column='SYMPTOMS', figsize=(200, 60))\n", - "# plt.savefig('EN6201.png')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc962341", - "metadata": {}, - "outputs": [], - "source": [ - "df['SYMPTOMS'].hist(by=df['VAX_LOT1'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "63fa4111", - "metadata": {}, - "outputs": [], - "source": [ - "df.hist(by=['VAX_LOT1'], column='SYMPTOMS')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1e97b5e3", - "metadata": {}, - "outputs": [], - "source": [ - "from pandas import DataFrame\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "grouped = df.groupby(['VAX_LOT1'])\n", - "\n", - "for index, group in grouped:\n", - " display(index, group)\n", - " #plt.figure(figsize=(20, 10), edgecolor='green')\n", - " #plt.title(index)\n", - " #plt.hist(group['SYMPTOMS'], align='left')\n", - " #plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6429c3d9", - "metadata": {}, - "outputs": [], - "source": [ - "type(pd.NA)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3ebf8ada", - "metadata": {}, - "outputs": [], - "source": [ - "from pandas import DataFrame\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "x = ['A']*300 + ['B']*400 + ['C']*300\n", - "y = np.random.randn(1000)\n", - "df = DataFrame({'Letter':x, 'N':y})\n", - "grouped = df.groupby('Letter')\n", - "\n", - "for index, group in grouped:\n", - " display(group)\n", - " plt.figure()\n", - " plt.title(index)\n", - " plt.hist(group.N)\n", - "\n", - "# plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5b4df095", - "metadata": {}, - "outputs": [], - "source": [ - "symptomByBatchcodeTable.iloc[:1000].to_excel('tmp/symptomByBatchcodeTable.xlsx')" + "# symptomHistogramByBatchcodeTable.loc[('!D0181', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan', 'nan')].plot(kind='bar')\n", + "# symptomByBatchcodeTable.iloc[:1000].to_excel('tmp/symptomByBatchcodeTable.xlsx')" ] }, { @@ -497,7 +247,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.10.8 (main, Nov 24 2022, 14:13:03) [GCC 11.2.0]" }, "vscode": { "interpreter": { diff --git a/src/InternationalVaersCovid19Provider.py b/src/InternationalVaersCovid19Provider.py index 93b965c9676..323b6d9d1b8 100644 --- a/src/InternationalVaersCovid19Provider.py +++ b/src/InternationalVaersCovid19Provider.py @@ -16,6 +16,7 @@ def getInternationalVaersCovid19(years): def get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years): international_VAERSVAX, international_VAERSSYMPTOMS = _get_international_VAERSVAX_VAERSSYMPTOMS(years) + international_VAERSVAX.dropna(subset = ['VAX_LOT'], inplace = True) international_VAERSVAX_Covid19 = DataFrameFilter().filterByCovid19(international_VAERSVAX) return international_VAERSVAX_Covid19, international_VAERSSYMPTOMS