diff --git a/environment.yml b/environment.yml
index d2542baa7cc..3832ca6ddab 100644
--- a/environment.yml
+++ b/environment.yml
@@ -17,6 +17,7 @@ dependencies:
   - pillow
   - openpyxl
   - tqdm
+  - simplejson
   # - python-decouple
   # - selenium
   # - webdriver-manager
diff --git a/src/BatchCodeTableIntoHistogramDescriptionTableMerger.py b/src/BatchCodeTableIntoHistogramDescriptionTableMerger.py
new file mode 100644
index 00000000000..85f5dcc6792
--- /dev/null
+++ b/src/BatchCodeTableIntoHistogramDescriptionTableMerger.py
@@ -0,0 +1,43 @@
+import pandas as pd
+
+class BatchCodeTableIntoHistogramDescriptionTableMerger:
+
+    # columns of the batch code table that are copied into every histogram description
+    _BATCH_CODE_COLUMNS = [
+        'Adverse Reaction Reports',
+        'Deaths',
+        'Disabilities',
+        'Life Threatening Illnesses',
+        'Company',
+        'Severe reports',
+        'Lethality']
+
+    @staticmethod
+    def mergeBatchCodeTableIntoHistogramDescriptionTable(batchCodeTable, histogramDescriptionTable):
+        """Return a table whose HISTOGRAM_DESCRIPTION dicts additionally contain the batch code columns.
+
+        Both tables are joined on their index (left join, validated as one-to-one).
+        The result has the columns HISTOGRAM_DESCRIPTION and COUNTRY.
+        The dicts stored in histogramDescriptionTable are not modified.
+        """
+        batchCodeColumns = BatchCodeTableIntoHistogramDescriptionTableMerger._BATCH_CODE_COLUMNS
+
+        def merge(row):
+            # build a new dict instead of adding keys to the dict held by the input table
+            return {
+                **row['HISTOGRAM_DESCRIPTION'],
+                **{column: row[column] for column in batchCodeColumns}
+            }
+
+        mergedTable = pd.merge(
+            histogramDescriptionTable,
+            batchCodeTable,
+            how = 'left',
+            left_index = True,
+            right_index = True,
+            validate = 'one_to_one')
+        histogramDescriptions = mergedTable[['HISTOGRAM_DESCRIPTION'] + batchCodeColumns].apply(merge, axis = 'columns')
+        histogramDescriptions.name = 'HISTOGRAM_DESCRIPTION'
+        mergedTable = histogramDescriptions.to_frame()
+        mergedTable['COUNTRY'] = histogramDescriptionTable['COUNTRY']
+        return mergedTable
diff --git a/src/BatchCodeTableIntoHistogramDescriptionTableMergerTest.py b/src/BatchCodeTableIntoHistogramDescriptionTableMergerTest.py
new file mode 100644
index 00000000000..3d91505233a
--- /dev/null
+++ b/src/BatchCodeTableIntoHistogramDescriptionTableMergerTest.py
@@ -0,0 +1,72 @@
+import unittest
+from pandas.testing import assert_frame_equal
+from BatchCodeTableIntoHistogramDescriptionTableMerger import BatchCodeTableIntoHistogramDescriptionTableMerger
+from TestHelper import TestHelper
+import pandas as pd
+import numpy as np
+
+class BatchCodeTableIntoHistogramDescriptionTableMergerTest(unittest.TestCase):
+
+    def test_mergeBatchCodeTableIntoHistogramDescriptionTable(self):
+        # Given
+        histogramDescriptionTable = TestHelper.createDataFrame(
+            columns = ['HISTOGRAM_DESCRIPTION', 'COUNTRY'],
+            data = [
+                [
+                    {
+                        'batchcode': '1808982',
+                        'histograms': self._createHistograms()
+                    },
+                    'Global'
+                ]
+            ],
+            index = self._createVaxLotIndex())
+        batchCodeTable = TestHelper.createDataFrame(
+            columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Severe reports', 'Lethality'],
+            data = [[2, 1, 2, 2, 'MODERNA', 2/2 * 100, np.nan]],
+            index = self._createVaxLotIndex())
+
+        # When
+        mergedTable = BatchCodeTableIntoHistogramDescriptionTableMerger.mergeBatchCodeTableIntoHistogramDescriptionTable(
+            batchCodeTable = batchCodeTable,
+            histogramDescriptionTable = histogramDescriptionTable)
+
+        # Then
+        expectedTable = TestHelper.createDataFrame(
+            columns = ['HISTOGRAM_DESCRIPTION', 'COUNTRY'],
+            data = [
+                [
+                    {
+                        'batchcode': '1808982',
+                        'Adverse Reaction Reports': 2,
+                        'Deaths': 1,
+                        'Disabilities': 2,
+                        'Life Threatening Illnesses': 2,
+                        'Company': 'MODERNA',
+                        'Severe reports': 2/2 * 100,
+                        'Lethality': np.nan,
+                        'histograms': self._createHistograms()
+                    },
+                    'Global'
+                ]
+            ],
+            index = self._createVaxLotIndex())
+        assert_frame_equal(mergedTable, expectedTable)
+
+    @staticmethod
+    def _createHistograms():
+        # a fresh list per call, so the input table and the expected table share no mutable objects
+        return [
+            {
+                'batchcodes': ['1808982', 'EW0175', 'FD1921'],
+                'histogram': {'Blood pressure orthostatic abnormal': 5, 'Chest discomfort': 1}
+            },
+            {
+                'batchcodes': ['015M20A', '1808982'],
+                'histogram': {'Chest discomfort': 2}
+            }
+        ]
+
+    @staticmethod
+    def _createVaxLotIndex():
+        return pd.Index(name = 'VAX_LOT', data = ['1808982'])
diff --git a/src/HistogramFactoryAndPersister.py b/src/HistogramFactoryAndPersister.py
index f74156503e4..1dba4e30f35 100644
--- a/src/HistogramFactoryAndPersister.py
+++ b/src/HistogramFactoryAndPersister.py
@@ -2,13 +2,23 @@ from HistogramFactory import createHistograms
 from HistogramPersister import saveHistograms
 from MultiIndexExploder import MultiIndexExploder
 from HistogramDescriptionTableFactory import HistogramDescriptionTableFactory
+from BatchCodeTableIntoHistogramDescriptionTableMerger import BatchCodeTableIntoHistogramDescriptionTableMerger
 
 
-def createAndSaveGlobalHistograms(symptomByBatchcodeTable):
+def createAndSaveGlobalHistograms(symptomByBatchcodeTable, batchCodeTable):
     symptomByBatchcodeTable = symptomByBatchcodeTable.assign(COUNTRY = 'Global')
     dictByBatchcodeTable = createHistograms(symptomByBatchcodeTable)
     explodedTable = MultiIndexExploder.explodeMultiIndexOfTable(dictByBatchcodeTable)
     histogramDescriptionTable = HistogramDescriptionTableFactory.createHistogramDescriptionTable(explodedTable)
+    rearrangedBatchCodeTable = _rearrange(batchCodeTable)
+    histogramDescriptionTable = BatchCodeTableIntoHistogramDescriptionTableMerger.mergeBatchCodeTableIntoHistogramDescriptionTable(
+        batchCodeTable = rearrangedBatchCodeTable,
+        histogramDescriptionTable = histogramDescriptionTable)
     for country, histogramDescriptionTableForCountry in histogramDescriptionTable.groupby('COUNTRY'):
         print(f'saving histograms for {country}')
-        saveHistograms(histogramDescriptionTableForCountry, country)
\ No newline at end of file
+        saveHistograms(histogramDescriptionTableForCountry, country)
+
+
+def _rearrange(batchCodeTable):
+    # index the batch code table by its 'Batch' column and name that index 'VAX_LOT'
+    return batchCodeTable.set_index('Batch').rename_axis('VAX_LOT')
\ No newline at end of file
diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb
index 19b06bd06c8..39da46d2e36 100644
--- 
a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -112,16 +112,6 @@ "symptomByBatchcodeTable" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "23731536", - "metadata": {}, - "outputs": [], - "source": [ - "createAndSaveGlobalHistograms(symptomByBatchcodeTable)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -149,13 +139,23 @@ { "cell_type": "code", "execution_count": null, - "id": "89d57e0f", + "id": "189a639e", "metadata": {}, "outputs": [], "source": [ "updateBatchCodeTableHtmlFile(batchCodeTable, batchCodeTableHtmlFile=\"../docs/batchCodeTable.html\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec2a692e", + "metadata": {}, + "outputs": [], + "source": [ + "createAndSaveGlobalHistograms(symptomByBatchcodeTable, batchCodeTable)" + ] + }, { "attachments": {}, "cell_type": "markdown", diff --git a/src/IOUtils.py b/src/IOUtils.py index d58cbe18221..4085d1e39e4 100644 --- a/src/IOUtils.py +++ b/src/IOUtils.py @@ -1,5 +1,5 @@ import os -import json +import simplejson as json class IOUtils: @@ -31,7 +31,7 @@ class IOUtils: def saveDictAsJson(dict, file): IOUtils.ensurePath(file) with open(file, 'w') as outfile: - json.dump(dict, outfile) + json.dump(dict, outfile, ignore_nan=True) @staticmethod def ensurePath(file):