From 099fdd52aa5647ab591d06590acc6688b347b6da Mon Sep 17 00:00:00 2001 From: frankknoll Date: Wed, 19 Apr 2023 09:39:29 +0200 Subject: [PATCH] minifying histogram JSON files --- docs/HistoDescrsProvider.js | 9 +--- ...ableIntoHistogramDescriptionTableMerger.py | 4 +- ...IntoHistogramDescriptionTableMergerTest.py | 2 - src/HistogramDescriptionTableSelector.py | 25 +++++++++ src/HistogramDescriptionTableSelectorTest.py | 51 +++++++++++++++++++ src/HistogramFactoryAndPersister.py | 2 + src/HowBadIsMyBatch.ipynb | 2 +- src/help.txt | 1 - 8 files changed, 81 insertions(+), 15 deletions(-) create mode 100644 src/HistogramDescriptionTableSelector.py create mode 100644 src/HistogramDescriptionTableSelectorTest.py diff --git a/docs/HistoDescrsProvider.js b/docs/HistoDescrsProvider.js index db099f508e5..2e8624b18bf 100644 --- a/docs/HistoDescrsProvider.js +++ b/docs/HistoDescrsProvider.js @@ -1,13 +1,6 @@ class HistoDescrsProvider { static getHistoDescrs(batchcode) { - return fetch(`data/histograms/Global/${batchcode}.json`) - .then(response => response.json()) - .then(histoDescrs => { - histoDescrs.histograms.sort((histoDescr1, histoDescr2) => histoDescr1.batchcodes.length - histoDescr2.batchcodes.length); - histoDescrs.histogram = histoDescrs.histograms[0].histogram; - delete histoDescrs.histograms; - return histoDescrs; - }); + return fetch(`data/histograms/Global/${batchcode}.json`).then(response => response.json()) } } \ No newline at end of file diff --git a/src/BatchCodeTableIntoHistogramDescriptionTableMerger.py b/src/BatchCodeTableIntoHistogramDescriptionTableMerger.py index 2b260af2cba..654a8ce7b7c 100644 --- a/src/BatchCodeTableIntoHistogramDescriptionTableMerger.py +++ b/src/BatchCodeTableIntoHistogramDescriptionTableMerger.py @@ -28,9 +28,7 @@ class BatchCodeTableIntoHistogramDescriptionTableMerger: 'Deaths', 'Disabilities', 'Life Threatening Illnesses', - 'Company', - 'Severe reports', - 'Lethality' + 'Company' ]] def _merge_columns_into_HISTOGRAM_DESCRIPTION(self, table): diff --git a/src/BatchCodeTableIntoHistogramDescriptionTableMergerTest.py b/src/BatchCodeTableIntoHistogramDescriptionTableMergerTest.py index 7c400ddb61f..8a5e837b7a5 100644 --- a/src/BatchCodeTableIntoHistogramDescriptionTableMergerTest.py +++ b/src/BatchCodeTableIntoHistogramDescriptionTableMergerTest.py @@ -55,8 +55,6 @@ class BatchCodeTableIntoHistogramDescriptionTableMergerTest(unittest.TestCase): 'Disabilities': 2, 'Life Threatening Illnesses': 2, 'Company': 'MODERNA', - 'Severe reports': 2/2 * 100, - 'Lethality': np.nan, 'histograms': [ { 'batchcodes': ['1808982', 'EW0175', 'FD1921'], diff --git a/src/HistogramDescriptionTableSelector.py b/src/HistogramDescriptionTableSelector.py new file mode 100644 index 00000000000..278551b5726 --- /dev/null +++ b/src/HistogramDescriptionTableSelector.py @@ -0,0 +1,25 @@ +import warnings + +class HistogramDescriptionTableSelector: + + @staticmethod + def selectHistogramsWithShortestBatchcodeCombinations(histogramDescriptionTable): + histogramDescriptionTable['HISTOGRAM_DESCRIPTION'] = histogramDescriptionTable['HISTOGRAM_DESCRIPTION'].map(HistogramDescriptionTableSelector._selectHistogramWithShortestBatchcodeCombination) + return histogramDescriptionTable + + @staticmethod + def _selectHistogramWithShortestBatchcodeCombination(histoDescr): + return { + "batchcode": histoDescr["batchcode"], + "histogram": HistogramDescriptionTableSelector._getHistogramWithShortestBatchcodeCombination(histoDescr) + } + + @staticmethod + def _getHistogramWithShortestBatchcodeCombination(histoDescr): + histogramsSortedByShortestBatchcodeCombination = sorted( + histoDescr["histograms"], + key = lambda histogram: len(histogram["batchcodes"])) + histogramWithShortestBatchcodeCombination = histogramsSortedByShortestBatchcodeCombination[0] + if len(histogramWithShortestBatchcodeCombination["batchcodes"]) != 1: + warnings.warn(f"batchcode {histoDescr['batchcode']} has non unique batchcode combination {histogramWithShortestBatchcodeCombination['batchcodes']} for it's histogram") + return histogramWithShortestBatchcodeCombination["histogram"] diff --git a/src/HistogramDescriptionTableSelectorTest.py b/src/HistogramDescriptionTableSelectorTest.py new file mode 100644 index 00000000000..7b344f6da9f --- /dev/null +++ b/src/HistogramDescriptionTableSelectorTest.py @@ -0,0 +1,51 @@ +import unittest +from pandas.testing import assert_frame_equal +from HistogramDescriptionTableSelector import HistogramDescriptionTableSelector +from TestHelper import TestHelper +import pandas as pd + +class HistogramDescriptionTableSelectorTest(unittest.TestCase): + + def test_selectHistogramsWithShortestBatchcodeCombinations(self): + # Given + histogramDescriptionTable = TestHelper.createDataFrame( + columns = ['HISTOGRAM_DESCRIPTION'], + data = [ [ + { + "batchcode": "1808982", + "histograms": [ + { + "batchcodes": ["1808982", "EW0175", "FD1921"], + "histogram": {"Blood pressure orthostatic abnormal": 5, "Chest discomfort": 1} + }, + { + "batchcodes": ["015M20A", "1808982"], + "histogram": {"Chest discomfort": 2} + } + ] + } + ] + ], + index = pd.Index( + name = 'VAX_LOT', + data = ['1808982'])) + + # When + histogramsWithShortestBatchcodeCombinationsTable = HistogramDescriptionTableSelector.selectHistogramsWithShortestBatchcodeCombinations(histogramDescriptionTable) + + # Then + assert_frame_equal( + histogramsWithShortestBatchcodeCombinationsTable, + TestHelper.createDataFrame( + columns = ['HISTOGRAM_DESCRIPTION'], + data = [ [ + { + "batchcode": "1808982", + "histogram": {"Chest discomfort": 2} + } + ] + ], + index = pd.Index( + name = 'VAX_LOT', + data = ['1808982']))) + \ No newline at end of file diff --git a/src/HistogramFactoryAndPersister.py b/src/HistogramFactoryAndPersister.py index 39864bfc1d5..e04a8d15cfc 100644 --- a/src/HistogramFactoryAndPersister.py +++ b/src/HistogramFactoryAndPersister.py @@ -2,6 +2,7 @@ from HistogramFactory import createHistograms from HistogramPersister import saveHistograms from MultiIndexExploder import MultiIndexExploder from HistogramDescriptionTableFactory import HistogramDescriptionTableFactory +from HistogramDescriptionTableSelector import HistogramDescriptionTableSelector from BatchCodeTableIntoHistogramDescriptionTableMerger import BatchCodeTableIntoHistogramDescriptionTableMerger @@ -10,6 +11,7 @@ def createAndSaveGlobalHistograms(symptomByBatchcodeTable, batchCodeTable): dictByBatchcodeTable = createHistograms(symptomByBatchcodeTable) explodedTable = MultiIndexExploder.explodeMultiIndexOfTable(dictByBatchcodeTable) histogramDescriptionTable = HistogramDescriptionTableFactory.createHistogramDescriptionTable(explodedTable) + histogramDescriptionTable = HistogramDescriptionTableSelector.selectHistogramsWithShortestBatchcodeCombinations(histogramDescriptionTable) histogramDescriptionTable = BatchCodeTableIntoHistogramDescriptionTableMerger().mergeBatchCodeTableIntoHistogramDescriptionTable( batchCodeTable = _rearrange(batchCodeTable), histogramDescriptionTable = histogramDescriptionTable) diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index 16658a2fb56..939616b2973 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -120,7 +120,7 @@ "outputs": [], "source": [ "batchCodeTable = createGlobalBatchCodeTable(\n", - " deleteEntriesWithADRsLessThanOrEqual = 1,\n", + " deleteEntriesWithADRsLessThanOrEqual = 2,\n", " minADRsForLethality = 100,\n", " batchCodeTableFactory = BatchCodeTableFactory(internationalVaersCovid19))\n", "batchCodeTable" diff --git a/src/help.txt b/src/help.txt index 29595fb3df5..72fc8945b8e 100644 --- a/src/help.txt +++ b/src/help.txt @@ -4,7 +4,6 @@ FK-FIXME: FK-TODO: - add google captcha to batchCodeTable.html -- aus den JSON-Dateien lethality und severity entfernen. anacron job: sudo cp src/intensivstationen_howbadismybatch.sh /etc/cron.daily/intensivstationen_howbadismybatch