diff --git a/src/DictByBatchcodeTable2DictConverter.py b/src/DictByBatchcodeTable2DictConverter.py deleted file mode 100644 index 510c072409f..00000000000 --- a/src/DictByBatchcodeTable2DictConverter.py +++ /dev/null @@ -1,25 +0,0 @@ -class DictByBatchcodeTable2DictConverter: - - @staticmethod - def convertDictByBatchcodeTable2Dict(dictByBatchcodeTable, batchcode): - return { - "batchcode": batchcode, - "histograms": DictByBatchcodeTable2DictConverter._getHistograms(dictByBatchcodeTable) - } - - @staticmethod - def _getHistograms(dictByBatchcodeTable): - dictByBatchcodeTable = dictByBatchcodeTable.rename(columns = { "SYMPTOM_COUNT_BY_VAX_LOT": "histogram" }) - DictByBatchcodeTable2DictConverter._addBatchcodesColumn(dictByBatchcodeTable) - return dictByBatchcodeTable.to_dict('records') - - @staticmethod - def _addBatchcodesColumn(dictByBatchcodeTable): - batchcodeColumns = dictByBatchcodeTable.index.names - dictByBatchcodeTable['batchcodes'] = dictByBatchcodeTable.reset_index()[batchcodeColumns].values.tolist() - dictByBatchcodeTable['batchcodes'] = dictByBatchcodeTable['batchcodes'].map(DictByBatchcodeTable2DictConverter._getNaNBatchcodes) - - @staticmethod - def _getNaNBatchcodes(batchcodes): - # FK-TODO: handle 'nan' everywhere correctly - return [batchcode for batchcode in batchcodes if batchcode != 'nan'] diff --git a/src/DictByBatchcodeTable2DictConverterTest.py b/src/DictByBatchcodeTable2DictConverterTest.py deleted file mode 100644 index a63279df731..00000000000 --- a/src/DictByBatchcodeTable2DictConverterTest.py +++ /dev/null @@ -1,53 +0,0 @@ -import unittest -import json -from DictByBatchcodeTable2DictConverter import DictByBatchcodeTable2DictConverter -from TestHelper import TestHelper -import pandas as pd - -class DictByBatchcodeTable2DictConverterTest(unittest.TestCase): - - def test_convertDictByBatchcodeTable2Json(self): - # Given - dictByBatchcodeTable = TestHelper.createDataFrame( - columns = ['SYMPTOM_COUNT_BY_VAX_LOT'], - data = [ [ - { - "Blood 
import pandas as pd


class HistogramDescriptionTableFactory:
    """Build one histogram description per batch code.

    The input table is indexed by ``VAX_LOT_EXPLODED`` plus the remaining
    batch code levels, and holds one symptom histogram (a dict) per row in
    the ``SYMPTOM_COUNT_BY_VAX_LOT`` column.
    """

    @staticmethod
    def createHistogramDescriptionTable(dictByBatchcodeTable):
        """Return a table with one ``HISTOGRAM_DESCRIPTION`` dict per batch code.

        Rows are grouped by the ``VAX_LOT_EXPLODED`` index level; every group
        is collapsed into a dict of the form
        ``{"batchcode": ..., "histograms": [...]}``.  The resulting index is
        named ``VAX_LOT`` and the placeholder group ``'nan'`` is removed.
        """
        return (
            dictByBatchcodeTable
            .groupby('VAX_LOT_EXPLODED')
            .agg(HistogramDescriptionTableFactory._getHistograms)
            # errors='ignore': a table without any missing batch code has no
            # 'nan' group, and a plain drop('nan') would raise KeyError.
            .drop('nan', errors='ignore')
            .rename(columns={"SYMPTOM_COUNT_BY_VAX_LOT": "HISTOGRAM_DESCRIPTION"})
            .rename_axis('VAX_LOT'))

    @staticmethod
    def _getHistograms(dictByBatchcodeTable):
        """Collapse the rows of one group (passed as a Series) into a dict.

        The group's values become the ``histogram`` entries; the batch code
        combination of each row is attached under ``batchcodes``.
        """
        histogramTable = (
            dictByBatchcodeTable
            .to_frame()
            .rename(columns={"SYMPTOM_COUNT_BY_VAX_LOT": "histogram"}))
        HistogramDescriptionTableFactory._addBatchcodesColumn(histogramTable)
        return {
            # Every row of a group shares the same exploded batch code.
            "batchcode": histogramTable.index.get_level_values('VAX_LOT_EXPLODED')[0],
            "histograms": histogramTable.to_dict('records')
        }

    @staticmethod
    def _addBatchcodesColumn(dictByBatchcodeTable):
        """Add, in place, a ``batchcodes`` column listing each row's batch codes.

        The codes are read from every index level except ``VAX_LOT_EXPLODED``,
        in level order, with ``'nan'`` placeholders removed.
        """
        # Plain comprehension keeps the level order and does not depend on the
        # concrete container type pandas uses for ``index.names``.
        batchcodeColumns = [
            name
            for name in dictByBatchcodeTable.index.names
            if name != 'VAX_LOT_EXPLODED']
        batchcodeRows = dictByBatchcodeTable.reset_index()[batchcodeColumns].values.tolist()
        dictByBatchcodeTable['batchcodes'] = [
            HistogramDescriptionTableFactory._getNaNBatchcodes(batchcodes)
            for batchcodes in batchcodeRows]

    @staticmethod
    def _getNaNBatchcodes(batchcodes):
        """Return ``batchcodes`` without the ``'nan'`` placeholder entries.

        Despite its name, this keeps the batch codes that are NOT ``'nan'``.
        """
        # FK-TODO: handle 'nan' everywhere correctly
        return [batchcode for batchcode in batchcodes if batchcode != 'nan']
HistogramDescriptionTableFactory.createHistogramDescriptionTable(dictByBatchcodeTable) + + # Then + assert_frame_equal( + histogramDescriptionTable, + TestHelper.createDataFrame( + columns = ['HISTOGRAM_DESCRIPTION'], + data = [ [ + { + "batchcode": "1808982", + "histograms": [ + { + "batchcodes": ["1808982", "EW0175", "FD1921"], + "histogram": {"Blood pressure orthostatic abnormal": 5, "Chest discomfort": 1} + }, + { + "batchcodes": ["015M20A", "1808982"], + "histogram": {"Chest discomfort": 2} + } + ] + } + ], + [ + { + "batchcode": "EW0175", + "histograms": [ + { + "batchcodes": ["1808982", "EW0175", "FD1921"], + "histogram": {"Blood pressure orthostatic abnormal": 5, "Chest discomfort": 1} + } + ] + } + ], + [ + { + "batchcode": "FD1921", + "histograms": [ + { + "batchcodes": ["1808982", "EW0175", "FD1921"], + "histogram": {"Blood pressure orthostatic abnormal": 5, "Chest discomfort": 1} + } + ] + } + ], + [ + { + "batchcode": "015M20A", + "histograms": [ + { + "batchcodes": ["015M20A", "1808982"], + "histogram": {"Chest discomfort": 2} + } + ] + } + ] + ], + index = pd.Index( + name = 'VAX_LOT', + data = [ + '1808982', + 'EW0175', + 'FD1921', + '015M20A'])), + check_like = True)