import pandas as pd


class BatchCodeTableIntoHistogramDescriptionTableMerger:
    """Fold per-batch-code statistics into the histogram description dicts.

    The histogram description table is left-joined with the batch code table
    on their indexes.  For every row, the values of the batch code columns
    are added as extra entries to the mapping stored in the
    ``HISTOGRAM_DESCRIPTION`` cell.
    """

    # Kept readable through instances (``self.HISTOGRAM_DESCRIPTION_columnName``)
    # exactly as when it was assigned in ``__init__``.
    HISTOGRAM_DESCRIPTION_columnName = 'HISTOGRAM_DESCRIPTION'

    # Batch code columns copied into each histogram description, in the
    # order in which they are appended to the description's own keys.
    _BATCH_CODE_COLUMN_NAMES = [
        'Adverse Reaction Reports',
        'Deaths',
        'Disabilities',
        'Life Threatening Illnesses',
        'Company',
        'Severe reports',
        'Lethality',
    ]

    def mergeBatchCodeTableIntoHistogramDescriptionTable(self, batchCodeTable, histogramDescriptionTable):
        """Return a table with the enriched descriptions and the country.

        Args:
            batchCodeTable: DataFrame providing the batch code columns listed
                in ``_BATCH_CODE_COLUMN_NAMES``; joined on its index.
            histogramDescriptionTable: DataFrame providing the
                ``HISTOGRAM_DESCRIPTION`` and ``COUNTRY`` columns; joined on
                its index.  Each ``HISTOGRAM_DESCRIPTION`` cell must be a
                mapping because it is unpacked with ``**``.

        Returns:
            A DataFrame indexed like ``histogramDescriptionTable`` with the
            columns ``HISTOGRAM_DESCRIPTION`` (merged dicts) and ``COUNTRY``.

        Raises:
            pandas.errors.MergeError: if the join is not one-to-one.
            KeyError: if one of the expected columns is missing.
        """
        mergedTable = self._combineTables(batchCodeTable, histogramDescriptionTable)
        mergedTable = self._merge_columns_into_HISTOGRAM_DESCRIPTION(mergedTable)
        # Assignment aligns on the index, which the left join preserved.
        mergedTable['COUNTRY'] = histogramDescriptionTable['COUNTRY']
        return mergedTable

    def _combineTables(self, batchCodeTable, histogramDescriptionTable):
        """Left-join both tables on their indexes and keep the needed columns."""
        mergedTable = pd.merge(
            histogramDescriptionTable,
            batchCodeTable,
            how='left',
            left_index=True,
            right_index=True,
            validate='one_to_one')
        columnNames = [self.HISTOGRAM_DESCRIPTION_columnName, *self._BATCH_CODE_COLUMN_NAMES]
        return mergedTable[columnNames]

    def _merge_columns_into_HISTOGRAM_DESCRIPTION(self, table):
        """Collapse every row of ``table`` into one merged description dict."""
        # The row function returns a dict, so ``apply`` yields a Series of dicts.
        descriptions = table.apply(self._merge_row_into_HISTOGRAM_DESCRIPTION, axis='columns')
        descriptions.name = self.HISTOGRAM_DESCRIPTION_columnName
        return descriptions.to_frame()

    def _merge_row_into_HISTOGRAM_DESCRIPTION(self, src):
        """Return the row's description extended by the row's other columns.

        A new dict is built; the description stored in the input table is not
        mutated.  Keys keep a deterministic order: first the description's
        own keys, then the remaining columns in column order.
        """
        row = src.to_dict()
        description = row.pop(self.HISTOGRAM_DESCRIPTION_columnName)
        return {**description, **row}
def get_dictWithKeys_dictWithoutKeys(dict, keys):
    """Split a mapping into the entries named by ``keys`` and all the others.

    Args:
        dict: the mapping to split.  The parameter name shadows the builtin
            and is kept only for interface compatibility.
        keys: iterable of keys to extract; it is iterated exactly once, so a
            one-shot iterator is accepted.

    Returns:
        A tuple ``(dictWithKeys, dictWithoutKeys)``.  ``dictWithKeys`` follows
        the iteration order of ``keys``; ``dictWithoutKeys`` keeps the
        insertion order of the source mapping, so the result is deterministic
        across runs.

    Raises:
        KeyError: if an element of ``keys`` is not present in the mapping.
    """
    source = dict
    # Materialize once: ``keys`` is needed both for extraction and exclusion.
    selectedKeys = tuple(keys)
    excludedKeys = set(selectedKeys)
    dictWithKeys = {key: source[key] for key in selectedKeys}
    dictWithoutKeys = {
        key: value
        for key, value in source.items()
        if key not in excludedKeys
    }
    return dictWithKeys, dictWithoutKeys