From 11b2ea4eeb70504953b6bf32c6d6f80bf8f606a5 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Tue, 13 Jun 2023 23:19:58 +0200 Subject: [PATCH] refining CountryColumnsMergerTest --- src/CountryColumnsMerger.py | 25 +++++++++++++----- src/CountryColumnsMergerTest.py | 46 ++++++++++++++++++++++++++++----- 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/src/CountryColumnsMerger.py b/src/CountryColumnsMerger.py index 963e0c9b55d..157675b3c85 100644 --- a/src/CountryColumnsMerger.py +++ b/src/CountryColumnsMerger.py @@ -5,12 +5,6 @@ class CountryColumnsMerger: @staticmethod def mergeCountryColumnOfSrcIntoDst(src: pd.DataFrame, dst: pd.DataFrame): - def merge(series): - if pd.isnull(series['COUNTRY_dst']): - return series['COUNTRY_src'] - else: - return series['COUNTRY_dst'] - merged = pd.merge( dst, src, @@ -18,6 +12,23 @@ class CountryColumnsMerger: left_index = True, right_index = True, suffixes=('_dst', '_src')) - merged['COUNTRY'] = merged.apply(merge, axis = 'columns').astype('string') + merged['COUNTRY'] = (merged + .apply( + lambda series: CountryColumnsMerger._merge( + series['COUNTRY_src'], + series['COUNTRY_dst']), + axis = 'columns') + .astype('string')) return merged.drop(columns = ['COUNTRY_dst', 'COUNTRY_src']) + + @staticmethod + def _merge(src, dst): + if (CountryColumnsMerger._isNonUnique(src, dst)) or (pd.isnull(src) and pd.isnull(dst)): + raise Exception() + + return src if not pd.isnull(src) and pd.isnull(dst) else dst + + @staticmethod + def _isNonUnique(src, dst): + return not pd.isnull(src) and not pd.isnull(dst) and src != dst \ No newline at end of file diff --git a/src/CountryColumnsMergerTest.py b/src/CountryColumnsMergerTest.py index b882078b976..ff0011c8f7b 100644 --- a/src/CountryColumnsMergerTest.py +++ b/src/CountryColumnsMergerTest.py @@ -7,13 +7,15 @@ from CountryColumnsMerger import CountryColumnsMerger class CountryColumnsMergerTest(unittest.TestCase): - def test_mergeCountryColumns(self): + def test_mergeCountryColumnOfSrcIntoDst(self): # Given - unknown = TestHelper.createDataFrame( + src_val = 'Germany' + dst_val = None + dst = TestHelper.createDataFrame( columns = ['COUNTRY'], data = [ ['United Kingdom'], ['France'], - [None]], + [dst_val]], index = pd.Index( name = 'VAERS_ID', data = [ @@ -22,10 +24,10 @@ class CountryColumnsMergerTest(unittest.TestCase): '123']), dtypes = {'COUNTRY': 'string'}) - known = TestHelper.createDataFrame( + src = TestHelper.createDataFrame( columns = ['COUNTRY'], data = [ ['United Kingdom'], - ['Germany']], + [src_val]], index = pd.Index( name = 'VAERS_ID', data = [ @@ -34,7 +36,7 @@ class CountryColumnsMergerTest(unittest.TestCase): dtypes = {'COUNTRY': 'string'}) # When - merged = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(src = known, dst = unknown) + merged = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(src = src, dst = dst) # Then assert_frame_equal( @@ -43,7 +45,7 @@ class CountryColumnsMergerTest(unittest.TestCase): columns = ['COUNTRY'], data = [ ['United Kingdom'], ['France'], - ['Germany']], + [src_val]], index = pd.Index( name = 'VAERS_ID', data = [ @@ -51,3 +53,33 @@ class CountryColumnsMergerTest(unittest.TestCase): '0815', '123']), dtypes = {'COUNTRY': 'string'})) + + def test_shouldNotMergeCountryColumnOfSrcIntoDst_non_unique(self): + self._mergeCountryColumnOfSrcIntoDst(val_dst = 'United Kingdom', val_src = 'Germany') + + def test_shouldNotMergeCountryColumnOfSrcIntoDst3(self): + self._mergeCountryColumnOfSrcIntoDst(val_dst = None, val_src = None) + + def _mergeCountryColumnOfSrcIntoDst(self, val_dst, val_src): + # Given + dst = TestHelper.createDataFrame( + columns = ['COUNTRY'], + data = [ [val_dst]], + index = pd.Index( + name = 'VAERS_ID', + data = [ + '4711']), + dtypes = {'COUNTRY': 'string'}) + + src = TestHelper.createDataFrame( + columns = ['COUNTRY'], + data = [ [val_src]], + index = pd.Index( + name = 'VAERS_ID', + data = [ + '4711']), + dtypes = {'COUNTRY': 'string'}) + + # When && Then + with self.assertRaises(Exception): + CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(src = src, dst = dst)