From 05b75756c9e3d82566e7112cb72400fe0b613bb5 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Wed, 14 Jun 2023 01:35:03 +0200 Subject: [PATCH] refining CountryColumnsMergerTest --- src/CountryColumnsMerger.py | 10 ++- src/CountryColumnsMergerTest.py | 108 +++++++++++++++++++++++++++++--- 2 files changed, 108 insertions(+), 10 deletions(-) diff --git a/src/CountryColumnsMerger.py b/src/CountryColumnsMerger.py index 15150fdc160..3d4a022cf9c 100644 --- a/src/CountryColumnsMerger.py +++ b/src/CountryColumnsMerger.py @@ -7,7 +7,7 @@ class CountryColumnsMerger: def mergeCountryColumnOfSrcIntoDst(src: pd.DataFrame, dst: pd.DataFrame): merged = pd.merge( dst, - src, + CountryColumnsMerger._drop_duplicates(src['COUNTRY']), how = 'left', left_index = True, right_index = True, @@ -21,10 +21,14 @@ class CountryColumnsMerger: .astype('string')) return merged.drop(columns = ['COUNTRY_dst', 'COUNTRY_src']) + @staticmethod + def _drop_duplicates(df): + return df[~df.index.duplicated(keep = 'first')] + @staticmethod def _mergeSrcIntoDst(src, dst): - if (CountryColumnsMerger._notEqual(src, dst)) or (pd.isnull(src) and pd.isnull(dst)): - raise Exception() + if CountryColumnsMerger._notEqual(src, dst): + raise Exception(f'{src} != {dst}') if pd.isnull(dst) and not pd.isnull(src): return src diff --git a/src/CountryColumnsMergerTest.py b/src/CountryColumnsMergerTest.py index c718bbaec27..1fe53f5eefd 100644 --- a/src/CountryColumnsMergerTest.py +++ b/src/CountryColumnsMergerTest.py @@ -55,14 +55,108 @@ class CountryColumnsMergerTest(unittest.TestCase): dtypes = {'COUNTRY': 'string'}), check_like = True) - def test_shouldNotMergeCountryColumnOfSrcIntoDst_non_unique(self): - self.shouldNotMergeCountryColumnOfSrcIntoDst(val_dst = 'United Kingdom', val_src = 'Germany') - - def test_shouldNotMergeCountryColumnOfSrcIntoDst3(self): - self.shouldNotMergeCountryColumnOfSrcIntoDst(val_dst = None, val_src = None) - - def shouldNotMergeCountryColumnOfSrcIntoDst(self, val_dst, val_src): + def test_mergeCountryColumnOfSrcIntoDst_non_unique_index(self): # Given + src_val = 'Germany' + dst_val = None + dst = TestHelper.createDataFrame( + columns = ['COUNTRY', 'TestColumn'], + data = [ ['United Kingdom', 'test1'], + ['France', 'test2'], + [dst_val, 'test3']], + index = pd.Index( + name = 'VAERS_ID', + data = [ + '4711', + '0815', + '123']), + dtypes = {'COUNTRY': 'string'}) + + src = TestHelper.createDataFrame( + columns = ['COUNTRY'], + data = [ ['United Kingdom'], + [src_val], + [src_val]], + index = pd.Index( + name = 'VAERS_ID', + data = [ + '4711', + '123', + '123']), + dtypes = {'COUNTRY': 'string'}) + + # When + merged = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(src, dst) + + # Then + assert_frame_equal( + merged, + TestHelper.createDataFrame( + columns = ['COUNTRY', 'TestColumn'], + data = [ ['United Kingdom', 'test1'], + ['France', 'test2'], + [src_val, 'test3']], + index = pd.Index( + name = 'VAERS_ID', + data = [ + '4711', + '0815', + '123']), + dtypes = {'COUNTRY': 'string'}), + check_like = True) + + def test_mergeCountryColumnOfSrcIntoDst_None_None(self): + # Given + src_val = None + dst_val = None + dst = TestHelper.createDataFrame( + columns = ['COUNTRY', 'TestColumn'], + data = [ ['United Kingdom', 'test1'], + ['France', 'test2'], + [dst_val, 'test3']], + index = pd.Index( + name = 'VAERS_ID', + data = [ + '4711', + '0815', + '123']), + dtypes = {'COUNTRY': 'string'}) + + src = TestHelper.createDataFrame( + columns = ['COUNTRY'], + data = [ ['United Kingdom'], + [src_val]], + index = pd.Index( + name = 'VAERS_ID', + data = [ + '4711', + '123']), + dtypes = {'COUNTRY': 'string'}) + + # When + merged = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(src, dst) + + # Then + assert_frame_equal( + merged, + TestHelper.createDataFrame( + columns = ['COUNTRY', 'TestColumn'], + data = [ ['United Kingdom', 'test1'], + ['France', 'test2'], + [src_val, 'test3']], + index = pd.Index( + name = 'VAERS_ID', + data = [ + '4711', + '0815', + '123']), + dtypes = {'COUNTRY': 'string'}), + check_like = True) + + def test_shouldNotMergeCountryColumnOfSrcIntoDst_non_unique(self): + # Given + val_dst = 'United Kingdom' + val_src = 'Germany' dst = TestHelper.createDataFrame( columns = ['COUNTRY'], data = [ [val_dst]],