# Reconstructed from a patch that adds two new files:
#   src/CountryColumnsMerger.py      -> class CountryColumnsMerger
#   src/CountryColumnsMergerTest.py  -> class CountryColumnsMergerTest
import unittest

import pandas as pd
from pandas.testing import assert_frame_equal


class CountryColumnsMerger:
    """Fill gaps in one frame's COUNTRY column from another frame."""

    @staticmethod
    def mergeCountryColumnOfSrcIntoDst(src: pd.DataFrame, dst: pd.DataFrame) -> pd.DataFrame:
        """Return dst with missing COUNTRY values taken from src.

        The frames are left-joined on their indexes, so every row of dst is
        kept and rows that exist only in src are ignored. For each row the
        COUNTRY value of dst wins; the value of src is used only where the
        dst value is null.

        Args:
            src: frame providing fallback COUNTRY values.
            dst: frame whose COUNTRY column is completed.

        Returns:
            A new frame with a single merged COUNTRY column of dtype
            'string' in place of the two suffixed join columns.

        Raises:
            KeyError: if src and dst do not both have a COUNTRY column,
                because the join then creates no suffixed columns.
        """
        merged = pd.merge(
            dst,
            src,
            how = 'left',
            left_index = True,
            right_index = True,
            suffixes = ('_dst', '_src'))
        # Cast both sides first so the fill works on one dtype and the result
        # matches a "pick, then cast to string" of the raw values.
        dstCountry = merged['COUNTRY_dst'].astype('string')
        srcCountry = merged['COUNTRY_src'].astype('string')
        # Vectorised replacement for a per-row apply(): keep dst, fall back to src.
        merged['COUNTRY'] = dstCountry.fillna(srcCountry)
        return merged.drop(columns = ['COUNTRY_dst', 'COUNTRY_src'])


class CountryColumnsMergerTest(unittest.TestCase):
    """Unit test for CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst."""

    def test_mergeCountryColumns(self):
        # Imported here so that importing CountryColumnsMerger does not
        # require the test-only helper module.
        from TestHelper import TestHelper

        # Given
        unknown = TestHelper.createDataFrame(
            columns = ['COUNTRY'],
            data = [  ['United Kingdom'],
                      ['France'],
                      [None]],
            index = pd.Index(
                name = 'VAERS_ID',
                data = [
                    '4711',
                    '0815',
                    '123']),
            dtypes = {'COUNTRY': 'string'})

        known = TestHelper.createDataFrame(
            columns = ['COUNTRY'],
            data = [  ['United Kingdom'],
                      ['Germany']],
            index = pd.Index(
                name = 'VAERS_ID',
                data = [
                    '4711',
                    '123']),
            dtypes = {'COUNTRY': 'string'})

        # When
        merged = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(src = known, dst = unknown)

        # Then
        assert_frame_equal(
            merged,
            TestHelper.createDataFrame(
                columns = ['COUNTRY'],
                data = [  ['United Kingdom'],
                          ['France'],
                          ['Germany']],
                index = pd.Index(
                    name = 'VAERS_ID',
                    data = [
                        '4711',
                        '0815',
                        '123']),
                dtypes = {'COUNTRY': 'string'}))