refining CountryColumnsMergerTest

This commit is contained in:
frankknoll
2023-06-14 01:35:03 +02:00
parent db5d2d6df0
commit 05b75756c9
2 changed files with 108 additions and 10 deletions

View File

@@ -7,7 +7,7 @@ class CountryColumnsMerger:
def mergeCountryColumnOfSrcIntoDst(src: pd.DataFrame, dst: pd.DataFrame):
merged = pd.merge(
dst,
src,
CountryColumnsMerger._drop_duplicates(src['COUNTRY']),
how = 'left',
left_index = True,
right_index = True,
@@ -21,10 +21,14 @@ class CountryColumnsMerger:
.astype('string'))
return merged.drop(columns = ['COUNTRY_dst', 'COUNTRY_src'])
# Helper: reduce df to one row per index label, keeping the first occurrence.
@staticmethod
def _drop_duplicates(df):
# df.index.duplicated(keep = 'first') marks every repeated index label except
# its first occurrence; negating that mask selects only the first occurrences.
return df[~df.index.duplicated(keep = 'first')]
@staticmethod
def _mergeSrcIntoDst(src, dst):
if (CountryColumnsMerger._notEqual(src, dst)) or (pd.isnull(src) and pd.isnull(dst)):
raise Exception()
if CountryColumnsMerger._notEqual(src, dst):
raise Exception(f'{src} != {dst}')
if pd.isnull(dst) and not pd.isnull(src):
return src

View File

@@ -55,14 +55,108 @@ class CountryColumnsMergerTest(unittest.TestCase):
dtypes = {'COUNTRY': 'string'}),
check_like = True)
# Delegates to the shared helper shouldNotMergeCountryColumnOfSrcIntoDst with two
# different, non-null country values for dst and src.
# NOTE(review): the helper's body is not visible here; judging by its name it
# presumably expects the merge to be rejected -- confirm.
def test_shouldNotMergeCountryColumnOfSrcIntoDst_non_unique(self):
self.shouldNotMergeCountryColumnOfSrcIntoDst(val_dst = 'United Kingdom', val_src = 'Germany')
# Delegates to the shared helper shouldNotMergeCountryColumnOfSrcIntoDst with
# both the dst and the src country value missing (None).
# NOTE(review): the helper's body is not visible here; judging by its name it
# presumably expects the merge to be rejected -- confirm.
def test_shouldNotMergeCountryColumnOfSrcIntoDst3(self):
self.shouldNotMergeCountryColumnOfSrcIntoDst(val_dst = None, val_src = None)
def shouldNotMergeCountryColumnOfSrcIntoDst(self, val_dst, val_src):
# Scenario: src contains the index label '123' twice (both rows carry the same
# country) while dst has no country for '123'. The merge is expected to fill
# the gap in dst with the src country and to yield exactly one row per dst label.
def test_mergeCountryColumnOfSrcIntoDst_non_unique_index(self):
# Given
src_val = 'Germany'
dst_val = None
# dst: three rows indexed by VAERS_ID; the row '123' has a missing COUNTRY.
dst = TestHelper.createDataFrame(
columns = ['COUNTRY', 'TestColumn'],
data = [ ['United Kingdom', 'test1'],
['France', 'test2'],
[dst_val, 'test3']],
index = pd.Index(
name = 'VAERS_ID',
data = [
'4711',
'0815',
'123']),
dtypes = {'COUNTRY': 'string'})
# src: the label '123' appears twice with identical COUNTRY values, so the
# index is not unique; '0815' is absent from src altogether.
src = TestHelper.createDataFrame(
columns = ['COUNTRY'],
data = [ ['United Kingdom'],
[src_val],
[src_val]],
index = pd.Index(
name = 'VAERS_ID',
data = [
'4711',
'123',
'123']),
dtypes = {'COUNTRY': 'string'})
# When
merged = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(src, dst)
# Then
# Expected: same rows and index as dst, with COUNTRY of '123' taken from src;
# 'France' for '0815' stays untouched although src has no such label.
# check_like = True makes the comparison ignore the order of index and columns.
assert_frame_equal(
merged,
TestHelper.createDataFrame(
columns = ['COUNTRY', 'TestColumn'],
data = [ ['United Kingdom', 'test1'],
['France', 'test2'],
[src_val, 'test3']],
index = pd.Index(
name = 'VAERS_ID',
data = [
'4711',
'0815',
'123']),
dtypes = {'COUNTRY': 'string'}),
check_like = True)
# Scenario: the country for the label '123' is missing in both src and dst.
# The merge is expected to complete and leave that COUNTRY value missing.
def test_mergeCountryColumnOfSrcIntoDst_None_None(self):
# Given
src_val = None
dst_val = None
# dst: three rows indexed by VAERS_ID; the row '123' has a missing COUNTRY.
dst = TestHelper.createDataFrame(
columns = ['COUNTRY', 'TestColumn'],
data = [ ['United Kingdom', 'test1'],
['France', 'test2'],
[dst_val, 'test3']],
index = pd.Index(
name = 'VAERS_ID',
data = [
'4711',
'0815',
'123']),
dtypes = {'COUNTRY': 'string'})
# src: provides a matching country for '4711' and a missing one for '123';
# '0815' is absent from src altogether.
src = TestHelper.createDataFrame(
columns = ['COUNTRY'],
data = [ ['United Kingdom'],
[src_val]],
index = pd.Index(
name = 'VAERS_ID',
data = [
'4711',
'123']),
dtypes = {'COUNTRY': 'string'})
# When
merged = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(src, dst)
# Then
# Expected: dst unchanged in shape, with COUNTRY of '123' still missing
# (src_val is None here).
# check_like = True makes the comparison ignore the order of index and columns.
assert_frame_equal(
merged,
TestHelper.createDataFrame(
columns = ['COUNTRY', 'TestColumn'],
data = [ ['United Kingdom', 'test1'],
['France', 'test2'],
[src_val, 'test3']],
index = pd.Index(
name = 'VAERS_ID',
data = [
'4711',
'0815',
'123']),
dtypes = {'COUNTRY': 'string'}),
check_like = True)
def test_shouldNotMergeCountryColumnOfSrcIntoDst_non_unique(self):
# Given
val_dst = 'United Kingdom'
val_src = 'Germany'
dst = TestHelper.createDataFrame(
columns = ['COUNTRY'],
data = [ [val_dst]],