refining CountryColumnsMergerTest

This commit is contained in:
frankknoll
2023-06-13 23:19:58 +02:00
parent 181d280a6e
commit 11b2ea4eeb
2 changed files with 57 additions and 14 deletions

View File

@@ -5,12 +5,6 @@ class CountryColumnsMerger:
@staticmethod
def mergeCountryColumnOfSrcIntoDst(src: pd.DataFrame, dst: pd.DataFrame):
def merge(series):
if pd.isnull(series['COUNTRY_dst']):
return series['COUNTRY_src']
else:
return series['COUNTRY_dst']
merged = pd.merge(
dst,
src,
@@ -18,6 +12,23 @@ class CountryColumnsMerger:
left_index = True,
right_index = True,
suffixes=('_dst', '_src'))
merged['COUNTRY'] = merged.apply(merge, axis = 'columns').astype('string')
merged['COUNTRY'] = (merged
.apply(
lambda series: CountryColumnsMerger._merge(
series['COUNTRY_src'],
series['COUNTRY_dst']),
axis = 'columns')
.astype('string'))
return merged.drop(columns = ['COUNTRY_dst', 'COUNTRY_src'])
@staticmethod
def _merge(src, dst):
    """Pick the single COUNTRY value for one row from its src and dst cells.

    Parameters:
        src: COUNTRY value coming from the source frame (may be null).
        dst: COUNTRY value coming from the destination frame (may be null).

    Returns:
        The one non-null value; when both are set they are equal and that
        shared value is returned.

    Raises:
        ValueError: if both values are set but differ, or if both are null.
    """
    if CountryColumnsMerger._isNonUnique(src, dst):
        raise ValueError(
            f'conflicting COUNTRY values: src={src!r}, dst={dst!r}')
    if pd.isnull(src) and pd.isnull(dst):
        raise ValueError('COUNTRY is missing in both src and dst')
    # Past the guards at most one side is null, and if neither is null the
    # two values agree, so dst wins unless it is the null one.
    return src if pd.isnull(dst) else dst
@staticmethod
def _isNonUnique(src, dst):
return not pd.isnull(src) and not pd.isnull(dst) and src != dst

View File

@@ -7,13 +7,15 @@ from CountryColumnsMerger import CountryColumnsMerger
class CountryColumnsMergerTest(unittest.TestCase):
def test_mergeCountryColumns(self):
def test_mergeCountryColumnOfSrcIntoDst(self):
# Given
unknown = TestHelper.createDataFrame(
src_val = 'Germany'
dst_val = None
dst = TestHelper.createDataFrame(
columns = ['COUNTRY'],
data = [ ['United Kingdom'],
['France'],
[None]],
[dst_val]],
index = pd.Index(
name = 'VAERS_ID',
data = [
@@ -22,10 +24,10 @@ class CountryColumnsMergerTest(unittest.TestCase):
'123']),
dtypes = {'COUNTRY': 'string'})
known = TestHelper.createDataFrame(
src = TestHelper.createDataFrame(
columns = ['COUNTRY'],
data = [ ['United Kingdom'],
['Germany']],
[src_val]],
index = pd.Index(
name = 'VAERS_ID',
data = [
@@ -34,7 +36,7 @@ class CountryColumnsMergerTest(unittest.TestCase):
dtypes = {'COUNTRY': 'string'})
# When
merged = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(src = known, dst = unknown)
merged = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(src = src, dst = dst)
# Then
assert_frame_equal(
@@ -43,7 +45,7 @@ class CountryColumnsMergerTest(unittest.TestCase):
columns = ['COUNTRY'],
data = [ ['United Kingdom'],
['France'],
['Germany']],
[src_val]],
index = pd.Index(
name = 'VAERS_ID',
data = [
@@ -51,3 +53,33 @@ class CountryColumnsMergerTest(unittest.TestCase):
'0815',
'123']),
dtypes = {'COUNTRY': 'string'}))
def test_shouldNotMergeCountryColumnOfSrcIntoDst_non_unique(self):
    # src and dst both carry a country for the same row, but not the same one
    self._mergeCountryColumnOfSrcIntoDst(
        val_src = 'Germany',
        val_dst = 'United Kingdom')
def test_shouldNotMergeCountryColumnOfSrcIntoDst3(self):
    # neither src nor dst carries a country for the row
    self._mergeCountryColumnOfSrcIntoDst(
        val_src = None,
        val_dst = None)
def _mergeCountryColumnOfSrcIntoDst(self, val_dst, val_src):
    """Assert that merging one-row src and dst frames raises.

    Both frames hold a single COUNTRY cell under the same VAERS_ID index
    entry '4711'; val_dst and val_src are the values placed in those cells.
    """
    def singleRowFrame(country):
        # one COUNTRY cell, indexed by the shared VAERS_ID
        return TestHelper.createDataFrame(
            columns = ['COUNTRY'],
            data = [[country]],
            index = pd.Index(
                name = 'VAERS_ID',
                data = ['4711']),
            dtypes = {'COUNTRY': 'string'})

    # Given
    dstFrame = singleRowFrame(val_dst)
    srcFrame = singleRowFrame(val_src)

    # When && Then
    with self.assertRaises(Exception):
        CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(
            src = srcFrame,
            dst = dstFrame)