refining CountryColumnsMergerTest
This commit is contained in:
@@ -5,12 +5,6 @@ class CountryColumnsMerger:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def mergeCountryColumnOfSrcIntoDst(src: pd.DataFrame, dst: pd.DataFrame):
|
def mergeCountryColumnOfSrcIntoDst(src: pd.DataFrame, dst: pd.DataFrame):
|
||||||
def merge(series):
|
|
||||||
if pd.isnull(series['COUNTRY_dst']):
|
|
||||||
return series['COUNTRY_src']
|
|
||||||
else:
|
|
||||||
return series['COUNTRY_dst']
|
|
||||||
|
|
||||||
merged = pd.merge(
|
merged = pd.merge(
|
||||||
dst,
|
dst,
|
||||||
src,
|
src,
|
||||||
@@ -18,6 +12,23 @@ class CountryColumnsMerger:
|
|||||||
left_index = True,
|
left_index = True,
|
||||||
right_index = True,
|
right_index = True,
|
||||||
suffixes=('_dst', '_src'))
|
suffixes=('_dst', '_src'))
|
||||||
merged['COUNTRY'] = merged.apply(merge, axis = 'columns').astype('string')
|
merged['COUNTRY'] = (merged
|
||||||
|
.apply(
|
||||||
|
lambda series: CountryColumnsMerger._merge(
|
||||||
|
series['COUNTRY_src'],
|
||||||
|
series['COUNTRY_dst']),
|
||||||
|
axis = 'columns')
|
||||||
|
.astype('string'))
|
||||||
return merged.drop(columns = ['COUNTRY_dst', 'COUNTRY_src'])
|
return merged.drop(columns = ['COUNTRY_dst', 'COUNTRY_src'])
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _merge(src, dst):
|
||||||
|
if (CountryColumnsMerger._isNonUnique(src, dst)) or (pd.isnull(src) and pd.isnull(dst)):
|
||||||
|
raise Exception()
|
||||||
|
|
||||||
|
return src if not pd.isnull(src) and pd.isnull(dst) else dst
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _isNonUnique(src, dst):
|
||||||
|
return not pd.isnull(src) and not pd.isnull(dst) and src != dst
|
||||||
|
|
||||||
@@ -7,13 +7,15 @@ from CountryColumnsMerger import CountryColumnsMerger
|
|||||||
|
|
||||||
class CountryColumnsMergerTest(unittest.TestCase):
|
class CountryColumnsMergerTest(unittest.TestCase):
|
||||||
|
|
||||||
def test_mergeCountryColumns(self):
|
def test_mergeCountryColumnOfSrcIntoDst(self):
|
||||||
# Given
|
# Given
|
||||||
unknown = TestHelper.createDataFrame(
|
src_val = 'Germany'
|
||||||
|
dst_val = None
|
||||||
|
dst = TestHelper.createDataFrame(
|
||||||
columns = ['COUNTRY'],
|
columns = ['COUNTRY'],
|
||||||
data = [ ['United Kingdom'],
|
data = [ ['United Kingdom'],
|
||||||
['France'],
|
['France'],
|
||||||
[None]],
|
[dst_val]],
|
||||||
index = pd.Index(
|
index = pd.Index(
|
||||||
name = 'VAERS_ID',
|
name = 'VAERS_ID',
|
||||||
data = [
|
data = [
|
||||||
@@ -22,10 +24,10 @@ class CountryColumnsMergerTest(unittest.TestCase):
|
|||||||
'123']),
|
'123']),
|
||||||
dtypes = {'COUNTRY': 'string'})
|
dtypes = {'COUNTRY': 'string'})
|
||||||
|
|
||||||
known = TestHelper.createDataFrame(
|
src = TestHelper.createDataFrame(
|
||||||
columns = ['COUNTRY'],
|
columns = ['COUNTRY'],
|
||||||
data = [ ['United Kingdom'],
|
data = [ ['United Kingdom'],
|
||||||
['Germany']],
|
[src_val]],
|
||||||
index = pd.Index(
|
index = pd.Index(
|
||||||
name = 'VAERS_ID',
|
name = 'VAERS_ID',
|
||||||
data = [
|
data = [
|
||||||
@@ -34,7 +36,7 @@ class CountryColumnsMergerTest(unittest.TestCase):
|
|||||||
dtypes = {'COUNTRY': 'string'})
|
dtypes = {'COUNTRY': 'string'})
|
||||||
|
|
||||||
# When
|
# When
|
||||||
merged = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(src = known, dst = unknown)
|
merged = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(src = src, dst = dst)
|
||||||
|
|
||||||
# Then
|
# Then
|
||||||
assert_frame_equal(
|
assert_frame_equal(
|
||||||
@@ -43,7 +45,7 @@ class CountryColumnsMergerTest(unittest.TestCase):
|
|||||||
columns = ['COUNTRY'],
|
columns = ['COUNTRY'],
|
||||||
data = [ ['United Kingdom'],
|
data = [ ['United Kingdom'],
|
||||||
['France'],
|
['France'],
|
||||||
['Germany']],
|
[src_val]],
|
||||||
index = pd.Index(
|
index = pd.Index(
|
||||||
name = 'VAERS_ID',
|
name = 'VAERS_ID',
|
||||||
data = [
|
data = [
|
||||||
@@ -51,3 +53,33 @@ class CountryColumnsMergerTest(unittest.TestCase):
|
|||||||
'0815',
|
'0815',
|
||||||
'123']),
|
'123']),
|
||||||
dtypes = {'COUNTRY': 'string'}))
|
dtypes = {'COUNTRY': 'string'}))
|
||||||
|
|
||||||
|
def test_shouldNotMergeCountryColumnOfSrcIntoDst_non_unique(self):
    """Merging is expected to raise when src and dst name different countries."""
    conflicting = {'val_dst': 'United Kingdom', 'val_src': 'Germany'}
    self._mergeCountryColumnOfSrcIntoDst(**conflicting)
|
||||||
|
|
||||||
|
def test_shouldNotMergeCountryColumnOfSrcIntoDst3(self):
    """Merging is expected to raise when neither src nor dst has a country."""
    bothMissing = {'val_dst': None, 'val_src': None}
    self._mergeCountryColumnOfSrcIntoDst(**bothMissing)
|
||||||
|
|
||||||
|
def _mergeCountryColumnOfSrcIntoDst(self, val_dst, val_src):
    """Assert that merging two single-row frames with the given values raises.

    Both frames carry exactly one row under the same VAERS_ID ('4711'), so
    val_dst and val_src end up in the same merged row.
    """
    def singleRowFrame(country):
        # One-row COUNTRY frame keyed by the shared VAERS_ID.
        return TestHelper.createDataFrame(
            columns = ['COUNTRY'],
            data = [ [country]],
            index = pd.Index(
                name = 'VAERS_ID',
                data = [
                    '4711']),
            dtypes = {'COUNTRY': 'string'})

    # Given
    dst = singleRowFrame(val_dst)
    src = singleRowFrame(val_src)

    # When && Then
    with self.assertRaises(Exception):
        CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(src = src, dst = dst)
|
||||||
|
|||||||
Reference in New Issue
Block a user