replacing 'Unknown Country' with None

This commit is contained in:
frankknoll
2023-04-01 11:39:36 +02:00
parent 74ebd55a47
commit e7256697c3
5 changed files with 27 additions and 27 deletions

View File

@@ -86,7 +86,7 @@ class BatchCodeTableFactoryTest(unittest.TestCase):
# Given # Given
dataFrame = TestHelper.createDataFrame( dataFrame = TestHelper.createDataFrame(
columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE', 'HOSPITAL', 'ER_VISIT', 'COUNTRY'], columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE', 'HOSPITAL', 'ER_VISIT', 'COUNTRY'],
data = [ [1, 0, 0, 'COVID19', 'PFIZER\BIONTECH', '016M20A', '2', 'GBPFIZER INC2020486806', 0, 0, 'United Kingdom'], data = [ [1, 0, 0, 'COVID19', 'PFIZER\BIONTECH', '016M20A', '2', 'dummy', 0, 0, None],
[0, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France'], [0, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France'],
[1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France'], [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France'],
[0, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'United Kingdom']], [0, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'United Kingdom']],
@@ -106,7 +106,7 @@ class BatchCodeTableFactoryTest(unittest.TestCase):
batchCodeTable[['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Countries', 'Severe reports', 'Lethality']], batchCodeTable[['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Countries', 'Severe reports', 'Lethality']],
TestHelper.createDataFrame( TestHelper.createDataFrame(
columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Countries', 'Severe reports', 'Lethality'], columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Countries', 'Severe reports', 'Lethality'],
data = [ [1, 1, 0, 0, 'PFIZER\BIONTECH', self._convertCountries(['United Kingdom'], countriesAsList), 1/1 * 100, 1/1 * 100], data = [ [1, 1, 0, 0, 'PFIZER\BIONTECH', self._convertCountries([], countriesAsList), 1/1 * 100, 1/1 * 100],
[2, 1, 2, 2, 'MODERNA', self._convertCountries(['France', 'United Kingdom'], countriesAsList), 2/2 * 100, 1/2 * 100], [2, 1, 2, 2, 'MODERNA', self._convertCountries(['France', 'United Kingdom'], countriesAsList), 2/2 * 100, 1/2 * 100],
[1, 0, 0, 0, 'MODERNA', self._convertCountries(['France'], countriesAsList), 0/1 * 100, 0/1 * 100]], [1, 0, 0, 0, 'MODERNA', self._convertCountries(['France'], countriesAsList), 0/1 * 100, 0/1 * 100]],
index = pd.Index( index = pd.Index(

View File

@@ -29,17 +29,17 @@ class CountryColumnAdderTest(unittest.TestCase):
assert_frame_equal( assert_frame_equal(
dataFrameWithCountryColumn, dataFrameWithCountryColumn,
TestHelper.createDataFrame( TestHelper.createDataFrame(
columns = ['SPLTTYPE', 'COUNTRY'], columns = ['SPLTTYPE', 'COUNTRY'],
data = [ ['GBPFIZER INC2020486806', 'United Kingdom'], data = [ ['GBPFIZER INC2020486806', 'United Kingdom'],
['FRMODERNATX, INC.MOD20224', 'France'], ['FRMODERNATX, INC.MOD20224', 'France'],
['dummy', 'Unknown Country']], ['dummy', None]],
index = pd.Index( index = pd.Index(
name = 'VAERS_ID', name = 'VAERS_ID',
data = [ data = [
"4711", "4711",
"0815", "0815",
"123"]), "123"]),
dtypes = {'COUNTRY': 'string'})) dtypes = {'COUNTRY': 'string'}))
def test_addCountryColumn2(self): def test_addCountryColumn2(self):
@@ -75,14 +75,14 @@ class CountryColumnAdderTest(unittest.TestCase):
assert_frame_equal( assert_frame_equal(
dataFrameWithCountryColumn, dataFrameWithCountryColumn,
TestHelper.createDataFrame( TestHelper.createDataFrame(
columns = ['VAX_LOT', 'COUNTRY'], columns = ['VAX_LOT', 'COUNTRY'],
data = [ ['1808982', 'France'], data = [ ['1808982', 'France'],
['EW0175', 'France'], ['EW0175', 'France'],
['EW0176', 'United Kingdom']], ['EW0176', 'United Kingdom']],
index = pd.Index( index = pd.Index(
name = 'VAERS_ID', name = 'VAERS_ID',
data = [ data = [
2547730, 2547730,
2547730, 2547730,
2547744]), 2547744]),
dtypes = {'COUNTRY': 'string'})) dtypes = {'COUNTRY': 'string'}))

View File

@@ -1,5 +1,5 @@
def getCountries(internationalVaersCovid19): def getCountries(internationalVaersCovid19):
return sorted(internationalVaersCovid19['COUNTRY'].unique()) return sorted(internationalVaersCovid19['COUNTRY'].dropna().unique())
def getCountryOptions(countries): def getCountryOptions(countries):

View File

@@ -16,7 +16,7 @@ class Splttype2CountryConverter:
lambda splttype: lambda splttype:
Splttype2CountryConverter._getCountryNameOfSplttypeOrDefault( Splttype2CountryConverter._getCountryNameOfSplttypeOrDefault(
splttype = splttype, splttype = splttype,
default = 'Unknown Country')) default = None))
.astype("string")) .astype("string"))
@staticmethod @staticmethod

View File

@@ -28,4 +28,4 @@ class SummationTableFactory:
@staticmethod @staticmethod
def sortCountries(countries): def sortCountries(countries):
return sorted(set(countries)) return sorted(set(countries.dropna()))