refining CountryColumnAdderTest
This commit is contained in:
@@ -1,24 +1,35 @@
|
|||||||
import pycountry
|
import pycountry
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
class CountryColumnAdder:
|
class CountryColumnAdder:
|
||||||
|
|
||||||
@staticmethod
|
def __init__(self, dataFrame_SPLTTYPE_By_VAERS_ID):
|
||||||
def addCountryColumn(dataFrame):
|
self.dataFrame_COUNTRY_By_VAERS_ID = self._create_dataFrame_COUNTRY_By_VAERS_ID(dataFrame_SPLTTYPE_By_VAERS_ID)
|
||||||
dataFrame['COUNTRY'] = CountryColumnAdder._splttype2Country(dataFrame['SPLTTYPE'])
|
|
||||||
return dataFrame
|
|
||||||
|
|
||||||
@staticmethod
|
def addCountryColumn(self, dataFrame):
|
||||||
def _splttype2Country(splttypeSeries):
|
return pd.merge(
|
||||||
|
dataFrame,
|
||||||
|
self.dataFrame_COUNTRY_By_VAERS_ID,
|
||||||
|
how = 'left',
|
||||||
|
left_index = True,
|
||||||
|
right_index = True)
|
||||||
|
|
||||||
|
def _create_dataFrame_COUNTRY_By_VAERS_ID(self, dataFrame_SPLTTYPE_By_VAERS_ID):
|
||||||
|
dataFrame_COUNTRY_By_VAERS_ID = dataFrame_SPLTTYPE_By_VAERS_ID[['SPLTTYPE']].copy()
|
||||||
|
dataFrame_COUNTRY_By_VAERS_ID['COUNTRY'] = self._splttype2Country(dataFrame_COUNTRY_By_VAERS_ID['SPLTTYPE'])
|
||||||
|
dataFrame_COUNTRY_By_VAERS_ID = dataFrame_COUNTRY_By_VAERS_ID.drop(columns = ['SPLTTYPE'])
|
||||||
|
return dataFrame_COUNTRY_By_VAERS_ID
|
||||||
|
|
||||||
|
def _splttype2Country(self, splttypeSeries):
|
||||||
return (splttypeSeries
|
return (splttypeSeries
|
||||||
.apply(
|
.apply(
|
||||||
lambda splttype:
|
lambda splttype:
|
||||||
CountryColumnAdder._getCountryNameOfSplttypeOrDefault(
|
self._getCountryNameOfSplttypeOrDefault(
|
||||||
splttype = splttype,
|
splttype = splttype,
|
||||||
default = 'Unknown Country'))
|
default = 'Unknown Country'))
|
||||||
.astype("string"))
|
.astype("string"))
|
||||||
|
|
||||||
@staticmethod
|
def _getCountryNameOfSplttypeOrDefault(self, splttype, default):
|
||||||
def _getCountryNameOfSplttypeOrDefault(splttype, default):
|
|
||||||
if not isinstance(splttype, str):
|
if not isinstance(splttype, str):
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
|||||||
@@ -14,13 +14,16 @@ class CountryColumnAdderTest(unittest.TestCase):
|
|||||||
data = [ ['GBPFIZER INC2020486806'],
|
data = [ ['GBPFIZER INC2020486806'],
|
||||||
['FRMODERNATX, INC.MOD20224'],
|
['FRMODERNATX, INC.MOD20224'],
|
||||||
['dummy']],
|
['dummy']],
|
||||||
index = [
|
index = pd.Index(
|
||||||
"4711",
|
name = 'VAERS_ID',
|
||||||
"0815",
|
data = [
|
||||||
"123"])
|
"4711",
|
||||||
|
"0815",
|
||||||
|
"123"]))
|
||||||
|
countryColumnAdder = CountryColumnAdder(dataFrame)
|
||||||
|
|
||||||
# When
|
# When
|
||||||
dataFrameWithCountryColumn = CountryColumnAdder.addCountryColumn(dataFrame)
|
dataFrameWithCountryColumn = countryColumnAdder.addCountryColumn(dataFrame)
|
||||||
|
|
||||||
# Then
|
# Then
|
||||||
assert_frame_equal(
|
assert_frame_equal(
|
||||||
@@ -30,8 +33,56 @@ class CountryColumnAdderTest(unittest.TestCase):
|
|||||||
data = [ ['GBPFIZER INC2020486806', 'United Kingdom'],
|
data = [ ['GBPFIZER INC2020486806', 'United Kingdom'],
|
||||||
['FRMODERNATX, INC.MOD20224', 'France'],
|
['FRMODERNATX, INC.MOD20224', 'France'],
|
||||||
['dummy', 'Unknown Country']],
|
['dummy', 'Unknown Country']],
|
||||||
index = [
|
index = pd.Index(
|
||||||
"4711",
|
name = 'VAERS_ID',
|
||||||
"0815",
|
data = [
|
||||||
"123"],
|
"4711",
|
||||||
|
"0815",
|
||||||
|
"123"]),
|
||||||
|
dtypes = {'COUNTRY': 'string'}))
|
||||||
|
|
||||||
|
|
||||||
|
def test_addCountryColumn2(self):
|
||||||
|
# Given
|
||||||
|
countryColumnAdder = CountryColumnAdder(
|
||||||
|
TestHelper.createDataFrame(
|
||||||
|
columns = ['SPLTTYPE'],
|
||||||
|
data = [ ['GBPFIZER INC2020486806'],
|
||||||
|
['FRMODERNATX, INC.MOD20224'],
|
||||||
|
['dummy']],
|
||||||
|
index = pd.Index(
|
||||||
|
name = 'VAERS_ID',
|
||||||
|
data = [
|
||||||
|
2547744,
|
||||||
|
2547730,
|
||||||
|
2540815])))
|
||||||
|
dataFrame = TestHelper.createDataFrame(
|
||||||
|
columns = ['VAX_LOT'],
|
||||||
|
data = [ ['1808982'],
|
||||||
|
['EW0175'],
|
||||||
|
['EW0176']],
|
||||||
|
index = pd.Index(
|
||||||
|
name = 'VAERS_ID',
|
||||||
|
data = [
|
||||||
|
2547730,
|
||||||
|
2547730,
|
||||||
|
2547744]))
|
||||||
|
|
||||||
|
# When
|
||||||
|
dataFrameWithCountryColumn = countryColumnAdder.addCountryColumn(dataFrame)
|
||||||
|
|
||||||
|
# Then
|
||||||
|
assert_frame_equal(
|
||||||
|
dataFrameWithCountryColumn,
|
||||||
|
TestHelper.createDataFrame(
|
||||||
|
columns = ['VAX_LOT', 'COUNTRY'],
|
||||||
|
data = [ ['1808982', 'France'],
|
||||||
|
['EW0175', 'France'],
|
||||||
|
['EW0176', 'United Kingdom']],
|
||||||
|
index = pd.Index(
|
||||||
|
name = 'VAERS_ID',
|
||||||
|
data = [
|
||||||
|
2547730,
|
||||||
|
2547730,
|
||||||
|
2547744]),
|
||||||
dtypes = {'COUNTRY': 'string'}))
|
dtypes = {'COUNTRY': 'string'}))
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ def getVaersForYears(years):
|
|||||||
def getNonDomesticVaers():
|
def getNonDomesticVaers():
|
||||||
return _getVaers(
|
return _getVaers(
|
||||||
[_getVaersDescrReader().readNonDomesticVaersDescr()],
|
[_getVaersDescrReader().readNonDomesticVaersDescr()],
|
||||||
CountryColumnAdder.addCountryColumn)
|
addCountryColumn = lambda dataFrame: CountryColumnAdder(dataFrame).addCountryColumn(dataFrame))
|
||||||
|
|
||||||
def _getVaersDescrReader():
|
def _getVaersDescrReader():
|
||||||
return VaersDescrReader(dataDir = "VAERS")
|
return VaersDescrReader(dataDir = "VAERS")
|
||||||
|
|||||||
Reference in New Issue
Block a user