From 3f0b650519333c3f8f016b004b59cc37243b6603 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Tue, 7 Feb 2023 11:37:43 +0100 Subject: [PATCH] refactoring --- src/CountryColumnAdder.py | 27 +++------------------------ src/Splttype2CountryConverter.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 24 deletions(-) create mode 100644 src/Splttype2CountryConverter.py diff --git a/src/CountryColumnAdder.py b/src/CountryColumnAdder.py index d7e8f76dcfb..592dcb42ec2 100644 --- a/src/CountryColumnAdder.py +++ b/src/CountryColumnAdder.py @@ -1,10 +1,10 @@ -import pycountry import pandas as pd +from Splttype2CountryConverter import Splttype2CountryConverter class CountryColumnAdder: def __init__(self, dataFrame_SPLTTYPE_By_VAERS_ID): - self.dataFrame_COUNTRY_By_VAERS_ID = self._create_dataFrame_COUNTRY_By_VAERS_ID(dataFrame_SPLTTYPE_By_VAERS_ID) + self.dataFrame_COUNTRY_By_VAERS_ID = Splttype2CountryConverter.convertSplttype2Country(dataFrame_SPLTTYPE_By_VAERS_ID) def addCountryColumn(self, dataFrame): return pd.merge( @@ -13,25 +13,4 @@ class CountryColumnAdder: how = 'left', left_index = True, right_index = True) - - def _create_dataFrame_COUNTRY_By_VAERS_ID(self, dataFrame_SPLTTYPE_By_VAERS_ID): - dataFrame_COUNTRY_By_VAERS_ID = dataFrame_SPLTTYPE_By_VAERS_ID[['SPLTTYPE']].copy() - dataFrame_COUNTRY_By_VAERS_ID['COUNTRY'] = self._splttype2Country(dataFrame_COUNTRY_By_VAERS_ID['SPLTTYPE']) - dataFrame_COUNTRY_By_VAERS_ID = dataFrame_COUNTRY_By_VAERS_ID.drop(columns = ['SPLTTYPE']) - return dataFrame_COUNTRY_By_VAERS_ID - - def _splttype2Country(self, splttypeSeries): - return (splttypeSeries - .apply( - lambda splttype: - self._getCountryNameOfSplttypeOrDefault( - splttype = splttype, - default = 'Unknown Country')) - .astype("string")) - - def _getCountryNameOfSplttypeOrDefault(self, splttype, default): - if not isinstance(splttype, str): - return default - - country = pycountry.countries.get(alpha_2 = splttype[:2]) - return country.name if country is not None else default \ No newline at end of file + \ No newline at end of file diff --git a/src/Splttype2CountryConverter.py b/src/Splttype2CountryConverter.py new file mode 100644 index 00000000000..33bc336fb64 --- /dev/null +++ b/src/Splttype2CountryConverter.py @@ -0,0 +1,28 @@ +import pycountry + +class Splttype2CountryConverter: + + @staticmethod + def convertSplttype2Country(dataFrame_SPLTTYPE_By_VAERS_ID): + dataFrame_COUNTRY_By_VAERS_ID = dataFrame_SPLTTYPE_By_VAERS_ID[['SPLTTYPE']].copy() + dataFrame_COUNTRY_By_VAERS_ID['COUNTRY'] = Splttype2CountryConverter._splttype2Country(dataFrame_COUNTRY_By_VAERS_ID['SPLTTYPE']) + dataFrame_COUNTRY_By_VAERS_ID = dataFrame_COUNTRY_By_VAERS_ID.drop(columns = ['SPLTTYPE']) + return dataFrame_COUNTRY_By_VAERS_ID + + @staticmethod + def _splttype2Country(splttypeSeries): + return (splttypeSeries + .apply( + lambda splttype: + Splttype2CountryConverter._getCountryNameOfSplttypeOrDefault( + splttype = splttype, + default = 'Unknown Country')) + .astype("string")) + + @staticmethod + def _getCountryNameOfSplttypeOrDefault(splttype, default): + if not isinstance(splttype, str): + return default + + country = pycountry.countries.get(alpha_2 = splttype[:2]) + return country.name if country is not None else default \ No newline at end of file