From a0256f24532f2ab7bc5a6391754240fc54124a3d Mon Sep 17 00:00:00 2001
From: frankknoll
Date: Sun, 26 Mar 2023 20:41:06 +0200
Subject: [PATCH] adding CountriesColumnAdder

---
 src/CountriesColumnAdder.py     |  8 ++++++++
 src/CountriesColumnAdderTest.py | 30 ++++++++++++++++++++++++++++++
 src/HowBadIsMyBatch.ipynb       |  2 ++
 3 files changed, 40 insertions(+)
 create mode 100644 src/CountriesColumnAdder.py
 create mode 100644 src/CountriesColumnAdderTest.py

diff --git a/src/CountriesColumnAdder.py b/src/CountriesColumnAdder.py
new file mode 100644
index 00000000000..b89b64fce07
--- /dev/null
+++ b/src/CountriesColumnAdder.py
@@ -0,0 +1,8 @@
+class CountriesColumnAdder:
+
+    def addCountriesColumn(self, countriesByBatchcodeTable):
+        countriesByBatchcodeTable['Countries'] = countriesByBatchcodeTable.apply(self._getCountriesHavingEvents, axis='columns')
+        return countriesByBatchcodeTable
+
+    def _getCountriesHavingEvents(self, eventCountByCountry):
+        return set(eventCountByCountry[eventCountByCountry > 0].index)
diff --git a/src/CountriesColumnAdderTest.py b/src/CountriesColumnAdderTest.py
new file mode 100644
index 00000000000..b8fabd37014
--- /dev/null
+++ b/src/CountriesColumnAdderTest.py
@@ -0,0 +1,30 @@
+import unittest
+from TestHelper import TestHelper
+from pandas.testing import assert_frame_equal
+import pandas as pd
+from src.CountriesColumnAdder import CountriesColumnAdder
+
+class CountriesColumnAdderTest(unittest.TestCase):
+
+    def test_addCountriesColumn(self):
+        # Given
+        countriesByBatchcodeTable = TestHelper.createDataFrame(
+            columns = ['United States', 'Germany', 'Italy'],
+            data = [ [20, 0, 3]],
+            index = pd.Index(
+                name = 'Batchcode',
+                data = ['FE6208']))
+
+        # When
+        countriesByBatchcodeTableWithCountriesColumn = CountriesColumnAdder().addCountriesColumn(countriesByBatchcodeTable)
+
+        # Then
+        assert_frame_equal(
+            countriesByBatchcodeTableWithCountriesColumn,
+            TestHelper.createDataFrame(
+                columns = ['United States', 'Germany', 'Italy', 'Countries'],
+                data = [ [20, 0, 3, {'United States', 'Italy'}]],
+                index = pd.Index(
+                    name = 'Batchcode',
+                    data = ['FE6208'])))
+        
\ No newline at end of file
diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb
index ef1cb72b35c..530da640e0c 100644
--- a/src/HowBadIsMyBatch.ipynb
+++ b/src/HowBadIsMyBatch.ipynb
@@ -35,6 +35,8 @@
     "    exploration.drop(index=indexName, inplace=True)\n",
     "    exploration.index.rename(indexName, inplace=True)\n",
     "    exploration.drop(columns='Totals', inplace=True)\n",
+    "    for column in exploration.columns:\n",
+    "        exploration[column] = exploration[column].astype('int64')\n",
     "    return exploration"
    ]
   },