From f44c0f8869259cbdc650fcde9736ddb7e18a6ac9 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Tue, 28 Mar 2023 15:01:31 +0200 Subject: [PATCH] refactoring --- src/CountriesColumnAdder.py | 4 +- src/CountriesColumnAdderTest.py | 5 +- src/HowBadIsMyBatch.ipynb | 87 +++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 4 deletions(-) diff --git a/src/CountriesColumnAdder.py b/src/CountriesColumnAdder.py index b89b64fce07..ecd71b2846c 100644 --- a/src/CountriesColumnAdder.py +++ b/src/CountriesColumnAdder.py @@ -1,7 +1,7 @@ class CountriesColumnAdder: - def addCountriesColumn(self, countriesByBatchcodeTable): - countriesByBatchcodeTable['Countries'] = countriesByBatchcodeTable.apply(self._getCountriesHavingEvents, axis='columns') + def addCountriesColumn(self, countriesByBatchcodeTable, columnName): + countriesByBatchcodeTable[columnName] = countriesByBatchcodeTable.apply(self._getCountriesHavingEvents, axis='columns') return countriesByBatchcodeTable def _getCountriesHavingEvents(self, eventCountByCountry): diff --git a/src/CountriesColumnAdderTest.py b/src/CountriesColumnAdderTest.py index ffefa98610e..21366783519 100644 --- a/src/CountriesColumnAdderTest.py +++ b/src/CountriesColumnAdderTest.py @@ -14,15 +14,16 @@ class CountriesColumnAdderTest(unittest.TestCase): index = pd.Index( name = 'Batchcode', data = ['FE6208'])) + columnName = 'Countries guessed' # When - countriesByBatchcodeTableWithCountriesColumn = CountriesColumnAdder().addCountriesColumn(countriesByBatchcodeTable) + countriesByBatchcodeTableWithCountriesColumn = CountriesColumnAdder().addCountriesColumn(countriesByBatchcodeTable, columnName) # Then assert_frame_equal( countriesByBatchcodeTableWithCountriesColumn, TestHelper.createDataFrame( - columns = ['United States', 'Germany', 'Italy', 'Countries'], + columns = ['United States', 'Germany', 'Italy', columnName], data = [ [20, 0, 3, {'United States', 'Italy'}]], index = pd.Index( name = 'Batchcode', diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index 530da640e0c..3b944a7c62c 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -210,6 +210,93 @@ "country_By_Batchcode_Search_Term" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e3f6e61", + "metadata": {}, + "outputs": [], + "source": [ + "from CountriesColumnAdder import CountriesColumnAdder\n", + "\n", + "country_By_Batchcode_Search_Term = CountriesColumnAdder().addCountriesColumn(\n", + " country_By_Batchcode_Search_Term,\n", + " columnName = 'Countries guessed')\n", + "country_By_Batchcode_Search_Term" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c1780b8", + "metadata": {}, + "outputs": [], + "source": [ + "country_By_Batchcode_Search_Term['Countries guessed']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2ec46898", + "metadata": {}, + "outputs": [], + "source": [ + "# mi.droplevel('z')\n", + "country_By_Batchcode_Search_Term.index.droplevel('Batchcode Search Term')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0087d657", + "metadata": {}, + "outputs": [], + "source": [ + "country_By_Clicked_Batchcode = CountriesColumnAdder().addCountriesColumn(\n", + " country_By_Clicked_Batchcode,\n", + " columnName = 'Countries guessed')\n", + "country_By_Clicked_Batchcode" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03193075", + "metadata": {}, + "outputs": [], + "source": [ + "country_By_Clicked_Batchcode2 = country_By_Clicked_Batchcode[['Countries guessed']]\n", + "country_By_Clicked_Batchcode2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6dd7355", + "metadata": {}, + "outputs": [], + "source": [ + "res = pd.merge(\n", + " batchCodeTable,\n", + " country_By_Clicked_Batchcode2,\n", + " how = 'left',\n", + " left_index = True,\n", + " right_index = True,\n", + " validate = 'one_to_one')\n", + "res" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b36ea0d4", + "metadata": {}, + "outputs": [], + "source": [ + "res[~res['Countries guessed'].isna()].to_excel('tmp/tableWithCountriesGuessed.xlsx')" + ] + }, { "cell_type": "code", "execution_count": null,