From d66ce311a3b68c9bb6087e520ce71e773e1c115f Mon Sep 17 00:00:00 2001 From: frankknoll Date: Wed, 29 Mar 2023 16:06:11 +0200 Subject: [PATCH] adding CountriesByBatchcodeProvider --- src/CountriesByBatchcodeProvider.py | 37 ++++++++ src/HowBadIsMyBatch.ipynb | 135 ++++------------------------ 2 files changed, 53 insertions(+), 119 deletions(-) create mode 100644 src/CountriesByBatchcodeProvider.py diff --git a/src/CountriesByBatchcodeProvider.py b/src/CountriesByBatchcodeProvider.py new file mode 100644 index 00000000000..3df90d4108d --- /dev/null +++ b/src/CountriesByBatchcodeProvider.py @@ -0,0 +1,37 @@ +import pandas as pd +from CompletedBatchcodeColumnAdder import CompletedBatchcodeColumnAdder +from BatchcodeCompletion import BatchcodeCompletion +from CountriesColumnAdder import CountriesColumnAdder +from BatchCodeTableFactory import BatchCodeTableFactory + +def getCountriesByCompletedBatchcode(internationalVaersCovid19): + batchCodeTable = BatchCodeTableFactory(internationalVaersCovid19).createGlobalBatchCodeTable() + country_By_Batchcode_Search_Term = _readExploration('data/Country By Batchcode Search Term.csv', indexName = 'Batchcode Search Term') + completedBatchcodeColumnAdder = CompletedBatchcodeColumnAdder(BatchcodeCompletion(ADR_by_Batchcode = batchCodeTable).completeBatchcode) + country_By_Batchcode_Search_Term = completedBatchcodeColumnAdder.addCompletedBatchcodeColumn(country_By_Batchcode_Search_Term) + columnName = 'Countries' + country_By_Batchcode_Search_Term = CountriesColumnAdder().addCountriesColumn( + country_By_Batchcode_Search_Term, + columnName = columnName) + country_By_Batchcode_Search_Term = country_By_Batchcode_Search_Term[[columnName]].droplevel('Batchcode Search Term') + return country_By_Batchcode_Search_Term + +def getCountriesByClickedBatchcode(): + country_By_Clicked_Batchcode = _readExploration( + 'data/Country By Clicked Batchcode.csv', + indexName = 'Clicked Batchcode') + columnName = 'Countries' + country_By_Clicked_Batchcode = CountriesColumnAdder().addCountriesColumn( + country_By_Clicked_Batchcode, + columnName = columnName) + country_By_Clicked_Batchcode = country_By_Clicked_Batchcode[[columnName]] + return country_By_Clicked_Batchcode + +def _readExploration(csvFile, indexName): + exploration = pd.read_csv(csvFile, header=[0], index_col=0, skiprows=6, on_bad_lines='warn') + exploration.drop(index=indexName, inplace=True) + exploration.index.rename(indexName, inplace=True) + exploration.drop(columns='Totals', inplace=True) + for column in exploration.columns: + exploration[column] = exploration[column].astype('int64') + return exploration \ No newline at end of file diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index 3b944a7c62c..a97676eac3d 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -23,45 +23,6 @@ "from HistogramFactoryAndPersister import createAndSaveGlobalHistograms, createAndSaveHistogramsForCountries" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "0474f6d7", - "metadata": {}, - "outputs": [], - "source": [ - "def readExploration(csvFile, indexName):\n", - " exploration = pd.read_csv(csvFile, header=[0], index_col=0, skiprows=6, on_bad_lines='warn')\n", - " exploration.drop(index=indexName, inplace=True)\n", - " exploration.index.rename(indexName, inplace=True)\n", - " exploration.drop(columns='Totals', inplace=True)\n", - " for column in exploration.columns:\n", - " exploration[column] = exploration[column].astype('int64')\n", - " return exploration" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0cb5849c", - "metadata": {}, - "outputs": [], - "source": [ - "country_By_Clicked_Batchcode = readExploration('data/Country By Clicked Batchcode.csv', indexName = 'Clicked Batchcode')\n", - "country_By_Clicked_Batchcode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b5066a92", - "metadata": {}, - "outputs": [], - "source": [ - "country_By_Batchcode_Search_Term = readExploration('data/Country By Batchcode Search Term.csv', indexName = 'Batchcode Search Term')\n", - "country_By_Batchcode_Search_Term" - ] - }, { "cell_type": "code", "execution_count": null, @@ -180,20 +141,6 @@ "internationalVaersCovid19" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "7a1023e8", - "metadata": {}, - "outputs": [], - "source": [ - "from BatchCodeTableFactory import BatchCodeTableFactory\n", - "\n", - "batchCodeTableFactory = BatchCodeTableFactory(internationalVaersCovid19)\n", - "batchCodeTable = batchCodeTableFactory.createGlobalBatchCodeTable()\n", - "batchCodeTable" - ] - }, { "cell_type": "code", "execution_count": null, @@ -201,49 +148,10 @@ "metadata": {}, "outputs": [], "source": [ - "from CompletedBatchcodeColumnAdder import CompletedBatchcodeColumnAdder\n", - "from BatchcodeCompletion import BatchcodeCompletion\n", + "from CountriesByBatchcodeProvider import getCountriesByCompletedBatchcode\n", "\n", - "batchcodeCompletion = BatchcodeCompletion(ADR_by_Batchcode = batchCodeTable)\n", - "completedBatchcodeColumnAdder = CompletedBatchcodeColumnAdder(batchcodeCompletion.completeBatchcode)\n", - "country_By_Batchcode_Search_Term = completedBatchcodeColumnAdder.addCompletedBatchcodeColumn(country_By_Batchcode_Search_Term)\n", - "country_By_Batchcode_Search_Term" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5e3f6e61", - "metadata": {}, - "outputs": [], - "source": [ - "from CountriesColumnAdder import CountriesColumnAdder\n", - "\n", - "country_By_Batchcode_Search_Term = CountriesColumnAdder().addCountriesColumn(\n", - " country_By_Batchcode_Search_Term,\n", - " columnName = 'Countries guessed')\n", - "country_By_Batchcode_Search_Term" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5c1780b8", - "metadata": {}, - "outputs": [], - "source": [ - "country_By_Batchcode_Search_Term['Countries guessed']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2ec46898", - "metadata": {}, - "outputs": [], - "source": [ - "# mi.droplevel('z')\n", - "country_By_Batchcode_Search_Term.index.droplevel('Batchcode Search Term')" + "countriesByCompletedBatchcode = getCountriesByCompletedBatchcode(internationalVaersCovid19)\n", + "countriesByCompletedBatchcode" ] }, { @@ -253,21 +161,10 @@ "metadata": {}, "outputs": [], "source": [ - "country_By_Clicked_Batchcode = CountriesColumnAdder().addCountriesColumn(\n", - " country_By_Clicked_Batchcode,\n", - " columnName = 'Countries guessed')\n", - "country_By_Clicked_Batchcode" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "03193075", - "metadata": {}, - "outputs": [], - "source": [ - "country_By_Clicked_Batchcode2 = country_By_Clicked_Batchcode[['Countries guessed']]\n", - "country_By_Clicked_Batchcode2" + "from CountriesByBatchcodeProvider import getCountriesByClickedBatchcode\n", + "\n", + "countriesByClickedBatchcode = getCountriesByClickedBatchcode()\n", + "countriesByClickedBatchcode" ] }, { @@ -277,14 +174,14 @@ "metadata": {}, "outputs": [], "source": [ - "res = pd.merge(\n", - " batchCodeTable,\n", - " country_By_Clicked_Batchcode2,\n", - " how = 'left',\n", - " left_index = True,\n", - " right_index = True,\n", - " validate = 'one_to_one')\n", - "res" + "#res = pd.merge(\n", + "# batchCodeTable,\n", + "# countriesByClickedBatchcode,\n", + "# how = 'left',\n", + "# left_index = True,\n", + "# right_index = True,\n", + "# validate = 'one_to_one')\n", + "#res" ] }, { @@ -294,7 +191,7 @@ "metadata": {}, "outputs": [], "source": [ - "res[~res['Countries guessed'].isna()].to_excel('tmp/tableWithCountriesGuessed.xlsx')" + "# res[~res['batchCodeTableCountries guessed'].isna()].to_excel('tmp/tableWithCountriesGuessed.xlsx')" ] }, {