adding CountriesByBatchcodeProvider

This commit is contained in:
frankknoll
2023-03-29 16:06:11 +02:00
parent c8b7985455
commit d66ce311a3
2 changed files with 53 additions and 119 deletions

View File

@@ -0,0 +1,37 @@
import pandas as pd
from CompletedBatchcodeColumnAdder import CompletedBatchcodeColumnAdder
from BatchcodeCompletion import BatchcodeCompletion
from CountriesColumnAdder import CountriesColumnAdder
from BatchCodeTableFactory import BatchCodeTableFactory
def getCountriesByCompletedBatchcode(internationalVaersCovid19):
    """Return a one-column 'Countries' table for the batch code search terms.

    Steps, in order:

    1. Build the global batch code table from ``internationalVaersCovid19``
       via ``BatchCodeTableFactory``.
    2. Read 'data/Country By Batchcode Search Term.csv' (path is relative to
       the current working directory) with ``_readExploration``.
    3. Add a completed-batchcode column, using ``BatchcodeCompletion`` backed
       by the global batch code table.
    4. Add a 'Countries' column with ``CountriesColumnAdder``.
    5. Keep only the 'Countries' column and drop the
       'Batchcode Search Term' index level.

    NOTE(review): step 5 implies that ``addCompletedBatchcodeColumn`` returns
    a frame with a multi-level index that still contains
    'Batchcode Search Term' -- confirm against CompletedBatchcodeColumnAdder.
    """
    countriesColumn = 'Countries'
    searchTermLevel = 'Batchcode Search Term'

    globalBatchCodeTable = BatchCodeTableFactory(internationalVaersCovid19).createGlobalBatchCodeTable()
    searchTermExploration = _readExploration(
        'data/Country By Batchcode Search Term.csv',
        indexName=searchTermLevel)

    # The column adder only needs the completion callable, not the whole object.
    batchcodeCompletion = BatchcodeCompletion(ADR_by_Batchcode=globalBatchCodeTable)
    columnAdder = CompletedBatchcodeColumnAdder(batchcodeCompletion.completeBatchcode)
    withCompletedBatchcode = columnAdder.addCompletedBatchcodeColumn(searchTermExploration)

    withCountries = CountriesColumnAdder().addCountriesColumn(
        withCompletedBatchcode,
        columnName=countriesColumn)

    # Double brackets keep the result a DataFrame rather than a Series.
    return withCountries[[countriesColumn]].droplevel(searchTermLevel)
def getCountriesByClickedBatchcode():
    """Return a one-column 'Countries' table indexed by clicked batch code.

    Reads 'data/Country By Clicked Batchcode.csv' (path is relative to the
    current working directory) with ``_readExploration``, lets
    ``CountriesColumnAdder`` add a 'Countries' column, and returns a
    DataFrame holding only that column.
    """
    countriesColumn = 'Countries'

    clickedBatchcodeExploration = _readExploration(
        'data/Country By Clicked Batchcode.csv',
        indexName='Clicked Batchcode')
    withCountries = CountriesColumnAdder().addCountriesColumn(
        clickedBatchcodeExploration,
        columnName=countriesColumn)

    # Double brackets keep the result a DataFrame rather than a Series.
    return withCountries[[countriesColumn]]
def _readExploration(csvFile, indexName):
    """Load an exploration CSV export as an all-integer table.

    The first six lines of the file are skipped, the next line is used as
    the column header and the first column becomes the index. Malformed
    lines are reported as warnings and skipped (``on_bad_lines='warn'``).

    Args:
        csvFile: Path (or any source accepted by ``pandas.read_csv``) of the
            CSV export.
        indexName: Label of the row to discard; it is also used as the name
            of the resulting index.

    Returns:
        A DataFrame without the row labelled ``indexName`` and without the
        'Totals' column, whose index is named ``indexName`` and whose
        columns are all of dtype ``int64``.

    Raises:
        KeyError: If there is no row labelled ``indexName`` or no 'Totals'
            column.
        ValueError: If a remaining value cannot be converted to ``int64``.
    """
    exploration = pd.read_csv(csvFile, header=[0], index_col=0, skiprows=6, on_bad_lines='warn')
    # NOTE(review): the discarded row is presumably a second header line of
    # the export, which would also explain why the columns need an explicit
    # integer cast afterwards -- confirm against the files under data/.
    return (
        exploration
        .drop(index=indexName)
        .drop(columns='Totals')
        .rename_axis(index=indexName)
        # One frame-wide cast instead of a Python loop over the columns.
        .astype('int64')
    )

View File

@@ -23,45 +23,6 @@
"from HistogramFactoryAndPersister import createAndSaveGlobalHistograms, createAndSaveHistogramsForCountries" "from HistogramFactoryAndPersister import createAndSaveGlobalHistograms, createAndSaveHistogramsForCountries"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "0474f6d7",
"metadata": {},
"outputs": [],
"source": [
"def readExploration(csvFile, indexName):\n",
" exploration = pd.read_csv(csvFile, header=[0], index_col=0, skiprows=6, on_bad_lines='warn')\n",
" exploration.drop(index=indexName, inplace=True)\n",
" exploration.index.rename(indexName, inplace=True)\n",
" exploration.drop(columns='Totals', inplace=True)\n",
" for column in exploration.columns:\n",
" exploration[column] = exploration[column].astype('int64')\n",
" return exploration"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0cb5849c",
"metadata": {},
"outputs": [],
"source": [
"country_By_Clicked_Batchcode = readExploration('data/Country By Clicked Batchcode.csv', indexName = 'Clicked Batchcode')\n",
"country_By_Clicked_Batchcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5066a92",
"metadata": {},
"outputs": [],
"source": [
"country_By_Batchcode_Search_Term = readExploration('data/Country By Batchcode Search Term.csv', indexName = 'Batchcode Search Term')\n",
"country_By_Batchcode_Search_Term"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -180,20 +141,6 @@
"internationalVaersCovid19" "internationalVaersCovid19"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "7a1023e8",
"metadata": {},
"outputs": [],
"source": [
"from BatchCodeTableFactory import BatchCodeTableFactory\n",
"\n",
"batchCodeTableFactory = BatchCodeTableFactory(internationalVaersCovid19)\n",
"batchCodeTable = batchCodeTableFactory.createGlobalBatchCodeTable()\n",
"batchCodeTable"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -201,49 +148,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from CompletedBatchcodeColumnAdder import CompletedBatchcodeColumnAdder\n", "from CountriesByBatchcodeProvider import getCountriesByCompletedBatchcode\n",
"from BatchcodeCompletion import BatchcodeCompletion\n",
"\n", "\n",
"batchcodeCompletion = BatchcodeCompletion(ADR_by_Batchcode = batchCodeTable)\n", "countriesByCompletedBatchcode = getCountriesByCompletedBatchcode(internationalVaersCovid19)\n",
"completedBatchcodeColumnAdder = CompletedBatchcodeColumnAdder(batchcodeCompletion.completeBatchcode)\n", "countriesByCompletedBatchcode"
"country_By_Batchcode_Search_Term = completedBatchcodeColumnAdder.addCompletedBatchcodeColumn(country_By_Batchcode_Search_Term)\n",
"country_By_Batchcode_Search_Term"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5e3f6e61",
"metadata": {},
"outputs": [],
"source": [
"from CountriesColumnAdder import CountriesColumnAdder\n",
"\n",
"country_By_Batchcode_Search_Term = CountriesColumnAdder().addCountriesColumn(\n",
" country_By_Batchcode_Search_Term,\n",
" columnName = 'Countries guessed')\n",
"country_By_Batchcode_Search_Term"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5c1780b8",
"metadata": {},
"outputs": [],
"source": [
"country_By_Batchcode_Search_Term['Countries guessed']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ec46898",
"metadata": {},
"outputs": [],
"source": [
"# mi.droplevel('z')\n",
"country_By_Batchcode_Search_Term.index.droplevel('Batchcode Search Term')"
] ]
}, },
{ {
@@ -253,21 +161,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"country_By_Clicked_Batchcode = CountriesColumnAdder().addCountriesColumn(\n", "from CountriesByBatchcodeProvider import getCountriesByClickedBatchcode\n",
" country_By_Clicked_Batchcode,\n", "\n",
" columnName = 'Countries guessed')\n", "countriesByClickedBatchcode = getCountriesByClickedBatchcode()\n",
"country_By_Clicked_Batchcode" "countriesByClickedBatchcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "03193075",
"metadata": {},
"outputs": [],
"source": [
"country_By_Clicked_Batchcode2 = country_By_Clicked_Batchcode[['Countries guessed']]\n",
"country_By_Clicked_Batchcode2"
] ]
}, },
{ {
@@ -277,14 +174,14 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"res = pd.merge(\n", "#res = pd.merge(\n",
" batchCodeTable,\n", "# batchCodeTable,\n",
" country_By_Clicked_Batchcode2,\n", "# countriesByClickedBatchcode,\n",
" how = 'left',\n", "# how = 'left',\n",
" left_index = True,\n", "# left_index = True,\n",
" right_index = True,\n", "# right_index = True,\n",
" validate = 'one_to_one')\n", "# validate = 'one_to_one')\n",
"res" "#res"
] ]
}, },
{ {
@@ -294,7 +191,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"res[~res['Countries guessed'].isna()].to_excel('tmp/tableWithCountriesGuessed.xlsx')" "# res[~res['batchCodeTableCountries guessed'].isna()].to_excel('tmp/tableWithCountriesGuessed.xlsx')"
] ]
}, },
{ {