adding CountriesByBatchcodeProvider
This commit is contained in:
37
src/CountriesByBatchcodeProvider.py
Normal file
37
src/CountriesByBatchcodeProvider.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from CompletedBatchcodeColumnAdder import CompletedBatchcodeColumnAdder
|
||||||
|
from BatchcodeCompletion import BatchcodeCompletion
|
||||||
|
from CountriesColumnAdder import CountriesColumnAdder
|
||||||
|
from BatchCodeTableFactory import BatchCodeTableFactory
|
||||||
|
|
||||||
|
def getCountriesByCompletedBatchcode(internationalVaersCovid19):
    """Return the 'Countries' column derived from the batchcode search-term exploration.

    Steps, in order:
      1. Build the global batch code table from ``internationalVaersCovid19``.
      2. Read 'data/Country By Batchcode Search Term.csv' via ``_readExploration``.
      3. Add the completed-batchcode column, using ``BatchcodeCompletion``
         initialised with the global batch code table.
      4. Add the 'Countries' column via ``CountriesColumnAdder``.
      5. Keep only the 'Countries' column and drop the
         'Batchcode Search Term' index level.

    NOTE(review): the final ``droplevel`` implies that
    ``addCompletedBatchcodeColumn`` yields a multi-level index that contains
    'Batchcode Search Term' -- confirm against CompletedBatchcodeColumnAdder.
    """
    countriesColumn = 'Countries'
    searchTermLevel = 'Batchcode Search Term'

    globalBatchCodeTable = BatchCodeTableFactory(internationalVaersCovid19).createGlobalBatchCodeTable()
    searchTermTable = _readExploration(
        'data/Country By Batchcode Search Term.csv',
        indexName=searchTermLevel)

    batchcodeCompletion = BatchcodeCompletion(ADR_by_Batchcode=globalBatchCodeTable)
    columnAdder = CompletedBatchcodeColumnAdder(batchcodeCompletion.completeBatchcode)
    withCompletedBatchcode = columnAdder.addCompletedBatchcodeColumn(searchTermTable)

    withCountries = CountriesColumnAdder().addCountriesColumn(
        withCompletedBatchcode,
        columnName=countriesColumn)

    # Double brackets keep the result a one-column DataFrame instead of a Series.
    return withCountries[[countriesColumn]].droplevel(searchTermLevel)
|
||||||
|
|
||||||
|
def getCountriesByClickedBatchcode():
    """Return the 'Countries' column derived from the clicked-batchcode exploration.

    Reads 'data/Country By Clicked Batchcode.csv' via ``_readExploration``
    (indexed by 'Clicked Batchcode'), lets ``CountriesColumnAdder`` add a
    'Countries' column, and returns a DataFrame that contains only that column.
    """
    countriesColumn = 'Countries'

    clickedBatchcodeTable = _readExploration(
        'data/Country By Clicked Batchcode.csv',
        indexName='Clicked Batchcode')
    withCountries = CountriesColumnAdder().addCountriesColumn(
        clickedBatchcodeTable,
        columnName=countriesColumn)

    # Double brackets keep the result a one-column DataFrame instead of a Series.
    return withCountries[[countriesColumn]]
|
||||||
|
|
||||||
|
def _readExploration(csvFile, indexName):
    """Load an exploration CSV export into an all-int64 DataFrame.

    The first six lines of ``csvFile`` are skipped, the next line supplies the
    column names, and the first column becomes the index. Malformed lines
    produce a warning rather than an error (``on_bad_lines='warn'``).

    After loading:
      * the row whose index label equals ``indexName`` is removed,
      * the index is named ``indexName``,
      * the 'Totals' column is removed,
      * every remaining column is cast to int64.

    Raises:
        KeyError: if no row is labelled ``indexName`` or there is no
            'Totals' column (raised by ``DataFrame.drop``).
    """
    table = pd.read_csv(
        csvFile,
        header=[0],
        index_col=0,
        skiprows=6,
        on_bad_lines='warn')

    # Row drop first, then column drop -- same order as the checks would fail in.
    table = table.drop(index=indexName)
    table.index.name = indexName
    table = table.drop(columns='Totals')

    return table.astype('int64')
|
||||||
@@ -23,45 +23,6 @@
|
|||||||
"from HistogramFactoryAndPersister import createAndSaveGlobalHistograms, createAndSaveHistogramsForCountries"
|
"from HistogramFactoryAndPersister import createAndSaveGlobalHistograms, createAndSaveHistogramsForCountries"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "0474f6d7",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def readExploration(csvFile, indexName):\n",
|
|
||||||
" exploration = pd.read_csv(csvFile, header=[0], index_col=0, skiprows=6, on_bad_lines='warn')\n",
|
|
||||||
" exploration.drop(index=indexName, inplace=True)\n",
|
|
||||||
" exploration.index.rename(indexName, inplace=True)\n",
|
|
||||||
" exploration.drop(columns='Totals', inplace=True)\n",
|
|
||||||
" for column in exploration.columns:\n",
|
|
||||||
" exploration[column] = exploration[column].astype('int64')\n",
|
|
||||||
" return exploration"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "0cb5849c",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"country_By_Clicked_Batchcode = readExploration('data/Country By Clicked Batchcode.csv', indexName = 'Clicked Batchcode')\n",
|
|
||||||
"country_By_Clicked_Batchcode"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "b5066a92",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"country_By_Batchcode_Search_Term = readExploration('data/Country By Batchcode Search Term.csv', indexName = 'Batchcode Search Term')\n",
|
|
||||||
"country_By_Batchcode_Search_Term"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -180,20 +141,6 @@
|
|||||||
"internationalVaersCovid19"
|
"internationalVaersCovid19"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "7a1023e8",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from BatchCodeTableFactory import BatchCodeTableFactory\n",
|
|
||||||
"\n",
|
|
||||||
"batchCodeTableFactory = BatchCodeTableFactory(internationalVaersCovid19)\n",
|
|
||||||
"batchCodeTable = batchCodeTableFactory.createGlobalBatchCodeTable()\n",
|
|
||||||
"batchCodeTable"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -201,49 +148,10 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from CompletedBatchcodeColumnAdder import CompletedBatchcodeColumnAdder\n",
|
"from CountriesByBatchcodeProvider import getCountriesByCompletedBatchcode\n",
|
||||||
"from BatchcodeCompletion import BatchcodeCompletion\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"batchcodeCompletion = BatchcodeCompletion(ADR_by_Batchcode = batchCodeTable)\n",
|
"countriesByCompletedBatchcode = getCountriesByCompletedBatchcode(internationalVaersCovid19)\n",
|
||||||
"completedBatchcodeColumnAdder = CompletedBatchcodeColumnAdder(batchcodeCompletion.completeBatchcode)\n",
|
"countriesByCompletedBatchcode"
|
||||||
"country_By_Batchcode_Search_Term = completedBatchcodeColumnAdder.addCompletedBatchcodeColumn(country_By_Batchcode_Search_Term)\n",
|
|
||||||
"country_By_Batchcode_Search_Term"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "5e3f6e61",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from CountriesColumnAdder import CountriesColumnAdder\n",
|
|
||||||
"\n",
|
|
||||||
"country_By_Batchcode_Search_Term = CountriesColumnAdder().addCountriesColumn(\n",
|
|
||||||
" country_By_Batchcode_Search_Term,\n",
|
|
||||||
" columnName = 'Countries guessed')\n",
|
|
||||||
"country_By_Batchcode_Search_Term"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "5c1780b8",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"country_By_Batchcode_Search_Term['Countries guessed']"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "2ec46898",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# mi.droplevel('z')\n",
|
|
||||||
"country_By_Batchcode_Search_Term.index.droplevel('Batchcode Search Term')"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -253,21 +161,10 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"country_By_Clicked_Batchcode = CountriesColumnAdder().addCountriesColumn(\n",
|
"from CountriesByBatchcodeProvider import getCountriesByClickedBatchcode\n",
|
||||||
" country_By_Clicked_Batchcode,\n",
|
"\n",
|
||||||
" columnName = 'Countries guessed')\n",
|
"countriesByClickedBatchcode = getCountriesByClickedBatchcode()\n",
|
||||||
"country_By_Clicked_Batchcode"
|
"countriesByClickedBatchcode"
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "03193075",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"country_By_Clicked_Batchcode2 = country_By_Clicked_Batchcode[['Countries guessed']]\n",
|
|
||||||
"country_By_Clicked_Batchcode2"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -277,14 +174,14 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"res = pd.merge(\n",
|
"#res = pd.merge(\n",
|
||||||
" batchCodeTable,\n",
|
"# batchCodeTable,\n",
|
||||||
" country_By_Clicked_Batchcode2,\n",
|
"# countriesByClickedBatchcode,\n",
|
||||||
" how = 'left',\n",
|
"# how = 'left',\n",
|
||||||
" left_index = True,\n",
|
"# left_index = True,\n",
|
||||||
" right_index = True,\n",
|
"# right_index = True,\n",
|
||||||
" validate = 'one_to_one')\n",
|
"# validate = 'one_to_one')\n",
|
||||||
"res"
|
"#res"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -294,7 +191,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"res[~res['Countries guessed'].isna()].to_excel('tmp/tableWithCountriesGuessed.xlsx')"
|
"# res[~res['batchCodeTableCountries guessed'].isna()].to_excel('tmp/tableWithCountriesGuessed.xlsx')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user