adding CityCountsByBatchcodeTablesMerger
This commit is contained in:
19
src/GoogleAnalytics/CityCountsByBatchcodeTablesMerger.py
Normal file
19
src/GoogleAnalytics/CityCountsByBatchcodeTablesMerger.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import pandas as pd
|
||||
from GoogleAnalytics.RegionCountsByClickedBatchcodeProvider import RegionCountsByClickedBatchcodeProvider
|
||||
from GoogleAnalytics.FilesProvider import FilesProvider
|
||||
from GoogleAnalytics.Resolution import Resolution
|
||||
|
||||
class CityCountsByBatchcodeTablesMerger:
    """Merges the per-file city-count tables into one combined table."""

    @staticmethod
    def getCityCountsByClickedBatchcode(dataDir):
        """Return a single city-count table built from all CITY-resolution files in dataDir.

        Every file that FilesProvider reports for Resolution.CITY is converted
        to a table by RegionCountsByClickedBatchcodeProvider, the tables are
        concatenated, and rows sharing the same index entry are summed.
        """
        cityFiles = FilesProvider(dataDir).getFilesHavingResolution(Resolution.CITY)
        toCityCountsTable = RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode
        tablesPerFile = list(map(toCityCountsTable, cityFiles))
        concatenatedTable = pd.concat(tablesPerFile)
        return CityCountsByBatchcodeTablesMerger._getCityCountsByClickedBatchcodeFromTable(concatenatedTable)

    @staticmethod
    def _getCityCountsByClickedBatchcodeFromTable(cityCountsByClickedBatchcodeTable):
        """Collapse rows with identical index entries by summing their values.

        The grouping keys are the table's own index level names, so the
        result keeps the same index levels as the input.
        """
        indexLevelNames = cityCountsByClickedBatchcodeTable.index.names
        rowsByIndexEntry = cityCountsByClickedBatchcodeTable.groupby(indexLevelNames)
        return rowsByIndexEntry.sum()
|
||||
27
src/GoogleAnalytics/CityCountsByBatchcodeTablesMergerTest.py
Normal file
27
src/GoogleAnalytics/CityCountsByBatchcodeTablesMergerTest.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import unittest
|
||||
from pandas.testing import assert_frame_equal
|
||||
from TestHelper import TestHelper
|
||||
import pandas as pd
|
||||
from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger
|
||||
|
||||
class CityCountsByBatchcodeTablesMergerTest(unittest.TestCase):
    """Tests CityCountsByBatchcodeTablesMerger against the GoogleAnalytics test data directory."""

    def test_getCityCountsByClickedBatchcode(self):
        # Given
        dataDir = 'src/testdata/GoogleAnalytics'
        expectedIndex = pd.MultiIndex.from_tuples(
            names = ['VAX_LOT', 'COUNTRY', 'REGION', 'CITY'],
            tuples = [['#003B21A', 'United States', 'California', 'Roseville'],
                      ['000086A', 'Germany', 'Bavaria', 'Nordlingen'],
                      ['000086A', 'Germany', 'Bavaria', 'Nuremberg']])
        # Counts are spelled out as sums; presumably one addend per test
        # data file -- verify against the files in dataDir.
        expectedTable = TestHelper.createDataFrame(
            columns = ['CITY_COUNT_BY_VAX_LOT'],
            data = [  [100 + 200],
                      [10 + 20],
                      [20 + 40]],
            index = expectedIndex)

        # When
        actualTable = CityCountsByBatchcodeTablesMerger.getCityCountsByClickedBatchcode(dataDir)

        # Then
        assert_frame_equal(actualTable, expectedTable)
|
||||
@@ -1,13 +1,13 @@
|
||||
import pandas as pd
|
||||
from GoogleAnalytics.RegionCountsByClickedBatchcodeProvider import RegionCountsByClickedBatchcodeProvider
|
||||
from GoogleAnalytics.FilesProvider import FilesProvider
|
||||
from GoogleAnalytics.Resolution import Resolution
|
||||
from TablesHelper import TablesHelper
|
||||
|
||||
class RegionCountsByBatchcodeTablesMerger:
|
||||
|
||||
@staticmethod
|
||||
def getRegionCountsByClickedBatchcode(dataDir):
|
||||
files = FilesProvider(dataDir).getFilesHavingResolution(Resolution.CITY)
|
||||
tables = [RegionCountsByClickedBatchcodeProvider.getRegionCountsByClickedBatchcode(file) for file in files]
|
||||
table = TablesHelper.concatTables_groupByIndex_sum(tables)
|
||||
return table
|
||||
cityCountsByClickedBatchcodeTables = [RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode(file) for file in files]
|
||||
table = pd.concat(cityCountsByClickedBatchcodeTables)
|
||||
return RegionCountsByClickedBatchcodeProvider._getRegionCountsByClickedBatchcodeFromTable(table)
|
||||
|
||||
@@ -5,13 +5,7 @@ class RegionCountsByClickedBatchcodeProvider:
|
||||
|
||||
@staticmethod
|
||||
def getRegionCountsByClickedBatchcode(file):
|
||||
cityCountsByClickedBatchcodeTable = RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode(file)
|
||||
return (cityCountsByClickedBatchcodeTable
|
||||
.groupby(['VAX_LOT', 'COUNTRY', 'REGION'])
|
||||
.agg(REGION_COUNT_BY_VAX_LOT =
|
||||
pd.NamedAgg(
|
||||
column = 'CITY_COUNT_BY_VAX_LOT',
|
||||
aggfunc = sum)))
|
||||
return RegionCountsByClickedBatchcodeProvider._getRegionCountsByClickedBatchcodeFromTable(RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode(file))
|
||||
|
||||
# FK-TODO: make the identical method CountryCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode() delegate to this one
|
||||
@staticmethod
|
||||
@@ -25,3 +19,12 @@ class RegionCountsByClickedBatchcodeProvider:
|
||||
'Event count': 'CITY_COUNT_BY_VAX_LOT'
|
||||
},
|
||||
index_columns = ['COUNTRY', 'REGION', 'CITY'])
|
||||
|
||||
@staticmethod
|
||||
def _getRegionCountsByClickedBatchcodeFromTable(cityCountsByClickedBatchcodeTable):
|
||||
return (cityCountsByClickedBatchcodeTable
|
||||
.groupby(['VAX_LOT', 'COUNTRY', 'REGION'])
|
||||
.agg(REGION_COUNT_BY_VAX_LOT =
|
||||
pd.NamedAgg(
|
||||
column = 'CITY_COUNT_BY_VAX_LOT',
|
||||
aggfunc = sum)))
|
||||
|
||||
@@ -249,6 +249,14 @@
|
||||
" '../docs/data/barChartDescriptionTable.json')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c5a66a3b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Analytics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -283,6 +291,78 @@
|
||||
"regionCountsByClickedBatchcodeTable4Germany.to_excel('tmp/regionCountsByClickedBatchcodeTable4Germany.xlsx')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3c05fcfc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# VAX_LOT: EX8679\n",
|
||||
"(regionCountsByClickedBatchcodeTable4Germany\n",
|
||||
" .groupby('VAX_LOT')\n",
|
||||
" .sum()\n",
|
||||
" .sort_values(by = 'REGION_COUNT_BY_VAX_LOT', ascending = False))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "370bf329",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"(regionCountsByClickedBatchcodeTable4Germany\n",
|
||||
" .loc[('EX8679', slice(None), slice(None)), :]\n",
|
||||
" .sort_values(by = 'REGION_COUNT_BY_VAX_LOT', ascending = False))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9740c40b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger\n",
|
||||
"\n",
|
||||
"cityCountsByClickedBatchcodeTable = CityCountsByBatchcodeTablesMerger.getCityCountsByClickedBatchcode('data/GoogleAnalytics')\n",
|
||||
"cityCountsByClickedBatchcodeTable"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0dac0ea6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cityCountsByClickedBatchcodeTable_EX8679_Germany = cityCountsByClickedBatchcodeTable.loc[('EX8679', 'Germany', slice(None), slice(None)), :]\n",
|
||||
"cityCountsByClickedBatchcodeTable_EX8679_Germany"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8d7a8bbf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cityCountsByClickedBatchcodeTable_EX8679_Germany.to_excel('tmp/cityCountsByClickedBatchcodeTable_EX8679_Germany.xlsx')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "494943f3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"(cityCountsByClickedBatchcodeTable_EX8679_Germany\n",
|
||||
" .sort_values(by = ['CITY_COUNT_BY_VAX_LOT'], ascending = False)\n",
|
||||
" .to_excel('tmp/cityCountsByClickedBatchcodeTable_EX8679_Germany_sorted.xlsx'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
|
||||
Reference in New Issue
Block a user