adding CityCountsByBatchcodeTablesMerger

This commit is contained in:
frankknoll
2023-10-04 11:34:20 +02:00
parent 3f1f9ac19f
commit 5cd36b8e1e
5 changed files with 140 additions and 11 deletions

View File

@@ -0,0 +1,19 @@
import pandas as pd
from GoogleAnalytics.RegionCountsByClickedBatchcodeProvider import RegionCountsByClickedBatchcodeProvider
from GoogleAnalytics.FilesProvider import FilesProvider
from GoogleAnalytics.Resolution import Resolution
class CityCountsByBatchcodeTablesMerger:
    """Merge the per-file city click counts into a single table."""

    @staticmethod
    def getCityCountsByClickedBatchcode(dataDir):
        """Return the city counts of all city-resolution files in dataDir, summed per index entry."""
        perFileTables = []
        for cityFile in FilesProvider(dataDir).getFilesHavingResolution(Resolution.CITY):
            perFileTables.append(
                RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode(cityFile))
        stackedTable = pd.concat(perFileTables)
        return CityCountsByBatchcodeTablesMerger._getCityCountsByClickedBatchcodeFromTable(stackedTable)

    @staticmethod
    def _getCityCountsByClickedBatchcodeFromTable(cityCountsByClickedBatchcodeTable):
        """Collapse rows sharing the same index entry by summing their columns."""
        indexLevelNames = cityCountsByClickedBatchcodeTable.index.names
        rowsGroupedByIndex = cityCountsByClickedBatchcodeTable.groupby(indexLevelNames)
        return rowsGroupedByIndex.sum()

View File

@@ -0,0 +1,27 @@
import unittest
from pandas.testing import assert_frame_equal
from TestHelper import TestHelper
import pandas as pd
from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger
class CityCountsByBatchcodeTablesMergerTest(unittest.TestCase):
    """Checks that city counts from several Google Analytics files are merged by summation."""

    def test_getCityCountsByClickedBatchcode(self):
        # Given: the city-resolution files below 'src/testdata/GoogleAnalytics'

        # When
        actualTable = CityCountsByBatchcodeTablesMerger.getCityCountsByClickedBatchcode('src/testdata/GoogleAnalytics')

        # Then: each expected count is written as the sum of its two contributions
        expectedIndex = pd.MultiIndex.from_tuples(
            names = ['VAX_LOT', 'COUNTRY', 'REGION', 'CITY'],
            tuples = [['#003B21A', 'United States', 'California', 'Roseville'],
                      ['000086A', 'Germany', 'Bavaria', 'Nordlingen'],
                      ['000086A', 'Germany', 'Bavaria', 'Nuremberg']])
        expectedTable = TestHelper.createDataFrame(
            columns = ['CITY_COUNT_BY_VAX_LOT'],
            data = [  [100 + 200],
                      [10 + 20],
                      [20 + 40]],
            index = expectedIndex)
        assert_frame_equal(actualTable, expectedTable)

View File

@@ -1,13 +1,13 @@
import pandas as pd
from GoogleAnalytics.RegionCountsByClickedBatchcodeProvider import RegionCountsByClickedBatchcodeProvider
from GoogleAnalytics.FilesProvider import FilesProvider
from GoogleAnalytics.Resolution import Resolution
from TablesHelper import TablesHelper
# Builds one region-level click-count table from all city-resolution files of a data directory.
class RegionCountsByBatchcodeTablesMerger:
@staticmethod
def getRegionCountsByClickedBatchcode(dataDir):
# Ask FilesProvider for every file in dataDir that has Resolution.CITY.
files = FilesProvider(dataDir).getFilesHavingResolution(Resolution.CITY)
# First variant: compute a region table per file, then combine them with
# TablesHelper.concatTables_groupByIndex_sum.
tables = [RegionCountsByClickedBatchcodeProvider.getRegionCountsByClickedBatchcode(file) for file in files]
table = TablesHelper.concatTables_groupByIndex_sum(tables)
return table
# NOTE(review): the three statements below come after `return table` and are unreachable as written.
# They look like the added side of a diff hunk whose +/- markers were lost: concatenate the per-file
# city tables and aggregate to regions once. Confirm which variant is the current one.
cityCountsByClickedBatchcodeTables = [RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode(file) for file in files]
table = pd.concat(cityCountsByClickedBatchcodeTables)
return RegionCountsByClickedBatchcodeProvider._getRegionCountsByClickedBatchcodeFromTable(table)

View File

@@ -5,13 +5,7 @@ class RegionCountsByClickedBatchcodeProvider:
# Returns the region-level click counts for a single file.
@staticmethod
def getRegionCountsByClickedBatchcode(file):
# Load the city-level counts of this one file.
cityCountsByClickedBatchcodeTable = RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode(file)
# Sum CITY_COUNT_BY_VAX_LOT within each (VAX_LOT, COUNTRY, REGION) group and
# expose the result as column REGION_COUNT_BY_VAX_LOT.
return (cityCountsByClickedBatchcodeTable
.groupby(['VAX_LOT', 'COUNTRY', 'REGION'])
.agg(REGION_COUNT_BY_VAX_LOT =
pd.NamedAgg(
column = 'CITY_COUNT_BY_VAX_LOT',
aggfunc = sum)))
# NOTE(review): unreachable as written because it follows the return above. It looks like the added
# side of a diff hunk whose +/- markers were lost, delegating the same aggregation to
# _getRegionCountsByClickedBatchcodeFromTable. Confirm which variant is the current one.
return RegionCountsByClickedBatchcodeProvider._getRegionCountsByClickedBatchcodeFromTable(RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode(file))
# FK-TODO: make the identical method CountryCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode() delegate to the one here
@staticmethod
@@ -25,3 +19,12 @@ class RegionCountsByClickedBatchcodeProvider:
'Event count': 'CITY_COUNT_BY_VAX_LOT'
},
index_columns = ['COUNTRY', 'REGION', 'CITY'])
@staticmethod
def _getRegionCountsByClickedBatchcodeFromTable(cityCountsByClickedBatchcodeTable):
    """Aggregate city-level click counts to region level.

    Groups the given table by 'VAX_LOT', 'COUNTRY' and 'REGION' and sums the
    'CITY_COUNT_BY_VAX_LOT' column of each group.

    Returns a DataFrame indexed by the three grouping keys with the single
    column 'REGION_COUNT_BY_VAX_LOT'.
    """
    # The aggregation is named by the string 'sum' instead of passing the Python
    # builtin: recent pandas versions deprecate builtin callables here (FutureWarning)
    # and will stop mapping them to the groupby sum.
    return (cityCountsByClickedBatchcodeTable
        .groupby(['VAX_LOT', 'COUNTRY', 'REGION'])
        .agg(REGION_COUNT_BY_VAX_LOT =
            pd.NamedAgg(
                column = 'CITY_COUNT_BY_VAX_LOT',
                aggfunc = 'sum')))

View File

@@ -249,6 +249,14 @@
" '../docs/data/barChartDescriptionTable.json')\n"
]
},
{
"cell_type": "markdown",
"id": "c5a66a3b",
"metadata": {},
"source": [
"# Google Analytics"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -283,6 +291,78 @@
"regionCountsByClickedBatchcodeTable4Germany.to_excel('tmp/regionCountsByClickedBatchcodeTable4Germany.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c05fcfc",
"metadata": {},
"outputs": [],
"source": [
"# VAX_LOT: EX8679\n",
"(regionCountsByClickedBatchcodeTable4Germany\n",
" .groupby('VAX_LOT')\n",
" .sum()\n",
" .sort_values(by = 'REGION_COUNT_BY_VAX_LOT', ascending = False))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "370bf329",
"metadata": {},
"outputs": [],
"source": [
"(regionCountsByClickedBatchcodeTable4Germany\n",
" .loc[('EX8679', slice(None), slice(None)), :]\n",
" .sort_values(by = 'REGION_COUNT_BY_VAX_LOT', ascending = False))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9740c40b",
"metadata": {},
"outputs": [],
"source": [
"from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger\n",
"\n",
"cityCountsByClickedBatchcodeTable = CityCountsByBatchcodeTablesMerger.getCityCountsByClickedBatchcode('data/GoogleAnalytics')\n",
"cityCountsByClickedBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0dac0ea6",
"metadata": {},
"outputs": [],
"source": [
"cityCountsByClickedBatchcodeTable_EX8679_Germany = cityCountsByClickedBatchcodeTable.loc[('EX8679', 'Germany', slice(None), slice(None)), :]\n",
"cityCountsByClickedBatchcodeTable_EX8679_Germany"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d7a8bbf",
"metadata": {},
"outputs": [],
"source": [
"cityCountsByClickedBatchcodeTable_EX8679_Germany.to_excel('tmp/cityCountsByClickedBatchcodeTable_EX8679_Germany.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "494943f3",
"metadata": {},
"outputs": [],
"source": [
"(cityCountsByClickedBatchcodeTable_EX8679_Germany\n",
" .sort_values(by = ['CITY_COUNT_BY_VAX_LOT'], ascending = False)\n",
" .to_excel('tmp/cityCountsByClickedBatchcodeTable_EX8679_Germany_sorted.xlsx'))"
]
},
{
"attachments": {},
"cell_type": "markdown",