adding CityCountsByBatchcodeTablesMerger

This commit is contained in:
frankknoll
2023-10-04 11:34:20 +02:00
parent 3f1f9ac19f
commit 5cd36b8e1e
5 changed files with 140 additions and 11 deletions

View File

@@ -0,0 +1,19 @@
import pandas as pd
from GoogleAnalytics.RegionCountsByClickedBatchcodeProvider import RegionCountsByClickedBatchcodeProvider
from GoogleAnalytics.FilesProvider import FilesProvider
from GoogleAnalytics.Resolution import Resolution
class CityCountsByBatchcodeTablesMerger:
    """Merges the per-file city click-count tables into one table."""

    @staticmethod
    def getCityCountsByClickedBatchcode(dataDir):
        """Return the city counts per clicked batch code, summed over all files.

        Every file of ``Resolution.CITY`` that ``FilesProvider`` reports for
        ``dataDir`` is turned into a table; the tables are stacked and rows
        sharing the same index entry are added up.
        """
        cityFiles = FilesProvider(dataDir).getFilesHavingResolution(Resolution.CITY)
        perFileTables = list(
            map(
                RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode,
                cityFiles))
        stackedTable = pd.concat(perFileTables)
        return CityCountsByBatchcodeTablesMerger._getCityCountsByClickedBatchcodeFromTable(stackedTable)

    @staticmethod
    def _getCityCountsByClickedBatchcodeFromTable(cityCountsByClickedBatchcodeTable):
        """Sum all rows of the given table that share the same index entry."""
        indexNames = cityCountsByClickedBatchcodeTable.index.names
        rowsGroupedByIndex = cityCountsByClickedBatchcodeTable.groupby(indexNames)
        return rowsGroupedByIndex.sum()

View File

@@ -0,0 +1,27 @@
import unittest
from pandas.testing import assert_frame_equal
from TestHelper import TestHelper
import pandas as pd
from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger
class CityCountsByBatchcodeTablesMergerTest(unittest.TestCase):

    def test_getCityCountsByClickedBatchcode(self):
        # Given
        # The expected counts are spelled out as sums; presumably one addend
        # per file in the test data directory (verify against the fixtures).
        expectedIndex = pd.MultiIndex.from_tuples(
            names = ['VAX_LOT', 'COUNTRY', 'REGION', 'CITY'],
            tuples = [['#003B21A', 'United States', 'California', 'Roseville'],
                      ['000086A', 'Germany', 'Bavaria', 'Nordlingen'],
                      ['000086A', 'Germany', 'Bavaria', 'Nuremberg']])
        expectedTable = TestHelper.createDataFrame(
            columns = ['CITY_COUNT_BY_VAX_LOT'],
            data = [[100 + 200],
                    [10 + 20],
                    [20 + 40]],
            index = expectedIndex)

        # When
        actualTable = CityCountsByBatchcodeTablesMerger.getCityCountsByClickedBatchcode('src/testdata/GoogleAnalytics')

        # Then
        assert_frame_equal(actualTable, expectedTable)

View File

@@ -1,13 +1,13 @@
import pandas as pd
from GoogleAnalytics.RegionCountsByClickedBatchcodeProvider import RegionCountsByClickedBatchcodeProvider
from GoogleAnalytics.FilesProvider import FilesProvider
from GoogleAnalytics.Resolution import Resolution
from TablesHelper import TablesHelper
class RegionCountsByBatchcodeTablesMerger:
    """Merges the per-file click-count tables into one region-level table."""

    @staticmethod
    def getRegionCountsByClickedBatchcode(dataDir):
        """Return the region counts per clicked batch code over all files.

        Every file of ``Resolution.CITY`` that ``FilesProvider`` reports for
        ``dataDir`` is read as a city-level table. The tables are stacked and
        then aggregated to region level in a single step.

        NOTE(review): this block previously contained two implementations in
        sequence (a ``TablesHelper.concatTables_groupByIndex_sum`` variant
        ending in ``return table`` followed by this one, which was
        unreachable). Only this variant is kept; confirm it is the intended
        one.
        """
        files = FilesProvider(dataDir).getFilesHavingResolution(Resolution.CITY)
        cityCountsByClickedBatchcodeTables = [
            RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode(file)
            for file in files]
        # Stack first, aggregate once: cities of the same region that occur in
        # different files end up in the same group.
        table = pd.concat(cityCountsByClickedBatchcodeTables)
        return RegionCountsByClickedBatchcodeProvider._getRegionCountsByClickedBatchcodeFromTable(table)

View File

@@ -5,13 +5,7 @@ class RegionCountsByClickedBatchcodeProvider:
@staticmethod
def getRegionCountsByClickedBatchcode(file):
    """Return the region counts per clicked batch code for a single file.

    The file is first read as a city-level table, which is then aggregated
    to region level by ``_getRegionCountsByClickedBatchcodeFromTable``.
    The aggregation lives in that helper only, so it is not duplicated here.
    """
    cityCountsByClickedBatchcodeTable = RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode(file)
    return RegionCountsByClickedBatchcodeProvider._getRegionCountsByClickedBatchcodeFromTable(cityCountsByClickedBatchcodeTable)
# FK-TODO: make the same method CountryCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode() delegate to the one here
@staticmethod
@@ -25,3 +19,12 @@ class RegionCountsByClickedBatchcodeProvider:
'Event count': 'CITY_COUNT_BY_VAX_LOT'
},
index_columns = ['COUNTRY', 'REGION', 'CITY'])
@staticmethod
def _getRegionCountsByClickedBatchcodeFromTable(cityCountsByClickedBatchcodeTable):
    """Aggregate a city-level count table to region level.

    Groups the table by ``VAX_LOT``, ``COUNTRY`` and ``REGION`` and sums the
    ``CITY_COUNT_BY_VAX_LOT`` column of each group into a new column named
    ``REGION_COUNT_BY_VAX_LOT``.
    """
    return (cityCountsByClickedBatchcodeTable
        .groupby(['VAX_LOT', 'COUNTRY', 'REGION'])
        .agg(REGION_COUNT_BY_VAX_LOT =
                pd.NamedAgg(
                    column = 'CITY_COUNT_BY_VAX_LOT',
                    # The string selects pandas' own groupby sum. Passing the
                    # builtin ``sum`` triggers a FutureWarning on pandas 2.1+
                    # because builtins will no longer be aliased to it.
                    aggfunc = 'sum')))