adding CityCountsByBatchcodeTablesMerger
This commit is contained in:
19
src/GoogleAnalytics/CityCountsByBatchcodeTablesMerger.py
Normal file
19
src/GoogleAnalytics/CityCountsByBatchcodeTablesMerger.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import pandas as pd
|
||||
from GoogleAnalytics.RegionCountsByClickedBatchcodeProvider import RegionCountsByClickedBatchcodeProvider
|
||||
from GoogleAnalytics.FilesProvider import FilesProvider
|
||||
from GoogleAnalytics.Resolution import Resolution
|
||||
|
||||
class CityCountsByBatchcodeTablesMerger:
    """Merges the per-file city count tables into one summed table."""

    @staticmethod
    def getCityCountsByClickedBatchcode(dataDir):
        """Return the city counts by clicked batch code summed over all files.

        Collects the files below ``dataDir`` that have ``Resolution.CITY``,
        reads one table per file via
        ``RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode``
        and sums the rows that share the same index entry.

        Args:
            dataDir: directory handed to ``FilesProvider``.

        Returns:
            The concatenated tables, grouped by their index levels and summed.

        Raises:
            ValueError: if no file having ``Resolution.CITY`` was found.
        """
        files = FilesProvider(dataDir).getFilesHavingResolution(Resolution.CITY)
        cityCountsByClickedBatchcodeTables = [
            RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode(file)
            for file in files
        ]
        if not cityCountsByClickedBatchcodeTables:
            # pd.concat([]) would raise the uninformative
            # "No objects to concatenate"; keep the exception type but say
            # which directory was the problem.
            raise ValueError(
                f'No files having resolution CITY found in {dataDir!r}')
        table = pd.concat(cityCountsByClickedBatchcodeTables)
        return CityCountsByBatchcodeTablesMerger._getCityCountsByClickedBatchcodeFromTable(table)

    @staticmethod
    def _getCityCountsByClickedBatchcodeFromTable(cityCountsByClickedBatchcodeTable):
        """Sum the rows of the table that share the same index entry.

        The table is grouped by the names of all of its index levels, so
        index entries occurring more than once (e.g. coming from different
        files) are collapsed into one row holding the summed values.
        """
        indexLevelNames = list(cityCountsByClickedBatchcodeTable.index.names)
        return (cityCountsByClickedBatchcodeTable
                .groupby(indexLevelNames)
                .sum())
|
||||
27
src/GoogleAnalytics/CityCountsByBatchcodeTablesMergerTest.py
Normal file
27
src/GoogleAnalytics/CityCountsByBatchcodeTablesMergerTest.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import unittest
|
||||
from pandas.testing import assert_frame_equal
|
||||
from TestHelper import TestHelper
|
||||
import pandas as pd
|
||||
from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger
|
||||
|
||||
class CityCountsByBatchcodeTablesMergerTest(unittest.TestCase):
    """Tests for CityCountsByBatchcodeTablesMerger."""

    def test_getCityCountsByClickedBatchcode(self):
        """The city counts of all test data files are summed per index entry."""
        # Given
        dataDir = 'src/testdata/GoogleAnalytics'

        # When
        actualTable = CityCountsByBatchcodeTablesMerger.getCityCountsByClickedBatchcode(dataDir)

        # Then
        expectedIndex = pd.MultiIndex.from_tuples(
            names = ['VAX_LOT', 'COUNTRY', 'REGION', 'CITY'],
            tuples = [
                ['#003B21A', 'United States', 'California', 'Roseville'],
                ['000086A', 'Germany', 'Bavaria', 'Nordlingen'],
                ['000086A', 'Germany', 'Bavaria', 'Nuremberg'],
            ])
        expectedTable = TestHelper.createDataFrame(
            columns = ['CITY_COUNT_BY_VAX_LOT'],
            # each expected count is the sum of the counts found in the single files
            data = [
                [100 + 200],
                [10 + 20],
                [20 + 40],
            ],
            index = expectedIndex)
        assert_frame_equal(actualTable, expectedTable)
|
||||
@@ -1,13 +1,13 @@
|
||||
import pandas as pd
|
||||
from GoogleAnalytics.RegionCountsByClickedBatchcodeProvider import RegionCountsByClickedBatchcodeProvider
|
||||
from GoogleAnalytics.FilesProvider import FilesProvider
|
||||
from GoogleAnalytics.Resolution import Resolution
|
||||
from TablesHelper import TablesHelper
|
||||
|
||||
class RegionCountsByBatchcodeTablesMerger:
|
||||
|
||||
@staticmethod
|
||||
def getRegionCountsByClickedBatchcode(dataDir):
|
||||
files = FilesProvider(dataDir).getFilesHavingResolution(Resolution.CITY)
|
||||
tables = [RegionCountsByClickedBatchcodeProvider.getRegionCountsByClickedBatchcode(file) for file in files]
|
||||
table = TablesHelper.concatTables_groupByIndex_sum(tables)
|
||||
return table
|
||||
cityCountsByClickedBatchcodeTables = [RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode(file) for file in files]
|
||||
table = pd.concat(cityCountsByClickedBatchcodeTables)
|
||||
return RegionCountsByClickedBatchcodeProvider._getRegionCountsByClickedBatchcodeFromTable(table)
|
||||
|
||||
@@ -5,13 +5,7 @@ class RegionCountsByClickedBatchcodeProvider:
|
||||
|
||||
@staticmethod
|
||||
def getRegionCountsByClickedBatchcode(file):
|
||||
cityCountsByClickedBatchcodeTable = RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode(file)
|
||||
return (cityCountsByClickedBatchcodeTable
|
||||
.groupby(['VAX_LOT', 'COUNTRY', 'REGION'])
|
||||
.agg(REGION_COUNT_BY_VAX_LOT =
|
||||
pd.NamedAgg(
|
||||
column = 'CITY_COUNT_BY_VAX_LOT',
|
||||
aggfunc = sum)))
|
||||
return RegionCountsByClickedBatchcodeProvider._getRegionCountsByClickedBatchcodeFromTable(RegionCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode(file))
|
||||
|
||||
# FK-TODO: have the identical method CountryCountsByClickedBatchcodeProvider._getCityCountsByClickedBatchcode() delegate to the method below
|
||||
@staticmethod
|
||||
@@ -25,3 +19,12 @@ class RegionCountsByClickedBatchcodeProvider:
|
||||
'Event count': 'CITY_COUNT_BY_VAX_LOT'
|
||||
},
|
||||
index_columns = ['COUNTRY', 'REGION', 'CITY'])
|
||||
|
||||
@staticmethod
|
||||
def _getRegionCountsByClickedBatchcodeFromTable(cityCountsByClickedBatchcodeTable):
|
||||
return (cityCountsByClickedBatchcodeTable
|
||||
.groupby(['VAX_LOT', 'COUNTRY', 'REGION'])
|
||||
.agg(REGION_COUNT_BY_VAX_LOT =
|
||||
pd.NamedAgg(
|
||||
column = 'CITY_COUNT_BY_VAX_LOT',
|
||||
aggfunc = sum)))
|
||||
|
||||
Reference in New Issue
Block a user