diff --git a/src/CountriesByBatchcodeProvider.py b/src/CountriesByBatchcodeProvider.py index 2b63e678403..414e8f23817 100644 --- a/src/CountriesByBatchcodeProvider.py +++ b/src/CountriesByBatchcodeProvider.py @@ -2,28 +2,15 @@ import pandas as pd from BatchCodeTableFactory import BatchCodeTableFactory from InternationalVaersCovid19Provider import getInternationalVaersCovid19 from SummationTableFactory import SummationTableFactory +from CountryCountsByBatchcodeTablesMerger import CountryCountsByBatchcodeTablesMerger def getCountryCountsByBatchcodeTable(): return _combineCountryCountsByBatchcodeTables( - countryCountsByClickedBatchcode = _getCountryCountsByClickedBatchcode(), + countryCountsByClickedBatchcode = CountryCountsByBatchcodeTablesMerger.getCountryCountsByClickedBatchcodeTable(), countryCountsByBatchcodeBeforeDeletion = _getCountryCountsByBatchcodeBeforeDeletion()) -def _getCountryCountsByClickedBatchcode(): - exploration = pd.read_csv('data/Country By Clicked Batchcode.csv', index_col = 0, skiprows = [0, 1, 2, 3, 4, 5, 7]) - exploration.index.name = 'VAX_LOT' - exploration.rename( - columns = - { - 'Country': 'COUNTRY', - 'Event count': 'COUNTRY_COUNT_BY_VAX_LOT' - }, - inplace = True) - exploration.set_index('COUNTRY',append = True, inplace = True) - return exploration - - def _getCountryCountsByBatchcodeBeforeDeletion(): internationalVaersCovid19 = getInternationalVaersCovid19(dataDir = 'VAERS/VAERSBeforeDeletion', years = [2020, 2021, 2022]) return (internationalVaersCovid19 @@ -47,7 +34,8 @@ def _combineCountryCountsByBatchcodeTables(countryCountsByClickedBatchcode, coun def getCountriesByClickedBatchcode(): - return (_getCountryCountsByClickedBatchcode() + return (CountryCountsByBatchcodeTablesMerger + .getCountryCountsByClickedBatchcodeTable() .reset_index(level = 'COUNTRY') .groupby('VAX_LOT') .agg( diff --git a/src/CountryCountsByBatchcodeTablesMerger.py b/src/CountryCountsByBatchcodeTablesMerger.py index 171c83b52d2..9d9bb654103 100644 --- a/src/CountryCountsByBatchcodeTablesMerger.py +++ b/src/CountryCountsByBatchcodeTablesMerger.py @@ -1,10 +1,21 @@ import pandas as pd +import glob +from CountryCountsByClickedBatchcodeProvider import CountryCountsByClickedBatchcodeProvider class CountryCountsByBatchcodeTablesMerger: @staticmethod - def merge(countryCountsByBatchcodeTables): + def mergeCountryCountsByBatchcodeTables(countryCountsByBatchcodeTables): return (pd .concat(countryCountsByBatchcodeTables) .groupby(countryCountsByBatchcodeTables[0].index.names) .sum()) + + @staticmethod + def getCountryCountsByClickedBatchcodeTable(): + return CountryCountsByBatchcodeTablesMerger.mergeCountryCountsByBatchcodeTables(CountryCountsByBatchcodeTablesMerger._getTables()) + + @staticmethod + def _getTables(): + files = glob.glob(r'data/*') + return [CountryCountsByClickedBatchcodeProvider.getCountryCountsByClickedBatchcode(file) for file in files] diff --git a/src/CountryCountsByBatchcodeTablesMergerTest.py b/src/CountryCountsByBatchcodeTablesMergerTest.py index 438f00184f2..3a779196cb9 100644 --- a/src/CountryCountsByBatchcodeTablesMergerTest.py +++ b/src/CountryCountsByBatchcodeTablesMergerTest.py @@ -6,7 +6,7 @@ from CountryCountsByBatchcodeTablesMerger import CountryCountsByBatchcodeTablesM class CountryCountsByBatchcodeTablesMergerTest(unittest.TestCase): - def test_merge(self): + def test_mergeCountryCountsByBatchcodeTables(self): # Given countryCountsByBatchcodeTable1 = TestHelper.createDataFrame( columns = ['COUNTRY_COUNT_BY_VAX_LOT'], @@ -24,7 +24,7 @@ class CountryCountsByBatchcodeTablesMergerTest(unittest.TestCase): tuples = [['12345', 'Germany']])) # When - mergedCountryCountsByBatchcodeTables = CountryCountsByBatchcodeTablesMerger.merge( + mergedCountryCountsByBatchcodeTables = CountryCountsByBatchcodeTablesMerger.mergeCountryCountsByBatchcodeTables( [ countryCountsByBatchcodeTable1, countryCountsByBatchcodeTable2 diff --git a/src/CountryCountsByClickedBatchcodeProvider.py b/src/CountryCountsByClickedBatchcodeProvider.py new file mode 100644 index 00000000000..8645e417578 --- /dev/null +++ b/src/CountryCountsByClickedBatchcodeProvider.py @@ -0,0 +1,17 @@ +import pandas as pd + +class CountryCountsByClickedBatchcodeProvider: + + @staticmethod + def getCountryCountsByClickedBatchcode(file): + exploration = pd.read_csv(file, index_col = 0, skiprows = [0, 1, 2, 3, 4, 5, 7]) + exploration.index.name = 'VAX_LOT' + exploration.rename( + columns = + { + 'Country': 'COUNTRY', + 'Event count': 'COUNTRY_COUNT_BY_VAX_LOT' + }, + inplace = True) + exploration.set_index('COUNTRY',append = True, inplace = True) + return exploration diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index 47bb8a30530..5817f76f91f 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -166,13 +166,26 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "dd9fb2b0", "metadata": {}, "outputs": [], "source": [ "countryCountsByBatchcode.to_excel('tmp/countryCountsByBatchcode.xlsx')" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "49ecce85", + "metadata": {}, + "outputs": [], + "source": [ + "from CountryCountsByBatchcodeTablesMerger import CountryCountsByBatchcodeTablesMerger\n", + "\n", + "mergedCountryCountsByClickedBatchcodeTables = CountryCountsByBatchcodeTablesMerger.getCountryCountsByClickedBatchcodeTable()\n", + "mergedCountryCountsByClickedBatchcodeTables" + ] } ], "metadata": {