adding CountryCountsByBatchcodeTablesMerger

This commit is contained in:
frankknoll
2023-06-02 14:36:48 +02:00
parent b0142cef77
commit c733e26c45
5 changed files with 49 additions and 20 deletions

View File

@@ -2,28 +2,15 @@ import pandas as pd
from BatchCodeTableFactory import BatchCodeTableFactory
from InternationalVaersCovid19Provider import getInternationalVaersCovid19
from SummationTableFactory import SummationTableFactory
from CountryCountsByBatchcodeTablesMerger import CountryCountsByBatchcodeTablesMerger
def getCountryCountsByBatchcodeTable():
    """Combine the clicked-batchcode country counts with the before-deletion counts.

    The clicked-batchcode table is obtained from
    CountryCountsByBatchcodeTablesMerger; the before-deletion table comes from
    _getCountryCountsByBatchcodeBeforeDeletion(). Both are handed to
    _combineCountryCountsByBatchcodeTables(), whose result is returned unchanged.
    """
    # Each keyword may be passed only once: the merger-based table is the single
    # source for countryCountsByClickedBatchcode.
    return _combineCountryCountsByBatchcodeTables(
        countryCountsByClickedBatchcode = CountryCountsByBatchcodeTablesMerger.getCountryCountsByClickedBatchcodeTable(),
        countryCountsByBatchcodeBeforeDeletion = _getCountryCountsByBatchcodeBeforeDeletion())
def _getCountryCountsByClickedBatchcode():
    """Load 'data/Country By Clicked Batchcode.csv' as a (VAX_LOT, COUNTRY) table.

    Returns:
        A DataFrame indexed by the two levels 'VAX_LOT' and 'COUNTRY' in which
        the CSV column 'Event count' is renamed to 'COUNTRY_COUNT_BY_VAX_LOT'.
    """
    # Physical rows 0-5 and 7 are skipped, so row 6 supplies the column names.
    # NOTE(review): presumably these rows are export preamble - confirm against the file.
    return (pd
        .read_csv('data/Country By Clicked Batchcode.csv', index_col = 0, skiprows = [0, 1, 2, 3, 4, 5, 7])
        .rename_axis('VAX_LOT')
        .rename(
            columns =
                {
                    'Country': 'COUNTRY',
                    'Event count': 'COUNTRY_COUNT_BY_VAX_LOT'
                })
        # append = True keeps VAX_LOT as the first index level.
        .set_index('COUNTRY', append = True))
def _getCountryCountsByBatchcodeBeforeDeletion():
internationalVaersCovid19 = getInternationalVaersCovid19(dataDir = 'VAERS/VAERSBeforeDeletion', years = [2020, 2021, 2022])
return (internationalVaersCovid19
@@ -47,7 +34,8 @@ def _combineCountryCountsByBatchcodeTables(countryCountsByClickedBatchcode, coun
def getCountriesByClickedBatchcode():
return (_getCountryCountsByClickedBatchcode()
return (CountryCountsByBatchcodeTablesMerger
.getCountryCountsByClickedBatchcodeTable()
.reset_index(level = 'COUNTRY')
.groupby('VAX_LOT')
.agg(

View File

@@ -1,10 +1,21 @@
import pandas as pd
import glob
from CountryCountsByClickedBatchcodeProvider import CountryCountsByClickedBatchcodeProvider
class CountryCountsByBatchcodeTablesMerger:
    """Merge several country-counts-by-batchcode tables into a single table."""

    @staticmethod
    def mergeCountryCountsByBatchcodeTables(countryCountsByBatchcodeTables):
        """Concatenate the given tables and sum rows that share an index entry.

        Args:
            countryCountsByBatchcodeTables: non-empty iterable of DataFrames that
                all use the same named index levels as the first table.

        Returns:
            One DataFrame grouped by the index level names of the first table,
            with the values of equal index entries added up.

        Raises:
            ValueError: raised by pandas.concat when no tables are given.
        """
        # Materialize once so that generators are accepted and indexable.
        tables = list(countryCountsByBatchcodeTables)
        return (pd
            .concat(tables)
            .groupby(list(tables[0].index.names))
            .sum())

    @staticmethod
    def merge(countryCountsByBatchcodeTables):
        """Alias of mergeCountryCountsByBatchcodeTables for callers using the short name."""
        return CountryCountsByBatchcodeTablesMerger.mergeCountryCountsByBatchcodeTables(countryCountsByBatchcodeTables)

    @staticmethod
    def getCountryCountsByClickedBatchcodeTable():
        """Read every table found under 'data/' and return them merged into one."""
        return CountryCountsByBatchcodeTablesMerger.mergeCountryCountsByBatchcodeTables(CountryCountsByBatchcodeTablesMerger._getTables())

    @staticmethod
    def _getTables():
        """Load one table per path matched by the glob pattern 'data/*'."""
        # NOTE(review): the pattern matches every entry in data/, not only CSV
        # exports - confirm that the directory contains nothing else.
        files = glob.glob(r'data/*')
        return [CountryCountsByClickedBatchcodeProvider.getCountryCountsByClickedBatchcode(file) for file in files]

View File

@@ -6,7 +6,7 @@ from CountryCountsByBatchcodeTablesMerger import CountryCountsByBatchcodeTablesM
class CountryCountsByBatchcodeTablesMergerTest(unittest.TestCase):
def test_merge(self):
def test_mergeCountryCountsByBatchcodeTables(self):
# Given
countryCountsByBatchcodeTable1 = TestHelper.createDataFrame(
columns = ['COUNTRY_COUNT_BY_VAX_LOT'],
@@ -24,7 +24,7 @@ class CountryCountsByBatchcodeTablesMergerTest(unittest.TestCase):
tuples = [['12345', 'Germany']]))
# When
mergedCountryCountsByBatchcodeTables = CountryCountsByBatchcodeTablesMerger.merge(
mergedCountryCountsByBatchcodeTables = CountryCountsByBatchcodeTablesMerger.mergeCountryCountsByBatchcodeTables(
[
countryCountsByBatchcodeTable1,
countryCountsByBatchcodeTable2

View File

@@ -0,0 +1,17 @@
import pandas as pd
class CountryCountsByClickedBatchcodeProvider:
    """Read a 'country by clicked batchcode' CSV file into a table."""

    @staticmethod
    def getCountryCountsByClickedBatchcode(file):
        """Load the given CSV file as a table indexed by (VAX_LOT, COUNTRY).

        Args:
            file: path (or any other input accepted by pandas.read_csv) of the
                CSV file to read.

        Returns:
            A DataFrame indexed by the two levels 'VAX_LOT' and 'COUNTRY' in
            which the CSV column 'Event count' is renamed to
            'COUNTRY_COUNT_BY_VAX_LOT'.
        """
        # Physical rows 0-5 and 7 are skipped, so row 6 supplies the column names.
        # NOTE(review): presumably these rows are export preamble - confirm against the file.
        return (pd
            .read_csv(file, index_col = 0, skiprows = [0, 1, 2, 3, 4, 5, 7])
            .rename_axis('VAX_LOT')
            .rename(
                columns =
                    {
                        'Country': 'COUNTRY',
                        'Event count': 'COUNTRY_COUNT_BY_VAX_LOT'
                    })
            # append = True keeps VAX_LOT as the first index level.
            .set_index('COUNTRY', append = True))

View File

@@ -166,13 +166,26 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"id": "dd9fb2b0",
"metadata": {},
"outputs": [],
"source": [
"countryCountsByBatchcode.to_excel('tmp/countryCountsByBatchcode.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "49ecce85",
"metadata": {},
"outputs": [],
"source": [
"from CountryCountsByBatchcodeTablesMerger import CountryCountsByBatchcodeTablesMerger\n",
"\n",
"mergedCountryCountsByClickedBatchcodeTables = CountryCountsByBatchcodeTablesMerger.getCountryCountsByClickedBatchcodeTable()\n",
"mergedCountryCountsByClickedBatchcodeTables"
]
}
],
"metadata": {