adding CountryCountsByBatchcodeTablesMerger

This commit is contained in:
frankknoll
2023-06-02 14:36:48 +02:00
parent b0142cef77
commit c733e26c45
5 changed files with 49 additions and 20 deletions

View File

@@ -2,28 +2,15 @@ import pandas as pd
from BatchCodeTableFactory import BatchCodeTableFactory from BatchCodeTableFactory import BatchCodeTableFactory
from InternationalVaersCovid19Provider import getInternationalVaersCovid19 from InternationalVaersCovid19Provider import getInternationalVaersCovid19
from SummationTableFactory import SummationTableFactory from SummationTableFactory import SummationTableFactory
from CountryCountsByBatchcodeTablesMerger import CountryCountsByBatchcodeTablesMerger
def getCountryCountsByBatchcodeTable():
    """Return the combination of the clicked-batch-code and before-deletion tables.

    The clicked-batch-code table is obtained from
    CountryCountsByBatchcodeTablesMerger, the before-deletion table from
    _getCountryCountsByBatchcodeBeforeDeletion(); both are handed to
    _combineCountryCountsByBatchcodeTables(), whose result is returned as is.
    """
    clickedTable = CountryCountsByBatchcodeTablesMerger.getCountryCountsByClickedBatchcodeTable()
    beforeDeletionTable = _getCountryCountsByBatchcodeBeforeDeletion()
    return _combineCountryCountsByBatchcodeTables(
        countryCountsByClickedBatchcode = clickedTable,
        countryCountsByBatchcodeBeforeDeletion = beforeDeletionTable)
def _getCountryCountsByClickedBatchcode():
    """Load 'data/Country By Clicked Batchcode.csv' indexed by (VAX_LOT, COUNTRY).

    Lines 0-5 and 7 of the file are skipped, so line 6 supplies the column
    headers. The first column becomes the index level 'VAX_LOT', the 'Country'
    column is renamed to 'COUNTRY' and appended to the index, and
    'Event count' is renamed to 'COUNTRY_COUNT_BY_VAX_LOT'.
    """
    table = pd.read_csv(
        'data/Country By Clicked Batchcode.csv',
        index_col = 0,
        skiprows = [0, 1, 2, 3, 4, 5, 7])
    table = table.rename_axis('VAX_LOT')
    table = table.rename(
        columns = {
            'Country': 'COUNTRY',
            'Event count': 'COUNTRY_COUNT_BY_VAX_LOT'
        })
    return table.set_index('COUNTRY', append = True)
def _getCountryCountsByBatchcodeBeforeDeletion(): def _getCountryCountsByBatchcodeBeforeDeletion():
internationalVaersCovid19 = getInternationalVaersCovid19(dataDir = 'VAERS/VAERSBeforeDeletion', years = [2020, 2021, 2022]) internationalVaersCovid19 = getInternationalVaersCovid19(dataDir = 'VAERS/VAERSBeforeDeletion', years = [2020, 2021, 2022])
return (internationalVaersCovid19 return (internationalVaersCovid19
@@ -47,7 +34,8 @@ def _combineCountryCountsByBatchcodeTables(countryCountsByClickedBatchcode, coun
def getCountriesByClickedBatchcode(): def getCountriesByClickedBatchcode():
return (_getCountryCountsByClickedBatchcode() return (CountryCountsByBatchcodeTablesMerger
.getCountryCountsByClickedBatchcodeTable()
.reset_index(level = 'COUNTRY') .reset_index(level = 'COUNTRY')
.groupby('VAX_LOT') .groupby('VAX_LOT')
.agg( .agg(

View File

@@ -1,10 +1,21 @@
import pandas as pd import pandas as pd
import glob
from CountryCountsByClickedBatchcodeProvider import CountryCountsByClickedBatchcodeProvider
class CountryCountsByBatchcodeTablesMerger:
    """Builds one summed country-count table out of several tables."""

    @staticmethod
    def mergeCountryCountsByBatchcodeTables(countryCountsByBatchcodeTables):
        """Concatenate the given tables and sum rows that share an index key.

        The rows are grouped by the index level names of the first table in
        the list, so every table is expected to use that same index layout.
        """
        stackedTables = pd.concat(countryCountsByBatchcodeTables)
        indexLevelNames = countryCountsByBatchcodeTables[0].index.names
        return stackedTables.groupby(indexLevelNames).sum()

    @staticmethod
    def getCountryCountsByClickedBatchcodeTable():
        """Load every table found by _getTables() and return their merged sum."""
        tables = CountryCountsByBatchcodeTablesMerger._getTables()
        return CountryCountsByBatchcodeTablesMerger.mergeCountryCountsByBatchcodeTables(tables)

    @staticmethod
    def _getTables():
        """Return one table per path matched by the glob pattern 'data/*'."""
        tables = []
        for file in glob.glob(r'data/*'):
            tables.append(CountryCountsByClickedBatchcodeProvider.getCountryCountsByClickedBatchcode(file))
        return tables

View File

@@ -6,7 +6,7 @@ from CountryCountsByBatchcodeTablesMerger import CountryCountsByBatchcodeTablesM
class CountryCountsByBatchcodeTablesMergerTest(unittest.TestCase): class CountryCountsByBatchcodeTablesMergerTest(unittest.TestCase):
def test_merge(self): def test_mergeCountryCountsByBatchcodeTables(self):
# Given # Given
countryCountsByBatchcodeTable1 = TestHelper.createDataFrame( countryCountsByBatchcodeTable1 = TestHelper.createDataFrame(
columns = ['COUNTRY_COUNT_BY_VAX_LOT'], columns = ['COUNTRY_COUNT_BY_VAX_LOT'],
@@ -24,7 +24,7 @@ class CountryCountsByBatchcodeTablesMergerTest(unittest.TestCase):
tuples = [['12345', 'Germany']])) tuples = [['12345', 'Germany']]))
# When # When
mergedCountryCountsByBatchcodeTables = CountryCountsByBatchcodeTablesMerger.merge( mergedCountryCountsByBatchcodeTables = CountryCountsByBatchcodeTablesMerger.mergeCountryCountsByBatchcodeTables(
[ [
countryCountsByBatchcodeTable1, countryCountsByBatchcodeTable1,
countryCountsByBatchcodeTable2 countryCountsByBatchcodeTable2

View File

@@ -0,0 +1,17 @@
import pandas as pd
class CountryCountsByClickedBatchcodeProvider:
    """Reads a "country by clicked batch code" CSV file into a table."""

    @staticmethod
    def getCountryCountsByClickedBatchcode(file):
        """Read `file` and return its counts indexed by (VAX_LOT, COUNTRY).

        Lines 0-5 and 7 of the file are skipped, so line 6 supplies the column
        headers. The first column becomes the index level 'VAX_LOT', the
        'Country' column is renamed to 'COUNTRY' and appended to the index,
        and 'Event count' is renamed to 'COUNTRY_COUNT_BY_VAX_LOT'.
        """
        columnNames = {
            'Country': 'COUNTRY',
            'Event count': 'COUNTRY_COUNT_BY_VAX_LOT'
        }
        table = pd.read_csv(file, index_col = 0, skiprows = [0, 1, 2, 3, 4, 5, 7])
        return (table
            .rename_axis('VAX_LOT')
            .rename(columns = columnNames)
            .set_index('COUNTRY', append = True))

View File

@@ -166,13 +166,26 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": null,
"id": "dd9fb2b0", "id": "dd9fb2b0",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"countryCountsByBatchcode.to_excel('tmp/countryCountsByBatchcode.xlsx')" "countryCountsByBatchcode.to_excel('tmp/countryCountsByBatchcode.xlsx')"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"id": "49ecce85",
"metadata": {},
"outputs": [],
"source": [
"from CountryCountsByBatchcodeTablesMerger import CountryCountsByBatchcodeTablesMerger\n",
"\n",
"mergedCountryCountsByClickedBatchcodeTables = CountryCountsByBatchcodeTablesMerger.getCountryCountsByClickedBatchcodeTable()\n",
"mergedCountryCountsByClickedBatchcodeTables"
]
} }
], ],
"metadata": { "metadata": {