filterByBatchcodes()

This commit is contained in:
frankknoll
2023-06-10 17:50:10 +02:00
parent 79a11161e6
commit fe87328904
5 changed files with 46 additions and 53019 deletions

File diff suppressed because one or more lines are too long

View File

@@ -49,3 +49,7 @@ def getCountriesByBatchcodeBeforeDeletion():
internationalVaersCovid19 = getInternationalVaersCovid19(dataDir = 'VAERS/VAERSBeforeDeletion', years = [2020, 2021, 2022])
batchCodeTable = BatchCodeTableFactory(internationalVaersCovid19).createGlobalBatchCodeTable(countriesAsList = True)
return batchCodeTable[['Countries']]
def filterByBatchcodes(countryCountsByBatchcode, batchcodes2Retain):
    """Return only the rows whose batch code is one of batchcodes2Retain.

    Parameters:
        countryCountsByBatchcode: DataFrame indexed by a two-level MultiIndex
            whose first level holds the batch code.
        batchcodes2Retain: iterable (list, numpy array, ...) of batch codes
            to keep. Codes that do not occur in the table are ignored.

    Returns:
        The selected rows of countryCountsByBatchcode with all columns.
        The result is empty when none of the batch codes occur in the table.
    """
    # Label-based selection on a MultiIndex raises KeyError for list entries
    # that are missing from the index (enforced since pandas 2.0), so the
    # lookup is restricted to batch codes that are actually present.
    batchcodesInTable = set(countryCountsByBatchcode.index.get_level_values(0))
    batchcodes2Select = [
        batchcode
        for batchcode in batchcodes2Retain
        if batchcode in batchcodesInTable
    ]
    return countryCountsByBatchcode.loc[(batchcodes2Select, slice(None)), :]

View File

@@ -0,0 +1,38 @@
import unittest
from TestHelper import TestHelper
from pandas.testing import assert_frame_equal
import pandas as pd
from CountriesByBatchcodeProvider import filterByBatchcodes
class CountriesByBatchcodeProviderTest(unittest.TestCase):
    """Tests for the functions imported from CountriesByBatchcodeProvider."""

    def test_filterByBatchcodes(self):
        """Filtering by one batch code keeps exactly the rows of that batch code."""

        def createCountryCounts(data, tuples):
            # Builds a country-count frame indexed by (VAX_LOT, COUNTRY);
            # fresh column/name lists are created on every call.
            return TestHelper.createDataFrame(
                columns = ['COUNTRY_COUNT_BY_VAX_LOT Clicked', 'COUNTRY_COUNT_BY_VAX_LOT Before Deletion'],
                data = data,
                index = pd.MultiIndex.from_tuples(
                    names = ['VAX_LOT', 'COUNTRY'],
                    tuples = tuples))

        # Given
        countryCountsByBatchcode = createCountryCounts(
            data = [[10, 20],
                    [90, 95],
                    [15, 30],
                    [70, 80]],
            tuples = [('!D0181', 'Germany'),
                      ('# 009C01A', 'United States'),
                      ('!D0181', 'Hungary'),
                      ('# 009C01A', 'Germany')])

        # When
        countryCountsByBatchcodeFiltered = filterByBatchcodes(countryCountsByBatchcode, ['!D0181'])

        # Then
        countryCountsByBatchcodeExpected = createCountryCounts(
            data = [[10, 20],
                    [15, 30]],
            tuples = [('!D0181', 'Germany'),
                      ('!D0181', 'Hungary')])
        assert_frame_equal(countryCountsByBatchcodeFiltered, countryCountsByBatchcodeExpected)

View File

@@ -24,7 +24,7 @@
"from HistogramFactoryAndPersister import createAndSaveGlobalHistograms\n",
"from BatchCodeTableFactory import BatchCodeTableFactory\n",
"from Column2DataframeAdder import addColumn2Dataframe\n",
"from CountriesByBatchcodeProvider import getCountryCountsByBatchcodeTable\n",
"from CountriesByBatchcodeProvider import getCountryCountsByBatchcodeTable, filterByBatchcodes\n",
"from CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter import CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter\n"
]
},
@@ -178,22 +178,12 @@
{
"cell_type": "code",
"execution_count": null,
"id": "9588a10c",
"metadata": {},
"outputs": [],
"source": [
"countryCountsByBatchcode = getCountryCountsByBatchcodeTable()\n",
"countryCountsByBatchcode"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "70fcc420",
"metadata": {},
"outputs": [],
"source": [
"# FK-TODO: nur die Batchcodes von countryCountsByBatchcode beibehalten, die auch in batchCodeTable vorkommen"
"countryCountsByBatchcode = filterByBatchcodes(getCountryCountsByBatchcodeTable(), batchCodeTable['Batch'].values)\n",
"countryCountsByBatchcode"
]
},
{

File diff suppressed because it is too large Load Diff