filterByBatchcodes()

This commit is contained in:
frankknoll
2023-06-10 17:50:10 +02:00
parent 79a11161e6
commit fe87328904
5 changed files with 46 additions and 53019 deletions

File diff suppressed because one or more lines are too long

View File

@@ -49,3 +49,7 @@ def getCountriesByBatchcodeBeforeDeletion():
internationalVaersCovid19 = getInternationalVaersCovid19(dataDir = 'VAERS/VAERSBeforeDeletion', years = [2020, 2021, 2022]) internationalVaersCovid19 = getInternationalVaersCovid19(dataDir = 'VAERS/VAERSBeforeDeletion', years = [2020, 2021, 2022])
batchCodeTable = BatchCodeTableFactory(internationalVaersCovid19).createGlobalBatchCodeTable(countriesAsList = True) batchCodeTable = BatchCodeTableFactory(internationalVaersCovid19).createGlobalBatchCodeTable(countriesAsList = True)
return batchCodeTable[['Countries']] return batchCodeTable[['Countries']]
def filterByBatchcodes(countryCountsByBatchcode, batchcodes2Retain):
    """Return the rows of countryCountsByBatchcode whose batch code is retained.

    Args:
        countryCountsByBatchcode: DataFrame indexed by a MultiIndex whose
            first level holds the batch code.
        batchcodes2Retain: list-like of batch codes to keep. Batch codes
            that do not occur in the table are ignored.

    Returns:
        A DataFrame with the same columns that contains only the rows whose
        first index level is one of batchcodes2Retain, in the row order of
        countryCountsByBatchcode.
    """
    # A boolean mask is used instead of
    # .loc[(batchcodes2Retain, slice(None)), :] because the label-based lookup
    # raises a KeyError in pandas >= 2.0 as soon as a single requested batch
    # code is missing from the index.
    isRetained = countryCountsByBatchcode.index.get_level_values(0).isin(batchcodes2Retain)
    return countryCountsByBatchcode.loc[isRetained, :]

View File

@@ -0,0 +1,38 @@
import unittest
from TestHelper import TestHelper
from pandas.testing import assert_frame_equal
import pandas as pd
from CountriesByBatchcodeProvider import filterByBatchcodes
class CountriesByBatchcodeProviderTest(unittest.TestCase):
    """Unit tests for the functions of CountriesByBatchcodeProvider."""

    def test_filterByBatchcodes(self):
        """Rows of batch codes that are not requested are dropped."""
        # Given
        columnNames = ('COUNTRY_COUNT_BY_VAX_LOT Clicked', 'COUNTRY_COUNT_BY_VAX_LOT Before Deletion')
        indexNames = ('VAX_LOT', 'COUNTRY')
        countryCountsByBatchcode = TestHelper.createDataFrame(
            columns = list(columnNames),
            data = [
                [10, 20],
                [90, 95],
                [15, 30],
                [70, 80],
            ],
            index = pd.MultiIndex.from_tuples(
                names = list(indexNames),
                tuples = [
                    ('!D0181', 'Germany'),
                    ('# 009C01A', 'United States'),
                    ('!D0181', 'Hungary'),
                    ('# 009C01A', 'Germany'),
                ]))

        # When
        countryCountsByBatchcodeFiltered = filterByBatchcodes(countryCountsByBatchcode, ['!D0181'])

        # Then
        countryCountsByBatchcodeExpected = TestHelper.createDataFrame(
            columns = list(columnNames),
            data = [
                [10, 20],
                [15, 30],
            ],
            index = pd.MultiIndex.from_tuples(
                names = list(indexNames),
                tuples = [
                    ('!D0181', 'Germany'),
                    ('!D0181', 'Hungary'),
                ]))
        assert_frame_equal(countryCountsByBatchcodeFiltered, countryCountsByBatchcodeExpected)

View File

@@ -24,7 +24,7 @@
"from HistogramFactoryAndPersister import createAndSaveGlobalHistograms\n", "from HistogramFactoryAndPersister import createAndSaveGlobalHistograms\n",
"from BatchCodeTableFactory import BatchCodeTableFactory\n", "from BatchCodeTableFactory import BatchCodeTableFactory\n",
"from Column2DataframeAdder import addColumn2Dataframe\n", "from Column2DataframeAdder import addColumn2Dataframe\n",
"from CountriesByBatchcodeProvider import getCountryCountsByBatchcodeTable\n", "from CountriesByBatchcodeProvider import getCountryCountsByBatchcodeTable, filterByBatchcodes\n",
"from CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter import CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter\n" "from CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter import CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter\n"
] ]
}, },
@@ -178,22 +178,12 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "9588a10c",
"metadata": {},
"outputs": [],
"source": [
"countryCountsByBatchcode = getCountryCountsByBatchcodeTable()\n",
"countryCountsByBatchcode"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "70fcc420", "id": "70fcc420",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# FK-TODO: nur die Batchcodes von countryCountsByBatchcode beibehalten, die auch in batchCodeTable vorkommen" "countryCountsByBatchcode = filterByBatchcodes(getCountryCountsByBatchcodeTable(), batchCodeTable['Batch'].values)\n",
"countryCountsByBatchcode"
] ]
}, },
{ {

File diff suppressed because it is too large Load Diff