refining HistogramDescriptionTableFactoryTest

This commit is contained in:
frankknoll
2023-02-15 10:11:41 +01:00
parent 11df7a0a4d
commit 3b24c32fa0
4 changed files with 131 additions and 78 deletions

View File

@@ -1,25 +0,0 @@
class DictByBatchcodeTable2DictConverter:
    """Convert a symptom table indexed by batch codes into a plain dict."""

    @staticmethod
    def convertDictByBatchcodeTable2Dict(dictByBatchcodeTable, batchcode):
        """Bundle ``batchcode`` with one record per row of the table.

        Args:
            dictByBatchcodeTable: DataFrame whose index levels hold the batch
                codes of each row; its 'SYMPTOM_COUNT_BY_VAX_LOT' column is
                exposed as 'histogram' in the records.
            batchcode: value stored unchanged under the "batchcode" key.

        Returns:
            A dict with the keys "batchcode" and "histograms"; the latter is
            the list of row records produced by ``_getHistograms``.
        """
        histograms = DictByBatchcodeTable2DictConverter._getHistograms(dictByBatchcodeTable)
        return {"batchcode": batchcode, "histograms": histograms}

    @staticmethod
    def _getHistograms(dictByBatchcodeTable):
        """Return the rows as records with 'histogram' and 'batchcodes' entries."""
        # rename() hands back a new frame, so the caller's table is not
        # touched when the 'batchcodes' column is added below.
        renamedTable = dictByBatchcodeTable.rename(
            columns={"SYMPTOM_COUNT_BY_VAX_LOT": "histogram"})
        DictByBatchcodeTable2DictConverter._addBatchcodesColumn(renamedTable)
        return renamedTable.to_dict('records')

    @staticmethod
    def _addBatchcodesColumn(dictByBatchcodeTable):
        """Add, in place, a 'batchcodes' column listing each row's index values."""
        levelNames = list(dictByBatchcodeTable.index.names)
        codesPerRow = dictByBatchcodeTable.reset_index()[levelNames].values.tolist()
        dictByBatchcodeTable['batchcodes'] = [
            DictByBatchcodeTable2DictConverter._getNaNBatchcodes(codes)
            for codes in codesPerRow
        ]

    @staticmethod
    def _getNaNBatchcodes(batchcodes):
        """Return ``batchcodes`` without the entries equal to the string 'nan'."""
        # FK-TODO: handle 'nan' everywhere correctly
        return list(filter(lambda code: code != 'nan', batchcodes))

View File

@@ -1,53 +0,0 @@
import unittest
import json
from DictByBatchcodeTable2DictConverter import DictByBatchcodeTable2DictConverter
from TestHelper import TestHelper
import pandas as pd
class DictByBatchcodeTable2DictConverterTest(unittest.TestCase):
    """Unit test for DictByBatchcodeTable2DictConverter."""

    def test_convertDictByBatchcodeTable2Json(self):
        """Two table rows become two histogram records; 'nan' codes are left out."""
        # Given
        batchcodeIndex = pd.MultiIndex.from_tuples(
            names = ['VAX_LOT1', 'VAX_LOT2', 'VAX_LOT3'],
            tuples = [
                ['1808982', 'EW0175', 'FD1921'],
                ['015M20A', '1808982', 'nan'],
            ])
        dictByBatchcodeTable = TestHelper.createDataFrame(
            columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],
            data = [
                [{"Blood pressure orthostatic abnormal": 5, "Chest discomfort": 1}],
                [{"Chest discomfort": 2}],
            ],
            index = batchcodeIndex)

        # When
        actual = DictByBatchcodeTable2DictConverter.convertDictByBatchcodeTable2Dict(
            dictByBatchcodeTable,
            '1808982')

        # Then
        expected = {
            "batchcode": "1808982",
            "histograms": [
                {
                    "batchcodes": ["1808982", "EW0175", "FD1921"],
                    "histogram": {
                        "Blood pressure orthostatic abnormal": 5,
                        "Chest discomfort": 1,
                    },
                },
                {
                    "batchcodes": ["015M20A", "1808982"],
                    "histogram": {"Chest discomfort": 2},
                },
            ],
        }
        self.assertEqual(actual, expected)

View File

@@ -0,0 +1,36 @@
import pandas as pd
class HistogramDescriptionTableFactory:
    """Build a table holding one histogram description per exploded batch code."""

    @staticmethod
    def createHistogramDescriptionTable(dictByBatchcodeTable):
        """Aggregate the symptom histograms of every exploded batch code.

        Args:
            dictByBatchcodeTable: DataFrame with a 'SYMPTOM_COUNT_BY_VAX_LOT'
                column and a MultiIndex containing the level
                'VAX_LOT_EXPLODED' next to the batch-code levels of each row.

        Returns:
            DataFrame whose index is named 'VAX_LOT' and whose
            'SYMPTOM_COUNT_BY_VAX_LOT' column is renamed to
            'HISTOGRAM_DESCRIPTION'. Each cell is the dict built by
            ``_getHistograms`` for that group. The group keyed by the string
            'nan' is removed when it exists.
        """
        histogramDescriptionTable = (
            dictByBatchcodeTable
            .groupby('VAX_LOT_EXPLODED')
            .agg(HistogramDescriptionTableFactory._getHistograms)
            # errors='ignore': a table without any 'nan' group must not fail
            # with a KeyError, there is simply nothing to remove.
            .drop('nan', errors='ignore'))
        histogramDescriptionTable = histogramDescriptionTable.rename(
            columns={"SYMPTOM_COUNT_BY_VAX_LOT": "HISTOGRAM_DESCRIPTION"})
        histogramDescriptionTable.index.rename('VAX_LOT', inplace=True)
        return histogramDescriptionTable

    @staticmethod
    def _getHistograms(dictByBatchcodeTable):
        """Build the description dict for the rows of one group.

        Args:
            dictByBatchcodeTable: the group's column as a Series (it is
                converted with ``to_frame()``), still carrying the MultiIndex
                including 'VAX_LOT_EXPLODED'.

        Returns:
            ``{"batchcode": ..., "histograms": [...]}`` where "batchcode" is
            the 'VAX_LOT_EXPLODED' value of the first row and "histograms" is
            one record per row.
        """
        groupTable = dictByBatchcodeTable.to_frame()
        # rename() returns a new frame, which is then extended in place.
        groupTable = groupTable.rename(columns={"SYMPTOM_COUNT_BY_VAX_LOT": "histogram"})
        HistogramDescriptionTableFactory._addBatchcodesColumn(groupTable)
        return {
            "batchcode": groupTable.index.get_level_values('VAX_LOT_EXPLODED')[0],
            "histograms": groupTable.to_dict('records'),
        }

    @staticmethod
    def _addBatchcodesColumn(dictByBatchcodeTable):
        """Add, in place, a 'batchcodes' column built from the index levels.

        The 'VAX_LOT_EXPLODED' level is skipped; the remaining level values of
        each row are listed in level order, without 'nan' entries.
        """
        batchcodeColumns = [
            name
            for name in dictByBatchcodeTable.index.names
            if name != 'VAX_LOT_EXPLODED'
        ]
        codesPerRow = dictByBatchcodeTable.reset_index()[batchcodeColumns].values.tolist()
        dictByBatchcodeTable['batchcodes'] = [
            HistogramDescriptionTableFactory._getNaNBatchcodes(codes)
            for codes in codesPerRow
        ]

    @staticmethod
    def _getNaNBatchcodes(batchcodes):
        """Return ``batchcodes`` without the entries equal to the string 'nan'."""
        # FK-TODO: handle 'nan' everywhere correctly
        return [batchcode for batchcode in batchcodes if batchcode != 'nan']

View File

@@ -0,0 +1,95 @@
import unittest
from pandas.testing import assert_frame_equal
from HistogramDescriptionTableFactory import HistogramDescriptionTableFactory
from TestHelper import TestHelper
import pandas as pd
class HistogramDescriptionTableFactoryTest(unittest.TestCase):
    """Unit test for HistogramDescriptionTableFactory."""

    def test_createHistogramDescriptionTable(self):
        """Each exploded batch code gets its own description; 'nan' gets none."""

        # Builders return fresh objects on every call so that no dict or list
        # is shared between fixture rows and expected rows.
        def orthostaticAndChest():
            return {"Blood pressure orthostatic abnormal": 5, "Chest discomfort": 1}

        def chestOnly():
            return {"Chest discomfort": 2}

        def threeLotsEntry():
            return {
                "batchcodes": ["1808982", "EW0175", "FD1921"],
                "histogram": orthostaticAndChest(),
            }

        def twoLotsEntry():
            return {
                "batchcodes": ["015M20A", "1808982"],
                "histogram": chestOnly(),
            }

        # Given
        explodedIndex = pd.MultiIndex.from_tuples(
            names = ['VAX_LOT_EXPLODED', 'VAX_LOT1', 'VAX_LOT2', 'VAX_LOT3'],
            tuples = [
                ['1808982', '1808982', 'EW0175', 'FD1921'],
                ['EW0175', '1808982', 'EW0175', 'FD1921'],
                ['FD1921', '1808982', 'EW0175', 'FD1921'],
                ['015M20A', '015M20A', '1808982', 'nan'],
                ['1808982', '015M20A', '1808982', 'nan'],
                ['nan', '015M20A', '1808982', 'nan'],
            ])
        dictByBatchcodeTable = TestHelper.createDataFrame(
            columns = ['SYMPTOM_COUNT_BY_VAX_LOT'],
            data = [
                [orthostaticAndChest()],
                [orthostaticAndChest()],
                [orthostaticAndChest()],
                [chestOnly()],
                [chestOnly()],
                [chestOnly()],
            ],
            index = explodedIndex)

        # When
        histogramDescriptionTable = HistogramDescriptionTableFactory.createHistogramDescriptionTable(
            dictByBatchcodeTable)

        # Then
        expectedTable = TestHelper.createDataFrame(
            columns = ['HISTOGRAM_DESCRIPTION'],
            data = [
                [{"batchcode": "1808982", "histograms": [threeLotsEntry(), twoLotsEntry()]}],
                [{"batchcode": "EW0175", "histograms": [threeLotsEntry()]}],
                [{"batchcode": "FD1921", "histograms": [threeLotsEntry()]}],
                [{"batchcode": "015M20A", "histograms": [twoLotsEntry()]}],
            ],
            index = pd.Index(
                name = 'VAX_LOT',
                data = ['1808982', 'EW0175', 'FD1921', '015M20A']))
        # check_like = True: the order of index and columns is ignored.
        assert_frame_equal(
            histogramDescriptionTable,
            expectedTable,
            check_like = True)