adding BatchCodeTableIntoHistogramDescriptionTableMerger

This commit is contained in:
frankknoll
2023-04-12 23:46:33 +02:00
parent ecf9bb996f
commit a715dc6cab
6 changed files with 132 additions and 15 deletions

View File

@@ -17,6 +17,7 @@ dependencies:
- pillow - pillow
- openpyxl - openpyxl
- tqdm - tqdm
- simplejson
# - python-decouple # - python-decouple
# - selenium # - selenium
# - webdriver-manager # - webdriver-manager

View File

@@ -0,0 +1,30 @@
import pandas as pd
class BatchCodeTableIntoHistogramDescriptionTableMerger:
    """Copies per-batch-code statistics into the histogram description dicts."""

    # Columns of the batch code table that are copied into every
    # HISTOGRAM_DESCRIPTION dict (kept in one place instead of being
    # spelled out once per use).
    _BATCH_CODE_COLUMNS = [
        'Adverse Reaction Reports',
        'Deaths',
        'Disabilities',
        'Life Threatening Illnesses',
        'Company',
        'Severe reports',
        'Lethality',
    ]

    # FK-TODO: refactor
    @staticmethod
    def mergeBatchCodeTableIntoHistogramDescriptionTable(batchCodeTable, histogramDescriptionTable):
        """Return a table whose description dicts also carry the batch code statistics.

        Both tables are left-joined on their index, so every row of
        ``histogramDescriptionTable`` is kept. For each row a new dict is
        built from the row's ``HISTOGRAM_DESCRIPTION`` dict plus one entry
        per column in ``_BATCH_CODE_COLUMNS``.

        The dicts stored in ``histogramDescriptionTable`` are NOT modified;
        the previous implementation wrote the statistics straight into them,
        silently changing the caller's table.

        Parameters:
            batchCodeTable: DataFrame providing the columns listed in
                ``_BATCH_CODE_COLUMNS``.
            histogramDescriptionTable: DataFrame with the columns
                ``HISTOGRAM_DESCRIPTION`` (dict values) and ``COUNTRY``.

        Returns:
            DataFrame with the columns ``HISTOGRAM_DESCRIPTION`` and
            ``COUNTRY``, indexed like ``histogramDescriptionTable``.

        Raises:
            pandas.errors.MergeError: propagated from ``pd.merge`` when the
                ``validate = 'one_to_one'`` check fails.
        """
        batchCodeColumns = BatchCodeTableIntoHistogramDescriptionTableMerger._BATCH_CODE_COLUMNS

        def merge(src):
            # Build a fresh dict so the caller's description dict stays untouched.
            return {
                **src['HISTOGRAM_DESCRIPTION'],
                **{column: src[column] for column in batchCodeColumns},
            }

        mergedTable = pd.merge(
            histogramDescriptionTable,
            batchCodeTable,
            how = 'left',
            left_index = True,
            right_index = True,
            validate = 'one_to_one')
        # Row-wise apply yields a Series holding one merged dict per row.
        mergedTable = mergedTable[['HISTOGRAM_DESCRIPTION', *batchCodeColumns]].apply(merge, axis = 'columns')
        mergedTable.name = 'HISTOGRAM_DESCRIPTION'
        mergedTable = mergedTable.to_frame()
        # COUNTRY is taken from the input table; assignment aligns on the index.
        mergedTable['COUNTRY'] = histogramDescriptionTable['COUNTRY']
        return mergedTable

View File

@@ -0,0 +1,76 @@
import unittest
from pandas.testing import assert_frame_equal
from BatchCodeTableIntoHistogramDescriptionTableMerger import BatchCodeTableIntoHistogramDescriptionTableMerger
from TestHelper import TestHelper
import pandas as pd
import numpy as np
class BatchCodeTableIntoHistogramDescriptionTableMergerTest(unittest.TestCase):
    """Checks that batch code statistics end up inside the histogram description dicts."""

    def test_mergeBatchCodeTableIntoHistogramDescriptionTable(self):
        # Given
        def vaxLotIndex():
            # Every table in this test is indexed by the same single batch code.
            return pd.Index(
                name = 'VAX_LOT',
                data = ['1808982'])

        def histograms():
            # A fresh list per call so input and expectation never share objects.
            return [
                {
                    'batchcodes': ['1808982', 'EW0175', 'FD1921'],
                    'histogram': {'Blood pressure orthostatic abnormal': 5, 'Chest discomfort': 1}
                },
                {
                    'batchcodes': ['015M20A', '1808982'],
                    'histogram': {'Chest discomfort': 2}
                }
            ]

        descriptionColumns = ['HISTOGRAM_DESCRIPTION', 'COUNTRY']
        histogramDescriptionTable = TestHelper.createDataFrame(
            columns = descriptionColumns,
            data = [
                [
                    {
                        'batchcode': '1808982',
                        'histograms': histograms()
                    },
                    'Global'
                ]
            ],
            index = vaxLotIndex())
        batchCodeTable = TestHelper.createDataFrame(
            columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Severe reports', 'Lethality'],
            data = [ [2, 1, 2, 2, 'MODERNA', 2/2 * 100, np.nan]],
            index = vaxLotIndex())

        # When
        mergedTable = BatchCodeTableIntoHistogramDescriptionTableMerger.mergeBatchCodeTableIntoHistogramDescriptionTable(
            batchCodeTable = batchCodeTable,
            histogramDescriptionTable = histogramDescriptionTable)

        # Then
        expectedTable = TestHelper.createDataFrame(
            columns = descriptionColumns,
            data = [
                [
                    {
                        'batchcode': '1808982',
                        'Adverse Reaction Reports': 2,
                        'Deaths': 1,
                        'Disabilities': 2,
                        'Life Threatening Illnesses': 2,
                        'Company': 'MODERNA',
                        'Severe reports': 2/2 * 100,
                        'Lethality': np.nan,
                        'histograms': histograms()
                    },
                    'Global'
                ]
            ],
            index = vaxLotIndex())
        assert_frame_equal(mergedTable, expectedTable)

View File

@@ -2,13 +2,23 @@ from HistogramFactory import createHistograms
from HistogramPersister import saveHistograms from HistogramPersister import saveHistograms
from MultiIndexExploder import MultiIndexExploder from MultiIndexExploder import MultiIndexExploder
from HistogramDescriptionTableFactory import HistogramDescriptionTableFactory from HistogramDescriptionTableFactory import HistogramDescriptionTableFactory
from BatchCodeTableIntoHistogramDescriptionTableMerger import BatchCodeTableIntoHistogramDescriptionTableMerger
def createAndSaveGlobalHistograms(symptomByBatchcodeTable, batchCodeTable):
    """Build the 'Global' histogram descriptions, enrich them with batch code data and save them.

    The symptom table is tagged with COUNTRY = 'Global', turned into
    histograms, exploded and converted into a histogram description table.
    The batch code table (re-indexed by _rearrange) is then merged into that
    table, and the result is saved once per COUNTRY group.
    """
    globalSymptomTable = symptomByBatchcodeTable.assign(COUNTRY = 'Global')
    explodedTable = MultiIndexExploder.explodeMultiIndexOfTable(
        createHistograms(globalSymptomTable))
    descriptionTable = HistogramDescriptionTableFactory.createHistogramDescriptionTable(explodedTable)
    descriptionTable = BatchCodeTableIntoHistogramDescriptionTableMerger.mergeBatchCodeTableIntoHistogramDescriptionTable(
        batchCodeTable = _rearrange(batchCodeTable),
        histogramDescriptionTable = descriptionTable)
    for country, tableForCountry in descriptionTable.groupby('COUNTRY'):
        print(f'saving histograms for {country}')
        saveHistograms(tableForCountry, country)
def _rearrange(batchCodeTable):
    """Return a copy of batchCodeTable indexed by its 'Batch' column, with the index named 'VAX_LOT'."""
    return batchCodeTable.set_index('Batch').rename_axis('VAX_LOT')

View File

@@ -112,16 +112,6 @@
"symptomByBatchcodeTable" "symptomByBatchcodeTable"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "23731536",
"metadata": {},
"outputs": [],
"source": [
"createAndSaveGlobalHistograms(symptomByBatchcodeTable)"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -149,13 +139,23 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "89d57e0f", "id": "189a639e",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"updateBatchCodeTableHtmlFile(batchCodeTable, batchCodeTableHtmlFile=\"../docs/batchCodeTable.html\")" "updateBatchCodeTableHtmlFile(batchCodeTable, batchCodeTableHtmlFile=\"../docs/batchCodeTable.html\")"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "ec2a692e",
"metadata": {},
"outputs": [],
"source": [
"createAndSaveGlobalHistograms(symptomByBatchcodeTable, batchCodeTable)"
]
},
{ {
"attachments": {}, "attachments": {},
"cell_type": "markdown", "cell_type": "markdown",

View File

@@ -1,5 +1,5 @@
import os import os
import json import simplejson as json
class IOUtils: class IOUtils:
@@ -31,7 +31,7 @@ class IOUtils:
def saveDictAsJson(dict, file): def saveDictAsJson(dict, file):
IOUtils.ensurePath(file) IOUtils.ensurePath(file)
with open(file, 'w') as outfile: with open(file, 'w') as outfile:
json.dump(dict, outfile) json.dump(dict, outfile, ignore_nan=True)
@staticmethod @staticmethod
def ensurePath(file): def ensurePath(file):