adding BatchCodeTableIntoHistogramDescriptionTableMerger

This commit is contained in:
frankknoll
2023-04-12 23:46:33 +02:00
parent ecf9bb996f
commit a715dc6cab
6 changed files with 132 additions and 15 deletions

View File

@@ -17,6 +17,7 @@ dependencies:
- pillow
- openpyxl
- tqdm
- simplejson
# - python-decouple
# - selenium
# - webdriver-manager

View File

@@ -0,0 +1,30 @@
import pandas as pd
class BatchCodeTableIntoHistogramDescriptionTableMerger:
    """Enriches histogram descriptions with the statistics of their batch code."""

    # Columns of the batch code table that are copied into every histogram
    # description dict, in the order in which they are appended to it.
    _BATCH_CODE_COLUMNS = [
        'Adverse Reaction Reports',
        'Deaths',
        'Disabilities',
        'Life Threatening Illnesses',
        'Company',
        'Severe reports',
        'Lethality',
    ]

    @staticmethod
    def mergeBatchCodeTableIntoHistogramDescriptionTable(batchCodeTable, histogramDescriptionTable):
        """Return a table whose descriptions also carry the batch code statistics.

        Both tables are joined on their index (left join, validated as
        one-to-one). For every row of ``histogramDescriptionTable`` a new dict
        is built from the row's ``HISTOGRAM_DESCRIPTION`` dict followed by the
        batch code columns of the matching ``batchCodeTable`` row. Rows without
        a matching batch code receive the missing values produced by the left
        join.

        Args:
            batchCodeTable: DataFrame containing the columns listed in
                ``_BATCH_CODE_COLUMNS``.
            histogramDescriptionTable: DataFrame with the columns
                ``HISTOGRAM_DESCRIPTION`` (one dict per row) and ``COUNTRY``.

        Returns:
            DataFrame with the columns ``HISTOGRAM_DESCRIPTION`` and
            ``COUNTRY``, indexed like ``histogramDescriptionTable``.

        Raises:
            pandas.errors.MergeError: if the index of either table is not
                unique (``validate='one_to_one'``).
        """
        batchCodeColumns = BatchCodeTableIntoHistogramDescriptionTableMerger._BATCH_CODE_COLUMNS
        joinedTable = pd.merge(
            histogramDescriptionTable,
            batchCodeTable,
            how = 'left',
            left_index = True,
            right_index = True,
            validate = 'one_to_one')
        joinedTable = joinedTable[['HISTOGRAM_DESCRIPTION'] + batchCodeColumns]
        # Build a fresh dict per row instead of writing into the caller's dicts,
        # so the input table is left untouched. Iterating the rows explicitly
        # (rather than DataFrame.apply) also keeps an empty table working.
        descriptions = [
            {
                **row['HISTOGRAM_DESCRIPTION'],
                **{column: row[column] for column in batchCodeColumns},
            }
            for _, row in joinedTable.iterrows()
        ]
        mergedTable = pd.Series(
            descriptions,
            index = joinedTable.index,
            name = 'HISTOGRAM_DESCRIPTION',
            dtype = object).to_frame()
        mergedTable['COUNTRY'] = histogramDescriptionTable['COUNTRY']
        return mergedTable

View File

@@ -0,0 +1,76 @@
import unittest
from pandas.testing import assert_frame_equal
from BatchCodeTableIntoHistogramDescriptionTableMerger import BatchCodeTableIntoHistogramDescriptionTableMerger
from TestHelper import TestHelper
import pandas as pd
import numpy as np
class BatchCodeTableIntoHistogramDescriptionTableMergerTest(unittest.TestCase):
    """Checks that batch code statistics end up inside the histogram descriptions."""

    def test_mergeBatchCodeTableIntoHistogramDescriptionTable(self):
        # Given
        def vaxLotIndex():
            # Every table in this test is indexed by the same single batch code.
            return pd.Index(
                name = 'VAX_LOT',
                data = ['1808982'])

        def histograms():
            # Fresh structure per call so input and expectation share no objects.
            return [
                {
                    'batchcodes': ['1808982', 'EW0175', 'FD1921'],
                    'histogram': {'Blood pressure orthostatic abnormal': 5, 'Chest discomfort': 1}
                },
                {
                    'batchcodes': ['015M20A', '1808982'],
                    'histogram': {'Chest discomfort': 2}
                }
            ]

        plainDescription = {
            'batchcode': '1808982',
            'histograms': histograms()
        }
        histogramDescriptionTable = TestHelper.createDataFrame(
            columns = ['HISTOGRAM_DESCRIPTION', 'COUNTRY'],
            data = [[plainDescription, 'Global']],
            index = vaxLotIndex())
        batchCodeTable = TestHelper.createDataFrame(
            columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Severe reports', 'Lethality'],
            data = [[2, 1, 2, 2, 'MODERNA', 2/2 * 100, np.nan]],
            index = vaxLotIndex())

        # When
        mergedTable = BatchCodeTableIntoHistogramDescriptionTableMerger.mergeBatchCodeTableIntoHistogramDescriptionTable(
            batchCodeTable = batchCodeTable,
            histogramDescriptionTable = histogramDescriptionTable)

        # Then
        enrichedDescription = {
            'batchcode': '1808982',
            'Adverse Reaction Reports': 2,
            'Deaths': 1,
            'Disabilities': 2,
            'Life Threatening Illnesses': 2,
            'Company': 'MODERNA',
            'Severe reports': 2/2 * 100,
            'Lethality': np.nan,
            'histograms': histograms()
        }
        expectedTable = TestHelper.createDataFrame(
            columns = ['HISTOGRAM_DESCRIPTION', 'COUNTRY'],
            data = [[enrichedDescription, 'Global']],
            index = vaxLotIndex())
        assert_frame_equal(mergedTable, expectedTable)

View File

@@ -2,13 +2,23 @@ from HistogramFactory import createHistograms
from HistogramPersister import saveHistograms
from MultiIndexExploder import MultiIndexExploder
from HistogramDescriptionTableFactory import HistogramDescriptionTableFactory
from BatchCodeTableIntoHistogramDescriptionTableMerger import BatchCodeTableIntoHistogramDescriptionTableMerger
def createAndSaveGlobalHistograms(symptomByBatchcodeTable):
def createAndSaveGlobalHistograms(symptomByBatchcodeTable, batchCodeTable):
    """Build the histogram descriptions, enrich them with batch code data and save them.

    The symptom table is tagged with ``COUNTRY = 'Global'``, run through
    ``createHistograms``, ``MultiIndexExploder`` and
    ``HistogramDescriptionTableFactory``, then merged with the re-indexed
    ``batchCodeTable``. The result is grouped by ``COUNTRY`` and each group is
    passed to ``saveHistograms``.
    """
    globalSymptomTable = symptomByBatchcodeTable.assign(COUNTRY = 'Global')
    descriptionTable = HistogramDescriptionTableFactory.createHistogramDescriptionTable(
        MultiIndexExploder.explodeMultiIndexOfTable(
            createHistograms(globalSymptomTable)))
    enrichedDescriptionTable = BatchCodeTableIntoHistogramDescriptionTableMerger.mergeBatchCodeTableIntoHistogramDescriptionTable(
        batchCodeTable = _rearrange(batchCodeTable),
        histogramDescriptionTable = descriptionTable)
    for country, tableForCountry in enrichedDescriptionTable.groupby('COUNTRY'):
        print(f'saving histograms for {country}')
        saveHistograms(tableForCountry, country)
def _rearrange(batchCodeTable):
batchCodeTable = batchCodeTable.set_index('Batch')
batchCodeTable.index.rename('VAX_LOT', inplace = True)
return batchCodeTable

View File

@@ -112,16 +112,6 @@
"symptomByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "23731536",
"metadata": {},
"outputs": [],
"source": [
"createAndSaveGlobalHistograms(symptomByBatchcodeTable)"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -149,13 +139,23 @@
{
"cell_type": "code",
"execution_count": null,
"id": "89d57e0f",
"id": "189a639e",
"metadata": {},
"outputs": [],
"source": [
"updateBatchCodeTableHtmlFile(batchCodeTable, batchCodeTableHtmlFile=\"../docs/batchCodeTable.html\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec2a692e",
"metadata": {},
"outputs": [],
"source": [
"createAndSaveGlobalHistograms(symptomByBatchcodeTable, batchCodeTable)"
]
},
{
"attachments": {},
"cell_type": "markdown",

View File

@@ -1,5 +1,5 @@
import os
import json
import simplejson as json
class IOUtils:
@@ -31,7 +31,7 @@ class IOUtils:
# Serializes `dict` as JSON into `file`, which is opened for writing ('w').
# IOUtils.ensurePath(file) runs first; presumably it creates missing parent
# directories - its body is not visible here, TODO confirm.
# NOTE(review): the parameter name `dict` shadows the builtin; it is part of the
# signature, so it is left unchanged.
def saveDictAsJson(dict, file):
IOUtils.ensurePath(file)
with open(file, 'w') as outfile:
# Diff residue: the next line is the pre-change call, superseded by the line after it.
json.dump(dict, outfile)
# `json` is simplejson here; ignore_nan=True serializes NaN/Infinity floats as null.
json.dump(dict, outfile, ignore_nan=True)
@staticmethod
def ensurePath(file):