adding BatchCodeTableIntoHistogramDescriptionTableMerger
This commit is contained in:
@@ -17,6 +17,7 @@ dependencies:
|
|||||||
- pillow
|
- pillow
|
||||||
- openpyxl
|
- openpyxl
|
||||||
- tqdm
|
- tqdm
|
||||||
|
- simplejson
|
||||||
# - python-decouple
|
# - python-decouple
|
||||||
# - selenium
|
# - selenium
|
||||||
# - webdriver-manager
|
# - webdriver-manager
|
||||||
|
|||||||
30
src/BatchCodeTableIntoHistogramDescriptionTableMerger.py
Normal file
30
src/BatchCodeTableIntoHistogramDescriptionTableMerger.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
class BatchCodeTableIntoHistogramDescriptionTableMerger:
    """Copies per-batch-code statistics into the histogram description dicts."""

    # Columns of the batch code table that are copied into every histogram
    # description, in the order in which they are appended to the dict.
    _BATCH_CODE_COLUMNS = (
        'Adverse Reaction Reports',
        'Deaths',
        'Disabilities',
        'Life Threatening Illnesses',
        'Company',
        'Severe reports',
        'Lethality',
    )

    @staticmethod
    def mergeBatchCodeTableIntoHistogramDescriptionTable(batchCodeTable, histogramDescriptionTable):
        """Return a table whose descriptions are enriched with batch code data.

        Both tables are joined on their index (left join, so every row of
        ``histogramDescriptionTable`` is kept). For each row a new dict is
        built from the row's ``HISTOGRAM_DESCRIPTION`` dict plus the values
        of the batch code columns. Rows without a matching batch code get
        the missing values pandas fills in for a left join.

        The dicts stored in ``histogramDescriptionTable`` are not modified.

        Args:
            batchCodeTable: DataFrame containing the batch code columns
                listed in ``_BATCH_CODE_COLUMNS``.
            histogramDescriptionTable: DataFrame with the columns
                ``HISTOGRAM_DESCRIPTION`` (dict per row) and ``COUNTRY``.

        Returns:
            DataFrame with the columns ``HISTOGRAM_DESCRIPTION`` and
            ``COUNTRY``, indexed like ``histogramDescriptionTable``.

        Raises:
            pandas.errors.MergeError: if the join is not one-to-one.
        """
        batchCodeColumns = list(BatchCodeTableIntoHistogramDescriptionTableMerger._BATCH_CODE_COLUMNS)

        def merge(row):
            # Build a fresh dict instead of updating the stored one, so the
            # caller's input descriptions stay untouched.
            return {
                **row['HISTOGRAM_DESCRIPTION'],
                **{column: row[column] for column in batchCodeColumns},
            }

        joinedTable = pd.merge(
            histogramDescriptionTable,
            batchCodeTable,
            how='left',
            left_index=True,
            right_index=True,
            validate='one_to_one')
        # Row-wise apply returning dicts yields a Series of dicts.
        descriptions = joinedTable[['HISTOGRAM_DESCRIPTION'] + batchCodeColumns].apply(merge, axis='columns')
        descriptions.name = 'HISTOGRAM_DESCRIPTION'
        mergedTable = descriptions.to_frame()
        mergedTable['COUNTRY'] = histogramDescriptionTable['COUNTRY']
        return mergedTable
|
||||||
76
src/BatchCodeTableIntoHistogramDescriptionTableMergerTest.py
Normal file
76
src/BatchCodeTableIntoHistogramDescriptionTableMergerTest.py
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
import unittest
|
||||||
|
from pandas.testing import assert_frame_equal
|
||||||
|
from BatchCodeTableIntoHistogramDescriptionTableMerger import BatchCodeTableIntoHistogramDescriptionTableMerger
|
||||||
|
from TestHelper import TestHelper
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
class BatchCodeTableIntoHistogramDescriptionTableMergerTest(unittest.TestCase):
    """Checks that batch code data is copied into the histogram descriptions."""

    def test_mergeBatchCodeTableIntoHistogramDescriptionTable(self):
        # Each builder returns fresh objects so that the input and the
        # expected table never share a dict, list or index.
        def vaxLotIndex():
            return pd.Index(
                name = 'VAX_LOT',
                data = ['1808982'])

        def histograms():
            return [
                {
                    'batchcodes': ['1808982', 'EW0175', 'FD1921'],
                    'histogram': {'Blood pressure orthostatic abnormal': 5, 'Chest discomfort': 1}
                },
                {
                    'batchcodes': ['015M20A', '1808982'],
                    'histogram': {'Chest discomfort': 2}
                }
            ]

        # Given
        histogramDescriptionTable = TestHelper.createDataFrame(
            columns = ['HISTOGRAM_DESCRIPTION', 'COUNTRY'],
            data = [
                [
                    {
                        'batchcode': '1808982',
                        'histograms': histograms()
                    },
                    'Global'
                ]
            ],
            index = vaxLotIndex())

        batchCodeTable = TestHelper.createDataFrame(
            columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Severe reports', 'Lethality'],
            data = [ [2, 1, 2, 2, 'MODERNA', 2/2 * 100, np.nan]],
            index = vaxLotIndex())

        # When
        mergedTable = BatchCodeTableIntoHistogramDescriptionTableMerger.mergeBatchCodeTableIntoHistogramDescriptionTable(
            batchCodeTable = batchCodeTable,
            histogramDescriptionTable = histogramDescriptionTable)

        # Then
        expectedTable = TestHelper.createDataFrame(
            columns = ['HISTOGRAM_DESCRIPTION', 'COUNTRY'],
            data = [
                [
                    {
                        'batchcode': '1808982',
                        'Adverse Reaction Reports': 2,
                        'Deaths': 1,
                        'Disabilities': 2,
                        'Life Threatening Illnesses': 2,
                        'Company': 'MODERNA',
                        'Severe reports': 2/2 * 100,
                        'Lethality': np.nan,
                        'histograms': histograms()
                    },
                    'Global'
                ]
            ],
            index = vaxLotIndex())
        assert_frame_equal(mergedTable, expectedTable)
|
||||||
@@ -2,13 +2,23 @@ from HistogramFactory import createHistograms
|
|||||||
from HistogramPersister import saveHistograms
|
from HistogramPersister import saveHistograms
|
||||||
from MultiIndexExploder import MultiIndexExploder
|
from MultiIndexExploder import MultiIndexExploder
|
||||||
from HistogramDescriptionTableFactory import HistogramDescriptionTableFactory
|
from HistogramDescriptionTableFactory import HistogramDescriptionTableFactory
|
||||||
|
from BatchCodeTableIntoHistogramDescriptionTableMerger import BatchCodeTableIntoHistogramDescriptionTableMerger
|
||||||
|
|
||||||
|
|
||||||
def createAndSaveGlobalHistograms(symptomByBatchcodeTable, batchCodeTable):
    """Create the 'Global' histograms, enrich them with batch code data and save them.

    Every row of ``symptomByBatchcodeTable`` is assigned the country
    'Global'. The resulting histogram descriptions are merged with the
    re-indexed ``batchCodeTable`` and then saved once per COUNTRY group.
    """
    globalSymptomTable = symptomByBatchcodeTable.assign(COUNTRY = 'Global')
    explodedTable = MultiIndexExploder.explodeMultiIndexOfTable(
        createHistograms(globalSymptomTable))
    descriptionTable = BatchCodeTableIntoHistogramDescriptionTableMerger.mergeBatchCodeTableIntoHistogramDescriptionTable(
        batchCodeTable = _rearrange(batchCodeTable),
        histogramDescriptionTable = HistogramDescriptionTableFactory.createHistogramDescriptionTable(explodedTable))
    for country, descriptionTableForCountry in descriptionTable.groupby('COUNTRY'):
        print(f'saving histograms for {country}')
        saveHistograms(descriptionTableForCountry, country)
|
||||||
|
|
||||||
|
|
||||||
|
def _rearrange(batchCodeTable):
    """Return a copy of the table indexed by its 'Batch' column, with the index named 'VAX_LOT'."""
    return batchCodeTable.set_index('Batch').rename_axis('VAX_LOT')
|
||||||
@@ -112,16 +112,6 @@
|
|||||||
"symptomByBatchcodeTable"
|
"symptomByBatchcodeTable"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "23731536",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"createAndSaveGlobalHistograms(symptomByBatchcodeTable)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -149,13 +139,23 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "89d57e0f",
|
"id": "189a639e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"updateBatchCodeTableHtmlFile(batchCodeTable, batchCodeTableHtmlFile=\"../docs/batchCodeTable.html\")"
|
"updateBatchCodeTableHtmlFile(batchCodeTable, batchCodeTableHtmlFile=\"../docs/batchCodeTable.html\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "ec2a692e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"createAndSaveGlobalHistograms(symptomByBatchcodeTable, batchCodeTable)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"attachments": {},
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
import json
|
import simplejson as json
|
||||||
|
|
||||||
class IOUtils:
|
class IOUtils:
|
||||||
|
|
||||||
@@ -31,7 +31,7 @@ class IOUtils:
|
|||||||
def saveDictAsJson(dict, file):
|
def saveDictAsJson(dict, file):
|
||||||
IOUtils.ensurePath(file)
|
IOUtils.ensurePath(file)
|
||||||
with open(file, 'w') as outfile:
|
with open(file, 'w') as outfile:
|
||||||
json.dump(dict, outfile)
|
json.dump(dict, outfile, ignore_nan=True)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def ensurePath(file):
|
def ensurePath(file):
|
||||||
|
|||||||
Reference in New Issue
Block a user