Generate histograms for countries more efficiently

This commit is contained in:
frankknoll
2023-02-16 14:59:43 +01:00
parent ef7c36d567
commit 0ffc5881b6
8 changed files with 139 additions and 136 deletions

View File

@@ -4,15 +4,34 @@ class HistogramDescriptionTableFactory:
@staticmethod
def createHistogramDescriptionTable(dictByBatchcodeTable):
    """Build the histogram description table from a per-batch-code table.

    The aggregation itself is delegated to
    ``_createHistogramDescriptionTable``, which chooses between a
    per-country and a global grouping. This method only normalizes the
    result: the ``SYMPTOM_COUNT_BY_VAX_LOT`` column is renamed to
    ``HISTOGRAM_DESCRIPTION`` and the index is named ``VAX_LOT``.

    Args:
        dictByBatchcodeTable: table to aggregate; it is grouped by its
            ``VAX_LOT_EXPLODED`` column (and by ``COUNTRY`` when that
            column is present).

    Returns:
        The aggregated table with the renamed column and index.
    """
    # The aggregation is done exactly once, by the helper. A previous
    # inline groupby/agg whose result was immediately overwritten has been
    # removed: it only cost time and could fail before the helper ran.
    histogramDescriptionTable = HistogramDescriptionTableFactory._createHistogramDescriptionTable(dictByBatchcodeTable)
    histogramDescriptionTable = histogramDescriptionTable.rename(columns = { "SYMPTOM_COUNT_BY_VAX_LOT": "HISTOGRAM_DESCRIPTION" })
    histogramDescriptionTable.index.rename('VAX_LOT', inplace = True)
    return histogramDescriptionTable
@staticmethod
def _createHistogramDescriptionTable(dictByBatchcodeTable):
    """Dispatch to the per-country or the global table builder.

    The per-country builder is used when ``dictByBatchcodeTable`` has a
    ``COUNTRY`` column; otherwise the global builder is used.

    Args:
        dictByBatchcodeTable: table to aggregate.

    Returns:
        Whatever the selected builder returns for ``dictByBatchcodeTable``.
    """
    hasCountryColumn = 'COUNTRY' in dictByBatchcodeTable.columns
    buildTable = (
        HistogramDescriptionTableFactory._createHistogramDescriptionTableForCountries
        if hasCountryColumn
        else HistogramDescriptionTableFactory._createGlobalHistogramDescriptionTable)
    return buildTable(dictByBatchcodeTable)
@staticmethod
def _createHistogramDescriptionTableForCountries(dictByBatchcodeTable):
    """Aggregate histograms per (batch code, country) pair.

    Rows are grouped by ``VAX_LOT_EXPLODED`` and ``COUNTRY`` and each group
    is aggregated with ``_getHistograms``. Afterwards ``COUNTRY`` is moved
    from the index back into a regular column, so the result is indexed by
    ``VAX_LOT_EXPLODED`` only, and the rows labelled ``'nan'`` are dropped.

    Args:
        dictByBatchcodeTable: table containing at least the
            ``VAX_LOT_EXPLODED`` and ``COUNTRY`` columns.

    Returns:
        The aggregated table without the ``'nan'`` rows.
    """
    groupedByBatchcodeAndCountry = dictByBatchcodeTable.groupby(['VAX_LOT_EXPLODED', 'COUNTRY'])
    histogramsByBatchcodeAndCountry = groupedByBatchcodeAndCountry.agg(HistogramDescriptionTableFactory._getHistograms)
    # Only the COUNTRY level is reset; VAX_LOT_EXPLODED stays as the index.
    histogramsByBatchcode = histogramsByBatchcodeAndCountry.reset_index(level = 'COUNTRY')
    # Presumably 'nan' is the stringified placeholder for a missing batch
    # code -- TODO confirm against the code that fills VAX_LOT_EXPLODED.
    return histogramsByBatchcode.drop('nan')
@staticmethod
def _createGlobalHistogramDescriptionTable(dictByBatchcodeTable):
    """Aggregate histograms per batch code, without a country split.

    Rows are grouped by ``VAX_LOT_EXPLODED``, each group is aggregated with
    ``_getHistograms``, and the row labelled ``'nan'`` is dropped.

    Args:
        dictByBatchcodeTable: table containing at least the
            ``VAX_LOT_EXPLODED`` column.

    Returns:
        The aggregated table, indexed by ``VAX_LOT_EXPLODED``, without the
        ``'nan'`` row.
    """
    groupedByBatchcode = dictByBatchcodeTable.groupby('VAX_LOT_EXPLODED')
    histogramsByBatchcode = groupedByBatchcode.agg(HistogramDescriptionTableFactory._getHistograms)
    # Presumably 'nan' is the stringified placeholder for a missing batch
    # code -- TODO confirm against the code that fills VAX_LOT_EXPLODED.
    return histogramsByBatchcode.drop('nan')
@staticmethod
def _getHistograms(dictByBatchcodeTable):
dictByBatchcodeTable = dictByBatchcodeTable.to_frame()