import math


class BarChartDescriptionTables:
    """Helpers operating on tables that carry a 'BAR_CHART_DESCRIPTION' column."""

    @staticmethod
    def filterValidJensenShannonDistances(barChartDescriptionTable):
        """Return only the rows whose Jensen-Shannon distance is not NaN.

        Each row is passed to ``_isValidJensenShannonDistance``; the outcome
        of that row-wise check is then used to index the table.
        """
        isValidRow = barChartDescriptionTable.apply(
            BarChartDescriptionTables._isValidJensenShannonDistance,
            axis = 'columns')
        return barChartDescriptionTable[isValidRow]

    @staticmethod
    def _isValidJensenShannonDistance(barChartDescription):
        """Tell whether the row's 'Jensen-Shannon distance' entry is a number other than NaN."""
        description = barChartDescription['BAR_CHART_DESCRIPTION']
        distance = description['Jensen-Shannon distance']
        if math.isnan(distance):
            return False
        return True
TestHelper.createDataFrame( + columns = ['BAR_CHART_DESCRIPTION'], + data = [ + [ + { + 'countries': ['Germany'], + 'Adverse Reaction Reports guessed': [70], + 'Adverse Reaction Reports known': [80], + 'Jensen-Shannon distance': 0.4711 + } + ] + ], + index = pd.Index( + [ + '# 009C01A', + ], + name = 'VAX_LOT')), + check_dtype = True) diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index 8044040e129..6fc48dc8087 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -25,6 +25,7 @@ "from IOUtils import IOUtils\n", "import os\n", "import pandas as pd\n", + "from BarChartDescriptionTables import BarChartDescriptionTables\n", "\n", "pd.set_option('display.max_rows', 100)\n", "pd.set_option('display.max_columns', None)\n" @@ -220,8 +221,41 @@ "metadata": {}, "outputs": [], "source": [ - "barChartDescriptionTableWithJensenShannonDistance = JensenShannonDistance2BarChartDescriptionColumnAdder.addJensenShannonDistance2BarChartDescriptionColumn(barChartDescriptionTable)\n", - "barChartDescriptionTableWithJensenShannonDistance" + "barChartDescriptionTable = JensenShannonDistance2BarChartDescriptionColumnAdder.addJensenShannonDistance2BarChartDescriptionColumn(barChartDescriptionTable)\n", + "barChartDescriptionTable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39c83928", + "metadata": {}, + "outputs": [], + "source": [ + "barChartDescriptionTable = BarChartDescriptionTables.filterValidJensenShannonDistances(barChartDescriptionTable)\n", + "barChartDescriptionTable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "123be407", + "metadata": {}, + "outputs": [], + "source": [ + "batchcodes2Retain = barChartDescriptionTable.index.values\n", + "batchCodeTable = batchCodeTable[batchCodeTable['Batch'].isin(batchcodes2Retain)]\n", + "batchCodeTable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3bd5d78d", + "metadata": {}, + "outputs": [], + "source": [ + 
"IOUtils.saveDataFrameAsJson(batchCodeTable, '../docs/data/batchCodeTables/Global.json')" ] }, { @@ -232,7 +266,7 @@ "outputs": [], "source": [ "IOUtils.saveDictAsJson(\n", - " BarChartDescriptionTable2DictionaryConverter.convert2Dictionary(barChartDescriptionTableWithJensenShannonDistance, internationalVaersCovid19),\n", + " BarChartDescriptionTable2DictionaryConverter.convert2Dictionary(barChartDescriptionTable, internationalVaersCovid19),\n", " '../docs/data/barChartDescriptionTable.json')\n" ] }, diff --git a/src/help.txt b/src/help.txt index d06173d2974..f568f7401c5 100644 --- a/src/help.txt +++ b/src/help.txt @@ -5,6 +5,9 @@ FK-FIXME: FK-TODO: - add google captcha to batchCodeTable.html - Symptomhistogramm +- Filter für ein Barchart: + - im Barchart gibt es mindestens N Guessed-Einträge + - im Barchart gibt es mindestens ein Country mit Guessed > Known anacron job: sudo cp src/intensivstationen_howbadismybatch.sh /etc/cron.daily/intensivstationen_howbadismybatch