From 0a588d51ffbdfe8c8bc58f4827f5f78696ae48ed Mon Sep 17 00:00:00 2001 From: frankknoll Date: Wed, 14 Dec 2022 12:17:31 +0100 Subject: [PATCH] refactoring --- src/intensivstationen/Intensivstationen.ipynb | 69 +------------------ .../MedianOfFreeBedsByKreisTableFactory.py | 15 ++++ ...MedianOfFreeBedsByKreisTableFactoryTest.py | 41 +++++++++++ 3 files changed, 57 insertions(+), 68 deletions(-) create mode 100644 src/intensivstationen/MedianOfFreeBedsByKreisTableFactory.py create mode 100644 src/intensivstationen/MedianOfFreeBedsByKreisTableFactoryTest.py diff --git a/src/intensivstationen/Intensivstationen.ipynb b/src/intensivstationen/Intensivstationen.ipynb index 3c4c2a932ba..d3a40b8ef37 100644 --- a/src/intensivstationen/Intensivstationen.ipynb +++ b/src/intensivstationen/Intensivstationen.ipynb @@ -32,6 +32,7 @@ "from TestHelper import TestHelper\n", "from IOUtils import IOUtils\n", "from Datawrapper import Datawrapper\n", + "from MedianOfFreeBedsByKreisTableFactory import MedianOfFreeBedsByKreisTableFactory\n", "\n", "pd.set_option('display.max_rows', 100)\n", "pd.set_option('display.max_columns', None)\n", @@ -331,74 +332,6 @@ " getAndPersistIntensiveCareBeds(timeSeries, kreis)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "d9d4acab", - "metadata": {}, - "outputs": [], - "source": [ - "class MedianOfFreeBedsByKreisTableFactory:\n", - " \n", - " def __init__(self, dataFrame):\n", - " self.dataFrame = dataFrame\n", - "\n", - " def createMedianOfFreeBedsByKreisTable(self, kreisKey):\n", - " self.dataFrame['free_beds_divided_by_all_beds_in_percent'] = self.dataFrame['betten_frei'] / (self.dataFrame['betten_frei'] + self.dataFrame['betten_belegt']) * 100\n", - " aggregated = self.dataFrame.groupby(kreisKey).agg(\n", - " median_free_beds_in_percent =\n", - " pd.NamedAgg(\n", - " column = 'free_beds_divided_by_all_beds_in_percent',\n", - " aggfunc = 'median'))\n", - " return aggregated.sort_values(by = 'median_free_beds_in_percent', ascending = False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a739d4d1", - "metadata": {}, - "outputs": [], - "source": [ - "from pandas.testing import assert_frame_equal\n", - "import statistics\n", - "\n", - "class MedianOfFreeBedsByKreisTableFactoryTest(unittest.TestCase):\n", - "\n", - " def test_createMedianOfFreeBedsByKreisTable(self):\n", - " # Given\n", - " dataFrame = TestHelper.createDataFrame(\n", - " columns = ['date', 'betten_frei', 'betten_belegt', 'Kreis'],\n", - " data = [ ['2020-04-24', 40, 38, 'Flensburg, Stadt'],\n", - " ['2020-04-24', 42, 36, 'Flensburg, Stadt'],\n", - " ['2020-04-24', 44, 34, 'Flensburg, Stadt'],\n", - " ['2020-04-24', 9, 10, 'Bamberg']],\n", - " index = [\n", - " 0,\n", - " 1,\n", - " 2,\n", - " 3])\n", - " medianOfFreeBedsByKreisTableFactory = MedianOfFreeBedsByKreisTableFactory(dataFrame)\n", - " \n", - " # When\n", - " medianOfFreeBedsByKreisTable = medianOfFreeBedsByKreisTableFactory.createMedianOfFreeBedsByKreisTable('Kreis')\n", - "\n", - " # Then\n", - " assert_frame_equal(\n", - " medianOfFreeBedsByKreisTable,\n", - " TestHelper.createDataFrame(\n", - " columns = ['median_free_beds_in_percent'],\n", - " data = [ [statistics.median([40/(40 + 38) * 100, 42/(42 + 36) * 100, 44/(44 + 34) * 100])],\n", - " [9/(9 + 10) * 100]],\n", - " index = pd.Index(\n", - " name = 'Kreis',\n", - " data = [\n", - " 'Flensburg, Stadt',\n", - " 'Bamberg'\n", - " ])),\n", - " check_dtype = False)" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/src/intensivstationen/MedianOfFreeBedsByKreisTableFactory.py b/src/intensivstationen/MedianOfFreeBedsByKreisTableFactory.py new file mode 100644 index 00000000000..4b034e5307d --- /dev/null +++ b/src/intensivstationen/MedianOfFreeBedsByKreisTableFactory.py @@ -0,0 +1,15 @@ +import pandas as pd + +class MedianOfFreeBedsByKreisTableFactory: + + def __init__(self, dataFrame): + self.dataFrame = dataFrame + + def createMedianOfFreeBedsByKreisTable(self, kreisKey): + self.dataFrame['free_beds_divided_by_all_beds_in_percent'] = self.dataFrame['betten_frei'] / (self.dataFrame['betten_frei'] + self.dataFrame['betten_belegt']) * 100 + aggregated = self.dataFrame.groupby(kreisKey).agg( + median_free_beds_in_percent = + pd.NamedAgg( + column = 'free_beds_divided_by_all_beds_in_percent', + aggfunc = 'median')) + return aggregated.sort_values(by = 'median_free_beds_in_percent', ascending = False) \ No newline at end of file diff --git a/src/intensivstationen/MedianOfFreeBedsByKreisTableFactoryTest.py b/src/intensivstationen/MedianOfFreeBedsByKreisTableFactoryTest.py new file mode 100644 index 00000000000..2727e391138 --- /dev/null +++ b/src/intensivstationen/MedianOfFreeBedsByKreisTableFactoryTest.py @@ -0,0 +1,41 @@ +import unittest +from TestHelper import TestHelper +from pandas.testing import assert_frame_equal +import statistics +import pandas as pd +from intensivstationen.MedianOfFreeBedsByKreisTableFactory import MedianOfFreeBedsByKreisTableFactory + +class MedianOfFreeBedsByKreisTableFactoryTest(unittest.TestCase): + + def test_createMedianOfFreeBedsByKreisTable(self): + # Given + dataFrame = TestHelper.createDataFrame( + columns = ['date', 'betten_frei', 'betten_belegt', 'Kreis'], + data = [ ['2020-04-24', 40, 38, 'Flensburg, Stadt'], + ['2020-04-24', 42, 36, 'Flensburg, Stadt'], + ['2020-04-24', 44, 34, 'Flensburg, Stadt'], + ['2020-04-24', 9, 10, 'Bamberg']], + index = [ + 0, + 1, + 2, + 3]) + medianOfFreeBedsByKreisTableFactory = MedianOfFreeBedsByKreisTableFactory(dataFrame) + + # When + medianOfFreeBedsByKreisTable = medianOfFreeBedsByKreisTableFactory.createMedianOfFreeBedsByKreisTable('Kreis') + + # Then + assert_frame_equal( + medianOfFreeBedsByKreisTable, + TestHelper.createDataFrame( + columns = ['median_free_beds_in_percent'], + data = [ [statistics.median([40/(40 + 38) * 100, 42/(42 + 36) * 100, 44/(44 + 34) * 100])], + [9/(9 + 10) * 100]], + index = pd.Index( + name = 'Kreis', + data = [ + 'Flensburg, Stadt', + 'Bamberg' + ])), + check_dtype = False) \ No newline at end of file