In [1]:
import pandas as pd
from urllib import request

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)
pd.set_option('mode.chained_assignment', 'raise')

In [2]:
from datetime import datetime

print(datetime.now().strftime("%d.%m.%Y, %H:%M:%S Uhr"))

31.03.2022, 08:46:18 Uhr


In [3]:
! pwd

/home/frankknoll/Dokumente/Corona/projects/HowBadIsMyBatch-pages/src/intensivstationen


In [4]:
needsUpdate = False

In [5]:
from bs4 import BeautifulSoup
import requests
from datetime import datetime
from time import sleep
from selenium import webdriver
from selenium.webdriver.firefox.options import Options

class DateProvider:
    """Compares the timestamp of the published page with the timestamp of its data source.

    Both timestamps are fetched lazily on first access and cached on the instance.
    """

    # Format of the timestamp stored in the "Datenstand" element of the published page.
    INTENSIVSTATIONEN_DATE_FORMAT = "%d.%m.%Y, %H:%M Uhr"

    def __init__(self):
        self.lastUpdated = None
        self.lastUpdatedDataSource = None

    def needsUpdate(self):
        """Return True when the data source timestamp is later than the published one."""
        return self.getLastUpdated() < self.getLastUpdatedDataSource()

    def getLastUpdated(self):
        """Return the timestamp found in the element with id "Datenstand" of the published page."""
        if self.lastUpdated is None:
            htmlContent = requests.get("https://knollfrank.github.io/HowBadIsMyBatch/intensivstationen.html").text
            soup = BeautifulSoup(htmlContent, "lxml")
            dateStr = soup.find(id = "Datenstand").text
            self.lastUpdated = datetime.strptime(dateStr, DateProvider.INTENSIVSTATIONEN_DATE_FORMAT)

        return self.lastUpdated

    def getLastUpdatedDataSource(self):
        """Return the 'Letzte Änderung' value of the 'Landkreis-Daten' row of the downloads table."""
        if self.lastUpdatedDataSource is None:
            html = self._getOriginalHtml()
            lastUpdatedColumn = 'Letzte Änderung'
            dataFrame = self._asDataFrame(html, lastUpdatedColumn)
            self.lastUpdatedDataSource = dataFrame.loc['Landkreis-Daten', lastUpdatedColumn].to_pydatetime()

        return self.lastUpdatedDataSource

    def _getOriginalHtml(self):
        """Load the downloads page in headless Firefox and return the HTML of its body."""
        options = Options()
        options.headless = True
        driver = webdriver.Firefox(options = options)
        try:
            driver.get('https://www.intensivregister.de/#/aktuelle-lage/downloads')
            # Presumably gives the client-side rendered page time to build its table.
            sleep(10)
            return driver.execute_script("return document.body.innerHTML")
        finally:
            # Always shut the browser down, even when loading or scripting fails.
            driver.quit()

    def _asDataFrame(self, html, lastUpdatedColumn):
        """Parse the first table of `html`, convert `lastUpdatedColumn` to datetimes and index by 'Name'."""
        from io import StringIO

        # read_html expects a file-like object; passing literal HTML is deprecated in newer pandas.
        dataFrame = pd.read_html(StringIO(html), parse_dates = [lastUpdatedColumn])[0]
        dataFrame[lastUpdatedColumn] = pd.to_datetime(dataFrame[lastUpdatedColumn], format = "%d.%m.%Y %H:%M Uhr")
        dataFrame.set_index('Name', inplace = True)
        return dataFrame


In [6]:
dateProvider = DateProvider()
print('          lastUpdated:', dateProvider.getLastUpdated())
print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource())        
needsUpdate = dateProvider.needsUpdate()
print('needsUpdate:', needsUpdate)

          lastUpdated: 2022-03-29 13:32:00
lastUpdatedDataSource: 2022-03-30 13:32:00
needsUpdate: True


In [7]:
from bs4 import BeautifulSoup

class HtmlTransformerUtil:
    """Applies a soup-to-soup transformation to an HTML file in place."""

    def applySoupTransformerToFile(self, file, soupTransformer):
        """Parse `file`, pass the soup to `soupTransformer` and write the returned soup back to `file`."""
        self._writeSoup(soupTransformer(self._readSoup(file)), file)

    def _readSoup(self, file):
        # Explicit UTF-8 so that umlauts do not depend on the platform's default encoding.
        with open(file, encoding = "utf-8") as fp:
            soup = BeautifulSoup(fp, 'lxml')
        return soup

    def _writeSoup(self, soup, file):
        # Written with the same encoding that _readSoup uses.
        with open(file, "w", encoding = "utf-8") as fp:
            fp.write(str(soup))


In [8]:
def saveLastUpdatedIntensivstationen(lastUpdated):
    """Write `lastUpdated` into the "Datenstand" element of ../../docs/intensivstationen.html."""

    def setLastUpdated(soup):
        # Replace the text of the element with id "Datenstand" by the formatted timestamp.
        datenstand = soup.find(id = "Datenstand")
        formatted = lastUpdated.strftime(DateProvider.INTENSIVSTATIONEN_DATE_FORMAT)
        datenstand.string.replace_with(formatted)
        return soup

    transformerUtil = HtmlTransformerUtil()
    transformerUtil.applySoupTransformerToFile(
        file = "../../docs/intensivstationen.html",
        soupTransformer = setLastUpdated)

In [9]:
saveLastUpdatedIntensivstationen(dateProvider.getLastUpdatedDataSource())

In [10]:
def readTimeseries(download = False):
    """Read the daily time series CSV and return it sorted by date.

    Parameters:
        download: when True, the CSV is downloaded first (see _downloadTimeseries).

    Returns:
        DataFrame with the columns 'date' (datetime), 'bundesland' and
        'gemeindeschluessel' (string dtype), 'betten_belegt' and 'betten_frei',
        sorted ascending by 'date'.
    """
    timeSeriesFile = 'zeitreihe-tagesdaten.csv'
    if download:
        _downloadTimeseries(timeSeriesFile)

    timeseries = pd.read_csv(
        timeSeriesFile,
        low_memory = False,
        usecols = ['date', 'bundesland', 'gemeindeschluessel', 'betten_belegt', 'betten_frei'],
        dtype = {
            'gemeindeschluessel': 'string',
            'bundesland': 'string'
            })
    # Parse the dates after reading: the `date_parser` argument of read_csv is deprecated
    # in newer pandas, while pd.to_datetime with an explicit format works on all versions.
    timeseries['date'] = pd.to_datetime(timeseries['date'], format = "%Y-%m-%d")
    return timeseries.sort_values(by = 'date', ascending = True)

# download https://diviexchange.blob.core.windows.net/%24web/zeitreihe-tagesdaten.csv or https://www.intensivregister.de/#/aktuelle-lage/downloads
def _downloadTimeseries(timeSeriesFile):
    """Download the daily time series CSV and store it as `timeSeriesFile`."""
    sourceUrl = 'https://diviexchange.blob.core.windows.net/%24web/zeitreihe-tagesdaten.csv'
    request.urlretrieve(sourceUrl, timeSeriesFile)


In [11]:
timeSeries = readTimeseries(download = needsUpdate)
timeSeries

Unnamed: 0,date,bundesland,gemeindeschluessel,betten_frei,betten_belegt
0,2020-04-24,01,01001,40,38
267,2020-04-24,09,09471,9,9
266,2020-04-24,09,09464,17,23
265,2020-04-24,09,09463,9,25
264,2020-04-24,09,09462,12,51
...,...,...,...,...,...
279131,2022-03-30,06,06437,7,119
279130,2022-03-30,06,06436,1,23
279129,2022-03-30,06,06435,11,70
279127,2022-03-30,06,06433,2,19


In [12]:
def readKreise(download = False):
    """Read the districts workbook and return the columns 'Bundesland', 'Kreis' and 'Einwohnerzahl'.

    The first sheet column becomes the index, which is named "Key".
    When `download` is True the workbook is downloaded first.
    """
    kreiseFile = '04-kreise.xlsx'
    if download:
        _downloadKreise(kreiseFile)

    # Header labels as they appear in the sheet, mapped to the column names used downstream.
    columnNames = {'2': 'Bundesland', 3: 'Kreis', 6: 'Einwohnerzahl'}
    sheet = pd.read_excel(
        kreiseFile,
        sheet_name = 'Kreisfreie Städte u. Landkreise',
        header = 5,
        index_col = 0)
    selected = sheet.rename(columns = columnNames)[['Bundesland', 'Kreis', 'Einwohnerzahl']]
    return selected.rename_axis("Key")

# download https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/Administrativ/04-kreise.xlsx?__blob=publicationFile or https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/Administrativ/04-kreise.html
def _downloadKreise(kreiseFile):
    """Download the districts workbook from destatis.de and store it as `kreiseFile`."""
    sourceUrl = 'https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/Administrativ/04-kreise.xlsx?__blob=publicationFile'
    request.urlretrieve(sourceUrl, kreiseFile)


In [13]:
kreise = readKreise(download = False)
kreise

Unnamed: 0_level_0,Bundesland,Kreis,Einwohnerzahl
Key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
,,,
01,Schleswig-Holstein,,
01001,Kreisfreie Stadt,"Flensburg, Stadt",89934.0
01002,Kreisfreie Stadt,"Kiel, Landeshauptstadt",246601.0
01003,Kreisfreie Stadt,"Lübeck, Hansestadt",215846.0
...,...,...,...
2) Die Ergebnisse ab Berichtsjahr 2016 sind aufgrund methodischer Änderungen und technischer Weiterentwicklung\n nur bedingt mit den Vorjahreswerten vegleichbar. Erläuterungen dazu finden Sie unter www.destatis.de beim Bevölkerungsstand.,,,
,,,
© Daten (im Auftrag der Herausgebergemeinschaft Statistische Ämter des Bundes und der Länder),,,
"Statistisches Bundesamt (Destatis), 2021",,,


In [14]:
class ColumnsAdder:
    """Enriches frames keyed by 'gemeindeschluessel' / 'bundesland' with data from the `kreise` table."""

    def __init__(self, kreise):
        # Table indexed by key, with the columns 'Bundesland', 'Kreis' and 'Einwohnerzahl'.
        self.kreise = kreise

    def addKreisAndBundeslandAndEinwohnerzahlColumns(self, dataFrame):
        """Return a new frame with 'Kreis', 'Einwohnerzahl' and 'Bundesland' columns added."""
        dataFrame = self.addKreisAndEinwohnerzahlColumns(dataFrame)
        return self._addBundeslandColumn(dataFrame)

    def addKreisAndEinwohnerzahlColumns(self, dataFrame):
        """Return a new frame with 'Kreis' and 'Einwohnerzahl' looked up by 'gemeindeschluessel'.

        The passed frame is left unmodified; rows without a match get missing values.
        """
        dataFrame_kreise = pd.merge(dataFrame, self.kreise, how = 'left', left_on = 'gemeindeschluessel', right_index = True)
        # assign() builds a new frame instead of writing the columns into the caller's frame.
        return dataFrame.assign(
            Kreis = dataFrame_kreise['Kreis'],
            Einwohnerzahl = dataFrame_kreise['Einwohnerzahl'])

    def _addBundeslandColumn(self, dataFrame):
        """Left-join the 'Bundesland' name onto `dataFrame` via its 'bundesland' key column."""
        return pd.merge(
            dataFrame,
            self._createBundeslandByKeyTable(),
            how = 'left',
            left_on = 'bundesland',
            right_index = True)

    def _createBundeslandByKeyTable(self):
        # Rows whose index key has exactly two characters carry the Bundesland name.
        return self.kreise[self.kreise.index.str.len() == 2][['Bundesland']]


In [15]:
timeSeries = ColumnsAdder(kreise).addKreisAndBundeslandAndEinwohnerzahlColumns(timeSeries)
timeSeries

Unnamed: 0,date,bundesland,gemeindeschluessel,betten_frei,betten_belegt,Kreis,Einwohnerzahl,Bundesland
0,2020-04-24,01,01001,40,38,"Flensburg, Stadt",89934.0,Schleswig-Holstein
267,2020-04-24,09,09471,9,9,Bamberg,147497.0,Bayern
266,2020-04-24,09,09464,17,23,Hof,45173.0,Bayern
265,2020-04-24,09,09463,9,25,Coburg,40842.0,Bayern
264,2020-04-24,09,09462,12,51,Bayreuth,74048.0,Bayern
...,...,...,...,...,...,...,...,...
279131,2022-03-30,06,06437,7,119,Odenwaldkreis,96754.0,Hessen
279130,2022-03-30,06,06436,1,23,Main-Taunus-Kreis,239264.0,Hessen
279129,2022-03-30,06,06435,11,70,Main-Kinzig-Kreis,421689.0,Hessen
279127,2022-03-30,06,06433,2,19,Groß-Gerau,275807.0,Hessen


In [16]:
kreisValues = sorted(timeSeries['Kreis'].drop_duplicates().values)

In [17]:
def getKreisOptions(kreisValues):
    """Return one HTML <option> string per entry of `kreisValues`, in the given order."""
    return [getKreisOption(kreis) for kreis in kreisValues]

def getKreisOption(kreis):
    """Return an HTML <option> element whose value and label are both `kreis`.

    The name is HTML-escaped so that characters such as '&', '<' or '"' cannot
    break the generated markup.
    """
    from html import escape

    escapedKreis = escape(str(kreis), quote = True)
    return f'<option value="{escapedKreis}">{escapedKreis}</option>'


In [18]:
kreisOptions = ['<option selected="" value="de">Alle Landkreise</option>']  + getKreisOptions(kreisValues)

In [19]:
from bs4 import BeautifulSoup


class KreisOptionsSetter:
    """Replaces the children of the element with id "kreisSelect" in an HTML document."""

    def setKreisOptions(self, html, options):
        """Return `html` as a string with the kreisSelect children replaced by `options`.

        `options` is a list of HTML strings, each containing one <option> element.
        """
        document = self._parse(html)
        optionTags = self._parseOptions(options)
        return str(self._setKreisOptions(document, optionTags))

    def _setKreisOptions(self, soup, options):
        # Empty the select element, then append the new options in order.
        selectElement = soup.find(id = "kreisSelect")
        selectElement.clear()
        for optionTag in options:
            selectElement.append(optionTag)
        return soup

    def _parseOptions(self, options):
        # Parse every option string on its own and pick the resulting <option> tag.
        optionTags = []
        for optionHtml in options:
            optionTags.append(self._parse(optionHtml).option)
        return optionTags

    def _parse(self, html):
        return BeautifulSoup(html, 'lxml')


In [20]:
import unittest

In [21]:
class TestHelper:
    """Helpers for building fixtures in the unit tests."""

    @staticmethod
    def createDataFrame(index, columns, data, dtypes = {}):
        """Build a DataFrame from `data`, `index` and `columns` and cast it with `dtypes`."""
        frame = pd.DataFrame(data = data, index = index, columns = columns)
        return frame.astype(dtypes)


In [22]:
class KreisOptionsSetterTest(unittest.TestCase):
    """Unit test for KreisOptionsSetter.setKreisOptions."""

    def test_setKreisOptions(self):
        """The options of the kreisSelect element are replaced by exactly the given options."""
        # Given
        kreisOptionsSetter = KreisOptionsSetter()

        # When
        # The input document contains a "Wiesbaden, Landeshauptstadt" option that is
        # not part of the new option list.
        htmlActual = kreisOptionsSetter.setKreisOptions(
            html='''
            <html>
              <body>
                <p>Test<p/>
                <select id="kreisSelect" name="kreis">
                  <option selected="" value="de">Alle Landkreise</option>
                  <option value="Ahrweiler">Ahrweiler</option>
                  <option value="Wiesbaden, Landeshauptstadt">Wiesbaden, Landeshauptstadt</option>
                  <option value="Aichach-Friedberg">Aichach-Friedberg</option>
                </select>
              </body>
            </html>
            ''',
            options=[
                '<option selected="" value="de">Alle Landkreise</option>',
                '<option value="Ahrweiler">Ahrweiler</option>',
                '<option value="Aichach-Friedberg">Aichach-Friedberg</option>'])

        # Then
        # The expected document contains only the three options passed above.
        assertEqualHTML(
            htmlActual,
            '''
            <html>
              <body>
                <p>Test<p/>
                <select id="kreisSelect" name="kreis">
                  <option selected="" value="de">Alle Landkreise</option>
                  <option value="Ahrweiler">Ahrweiler</option>
                  <option value="Aichach-Friedberg">Aichach-Friedberg</option>
                </select>
              </body>
            </html>
            ''')

# adapted from https://stackoverflow.com/questions/8006909/pretty-print-assertequal-for-html-strings
def assertEqualHTML(string1, string2, file1='', file2=''):
    u'''
    Compare two unicode strings containing HTML.
    Both strings are parsed and pretty-printed before they are compared.
    If the pretty-printed forms differ, a human friendly diff is shown
    via display() and an exception gets raised.
    An exception is also raised when one of the inputs is not a str.
    file1 and file2 are optional labels used in the diff header and in the messages.
    '''
    def short(value):
        # Values without len()/slicing (e.g. an int) are returned unchanged so that the
        # type check below can report them instead of failing with a TypeError.
        maxLength = 20
        try:
            return value[:maxLength] if len(value) > maxLength else value
        except TypeError:
            return value

    inputs = [(string1, file1), (string2, file2)]
    # Validate both inputs before anything is parsed.
    for mystr, file in inputs:
        if not isinstance(mystr, str):
            raise Exception(u'string ist not unicode: %r %s' %
                            (short(mystr), file))

    from bs4 import BeautifulSoup as bs
    import difflib

    # Name the parser explicitly (the same one the rest of this notebook uses) instead of
    # letting BeautifulSoup guess one.
    p = [bs(mystr, 'lxml').prettify() for mystr, _ in inputs]
    if p[0] != p[1]:
        for line in difflib.unified_diff(p[0].splitlines(), p[1].splitlines(), fromfile=file1, tofile=file2):
            display(line)
        display(p[0], ' != ', p[1])
        raise Exception('Not equal %s %s' % (file1, file2))


In [23]:
from bs4 import BeautifulSoup


def saveKreisOptions(kreisOptions):
    """Replace the kreisSelect options in ../../docs/intensivstationen.html with `kreisOptions`."""

    def replaceKreisOptions(soup):
        # KreisOptionsSetter works on HTML strings, so convert to a string and parse the result again.
        updatedHtml = KreisOptionsSetter().setKreisOptions(html = str(soup), options = kreisOptions)
        return BeautifulSoup(updatedHtml, 'lxml')

    transformerUtil = HtmlTransformerUtil()
    transformerUtil.applySoupTransformerToFile(
        file = "../../docs/intensivstationen.html",
        soupTransformer = replaceKreisOptions)


In [24]:
saveKreisOptions(kreisOptions)

In [25]:
import os
import json


class IOUtils:
    """File helpers for persisting JSON documents."""

    @staticmethod
    def saveDictAsJson(dict, file):
        """Serialize `dict` as JSON into `file`, creating missing parent directories first."""
        IOUtils.ensurePath(file)
        with open(file, 'w') as outfile:
            json.dump(dict, outfile)

    @staticmethod
    def ensurePath(file):
        """Create the parent directory of `file` when it does not exist yet."""
        directory = os.path.dirname(file)
        # A bare file name has no directory part; os.makedirs('') would raise.
        if directory:
            # exist_ok avoids the race between checking for and creating the directory.
            os.makedirs(directory, exist_ok = True)


In [26]:
def getIntensiveCareBeds(timeSeries, kreis = None):
    """Return bed counts and population per date.

    With a `kreis`, the rows of that Kreis are returned with the columns 'date',
    'betten_belegt', 'betten_frei' and 'Einwohnerzahl'. Without a `kreis`, these
    three value columns are summed per date over all rows.
    """
    if kreis is None:
        summedColumns = ['betten_belegt', 'betten_frei', 'Einwohnerzahl']
        aggregations = {
            column: pd.NamedAgg(column = column, aggfunc = 'sum')
            for column in summedColumns}
        return timeSeries.groupby('date').agg(**aggregations).reset_index()

    rowsOfKreis = timeSeries['Kreis'] == kreis
    return timeSeries[rowsOfKreis][['date', 'betten_belegt', 'betten_frei', 'Einwohnerzahl']]

In [27]:
def getAndPersistIntensiveCareBeds(timeSeries, kreis = None):
    """Compute the intensive care bed table for `kreis`, save it as JSON and return it.

    The name of the Kreis is displayed before the file is written.
    """
    beds = getIntensiveCareBeds(timeSeries, kreis)
    display(kreis)
    targetFile = _getFilename(kreis)
    _saveAsJson(beds, targetFile)
    return beds


def _saveAsJson(intensiveCareBeds, file):
    """Write the population of the first row and the per-date bed records to `file` as JSON."""
    firstRow = intensiveCareBeds.iloc[0]
    document = {
        'population': int(firstRow['Einwohnerzahl']),
        'data': _intensiveCareBeds2Dict(intensiveCareBeds),
    }
    IOUtils.saveDictAsJson(document, file)


def _intensiveCareBeds2Dict(intensiveCareBeds):
    df = intensiveCareBeds[['date', 'betten_belegt', 'betten_frei']].copy()
    df['date'] = df['date'].dt.strftime('%Y-%m-%d')
    return df.to_dict(orient = "records")


def _getFilename(kreis):
    return f'../../docs/data/intensivstationen/intensivstationen-{_getSuffix(kreis)}.json'


def _getSuffix(kreis):
    return kreis if kreis is not None else 'de'


In [28]:
getAndPersistIntensiveCareBeds(timeSeries)

None

Unnamed: 0,date,betten_belegt,betten_frei,Einwohnerzahl
0,2020-04-24,19237,12270,82401553.0
1,2020-04-25,19100,12290,82401553.0
2,2020-04-26,18617,12694,82401553.0
3,2020-04-27,18803,12537,82360711.0
4,2020-04-28,19345,12207,82504802.0
...,...,...,...,...
701,2022-03-26,20268,3924,82658396.0
702,2022-03-27,19921,4187,82658396.0
703,2022-03-28,20123,4263,82658396.0
704,2022-03-29,20656,3904,82658396.0


In [29]:
for kreis in kreisValues:
    getAndPersistIntensiveCareBeds(timeSeries, kreis)

'Ahrweiler'

'Aichach-Friedberg'

'Alb-Donau-Kreis'

'Altenburger Land'

'Altenkirchen (Westerwald)'

'Altmarkkreis Salzwedel'

'Altötting'

'Alzey-Worms'

'Amberg'

'Amberg-Sulzbach'

'Ammerland'

'Anhalt-Bitterfeld'

'Ansbach'

'Aschaffenburg'

'Augsburg'

'Aurich'

'Bad Dürkheim'

'Bad Kissingen'

'Bad Kreuznach'

'Bad Tölz-Wolfratshausen'

'Baden-Baden, Stadtkreis'

'Bamberg'

'Barnim'

'Bautzen'

'Bayreuth'

'Berchtesgadener Land'

'Bergstraße'

'Berlin, Stadt'

'Bernkastel-Wittlich'

'Biberach'

'Bielefeld, Stadt'

'Birkenfeld'

'Bochum, Stadt'

'Bodenseekreis'

'Bonn, Stadt'

'Borken'

'Bottrop, Stadt'

'Brandenburg an der Havel, Stadt'

'Braunschweig, Stadt'

'Breisgau-Hochschwarzwald'

'Bremen, Stadt'

'Bremerhaven, Stadt'

'Burgenlandkreis'

'Böblingen'

'Börde'

'Calw'

'Celle'

'Cham'

'Chemnitz, Stadt'

'Cloppenburg'

'Coburg'

'Cochem-Zell'

'Coesfeld'

'Cottbus, Stadt'

'Cuxhaven'

'Dachau'

'Dahme-Spreewald'

'Darmstadt, Wissenschaftsstadt'

'Darmstadt-Dieburg'

'Deggendorf'

'Delmenhorst, Stadt'

'Dessau-Roßlau, Stadt'

'Diepholz'

'Dillingen a.d.Donau'

'Dingolfing-Landau'

'Dithmarschen'

'Donau-Ries'

'Donnersbergkreis'

'Dortmund, Stadt'

'Dresden, Stadt'

'Duisburg, Stadt'

'Düren'

'Düsseldorf, Stadt'

'Ebersberg'

'Eichsfeld'

'Eichstätt'

'Eifelkreis Bitburg-Prüm'

'Elbe-Elster'

'Emden, Stadt'

'Emmendingen'

'Emsland'

'Ennepe-Ruhr-Kreis'

'Enzkreis'

'Erding'

'Erfurt, Stadt'

'Erlangen'

'Erlangen-Höchstadt'

'Erzgebirgskreis'

'Essen, Stadt'

'Esslingen'

'Euskirchen'

'Flensburg, Stadt'

'Forchheim'

'Frankenthal (Pfalz), kreisfreie Stadt'

'Frankfurt (Oder), Stadt'

'Frankfurt am Main, Stadt'

'Freiburg im Breisgau, Stadtkreis'

'Freising'

'Freudenstadt'

'Freyung-Grafenau'

'Friesland'

'Fulda'

'Fürstenfeldbruck'

'Fürth'

'Garmisch-Partenkirchen'

'Gelsenkirchen, Stadt'

'Gera, Stadt'

'Germersheim'

'Gießen'

'Gifhorn'

'Goslar'

'Gotha'

'Grafschaft Bentheim'

'Greiz'

'Groß-Gerau'

'Göppingen'

'Görlitz'

'Göttingen'

'Günzburg'

'Gütersloh'

'Hagen, Stadt der FernUniversität'

'Halle (Saale), Stadt'

'Hamburg, Freie und Hansestadt'

'Hameln-Pyrmont'

'Hamm, Stadt'

'Harburg'

'Harz'

'Havelland'

'Haßberge'

'Heidekreis'

'Heidelberg, Stadtkreis'

'Heidenheim'

'Heilbronn'

'Heilbronn, Stadtkreis'

'Heinsberg'

'Helmstedt'

'Herford'

'Herne, Stadt'

'Hersfeld-Rotenburg'

'Herzogtum Lauenburg'

'Hildburghausen'

'Hildesheim'

'Hochsauerlandkreis'

'Hochtaunuskreis'

'Hof'

'Hohenlohekreis'

'Holzminden'

'Höxter'

'Ilm-Kreis'

'Ingolstadt'

'Jena, Stadt'

'Jerichower Land'

'Kaiserslautern'

'Kaiserslautern, kreisfreie Stadt'

'Karlsruhe'

'Karlsruhe, Stadtkreis'

'Kassel'

'Kassel, documenta-Stadt'

'Kaufbeuren'

'Kelheim'

'Kempten (Allgäu)'

'Kiel, Landeshauptstadt'

'Kitzingen'

'Kleve'

'Koblenz, kreisfreie Stadt'

'Konstanz'

'Krefeld, Stadt'

'Kronach'

'Kulmbach'

'Kusel'

'Kyffhäuserkreis'

'Köln, Stadt'

'Lahn-Dill-Kreis'

'Landau in der Pfalz, kreisfreie Stadt'

'Landkreis Rostock'

'Landsberg am Lech'

'Landshut'

'Leer'

'Leipzig'

'Leipzig, Stadt'

'Leverkusen, Stadt'

'Lichtenfels'

'Limburg-Weilburg'

'Lindau (Bodensee)'

'Lippe'

'Ludwigsburg'

'Ludwigshafen am Rhein, kreisfreie Stadt'

'Ludwigslust-Parchim'

'Lörrach'

'Lübeck, Hansestadt'

'Lüchow-Dannenberg'

'Lüneburg'

'Magdeburg, Landeshauptstadt'

'Main-Kinzig-Kreis'

'Main-Spessart'

'Main-Tauber-Kreis'

'Main-Taunus-Kreis'

'Mainz, kreisfreie Stadt'

'Mainz-Bingen'

'Mannheim, Stadtkreis'

'Mansfeld-Südharz'

'Marburg-Biedenkopf'

'Mayen-Koblenz'

'Mecklenburgische Seenplatte'

'Meißen'

'Memmingen'

'Merzig-Wadern'

'Mettmann'

'Miesbach'

'Miltenberg'

'Minden-Lübbecke'

'Mittelsachsen'

'Märkisch-Oderland'

'Märkischer Kreis'

'Mönchengladbach, Stadt'

'Mühldorf a.Inn'

'Mülheim an der Ruhr, Stadt'

'München'

'München, Landeshauptstadt'

'Münster, Stadt'

'Neckar-Odenwald-Kreis'

'Neu-Ulm'

'Neuburg-Schrobenhausen'

'Neumarkt i.d.OPf.'

'Neumünster, Stadt'

'Neunkirchen'

'Neustadt a.d.Aisch-Bad Windsheim'

'Neustadt an der Weinstraße, kreisfreie Stadt'

'Neuwied'

'Nienburg (Weser)'

'Nordfriesland'

'Nordhausen'

'Nordsachsen'

'Nordwestmecklenburg'

'Northeim'

'Nürnberg'

'Nürnberger Land'

'Oberallgäu'

'Oberbergischer Kreis'

'Oberhausen, Stadt'

'Oberhavel'

'Oberspreewald-Lausitz'

'Odenwaldkreis'

'Oder-Spree'

'Offenbach'

'Offenbach am Main, Stadt'

'Oldenburg'

'Oldenburg (Oldenburg), Stadt'

'Olpe'

'Ortenaukreis'

'Osnabrück'

'Osnabrück, Stadt'

'Ostalbkreis'

'Ostallgäu'

'Osterholz'

'Ostholstein'

'Ostprignitz-Ruppin'

'Paderborn'

'Passau'

'Peine'

'Pfaffenhofen a.d.Ilm'

'Pforzheim, Stadtkreis'

'Pinneberg'

'Pirmasens, kreisfreie Stadt'

'Plön'

'Potsdam, Stadt'

'Potsdam-Mittelmark'

'Prignitz'

'Rastatt'

'Ravensburg'

'Recklinghausen'

'Regen'

'Regensburg'

'Region Hannover'

'Regionalverband Saarbrücken'

'Rems-Murr-Kreis'

'Remscheid, Stadt'

'Rendsburg-Eckernförde'

'Reutlingen'

'Rhein-Erft-Kreis'

'Rhein-Hunsrück-Kreis'

'Rhein-Kreis Neuss'

'Rhein-Lahn-Kreis'

'Rhein-Neckar-Kreis'

'Rhein-Sieg-Kreis'

'Rheingau-Taunus-Kreis'

'Rheinisch-Bergischer Kreis'

'Rhön-Grabfeld'

'Rosenheim'

'Rostock'

'Rotenburg (Wümme)'

'Roth'

'Rottal-Inn'

'Rottweil'

'Saale-Holzland-Kreis'

'Saale-Orla-Kreis'

'Saalekreis'

'Saalfeld-Rudolstadt'

'Saarlouis'

'Saarpfalz-Kreis'

'Salzgitter, Stadt'

'Salzlandkreis'

'Schaumburg'

'Schleswig-Flensburg'

'Schmalkalden-Meiningen'

'Schwabach'

'Schwalm-Eder-Kreis'

'Schwandorf'

'Schwarzwald-Baar-Kreis'

'Schweinfurt'

'Schwerin'

'Schwäbisch Hall'

'Segeberg'

'Siegen-Wittgenstein'

'Sigmaringen'

'Soest'

'Solingen, Klingenstadt'

'Sonneberg'

'Speyer, kreisfreie Stadt'

'Spree-Neiße'

'St. Wendel'

'Stade'

'Starnberg'

'Steinburg'

'Steinfurt'

'Stendal'

'Stormarn'

'Straubing'

'Straubing-Bogen'

'Stuttgart, Stadtkreis'

'Städteregion Aachen'

'Suhl, Stadt'

'Sächsische Schweiz-Osterzgebirge'

'Sömmerda'

'Südliche Weinstraße'

'Südwestpfalz'

'Teltow-Fläming'

'Tirschenreuth'

'Traunstein'

'Trier, kreisfreie Stadt'

'Trier-Saarburg'

'Tuttlingen'

'Tübingen'

'Uckermark'

'Uelzen'

'Ulm, Stadtkreis'

'Unna'

'Unstrut-Hainich-Kreis'

'Unterallgäu'

'Vechta'

'Verden'

'Viersen'

'Vogelsbergkreis'

'Vogtlandkreis'

'Vorpommern-Greifswald'

'Vorpommern-Rügen'

'Vulkaneifel'

'Waldeck-Frankenberg'

'Waldshut'

'Warendorf'

'Wartburgkreis'

'Weiden i.d.OPf.'

'Weilheim-Schongau'

'Weimar, Stadt'

'Weimarer Land'

'Weißenburg-Gunzenhausen'

'Werra-Meißner-Kreis'

'Wesel'

'Wesermarsch'

'Westerwaldkreis'

'Wetteraukreis'

'Wiesbaden, Landeshauptstadt'

'Wilhelmshaven, Stadt'

'Wittenberg'

'Wittmund'

'Wolfenbüttel'

'Wolfsburg, Stadt'

'Worms, kreisfreie Stadt'

'Wunsiedel i.Fichtelgebirge'

'Wuppertal, Stadt'

'Würzburg'

'Zollernalbkreis'

'Zweibrücken, kreisfreie Stadt'

'Zwickau'

In [30]:
class MedianOfFreeBedsByKreisTableFactory:
    """Computes, per group, the median share of free beds among all beds."""

    def __init__(self, dataFrame):
        # Frame with the columns 'betten_frei' and 'betten_belegt' plus the grouping column(s).
        self.dataFrame = dataFrame

    def createMedianOfFreeBedsByKreisTable(self, kreisKey):
        """Return the median of free beds in percent per `kreisKey` group, sorted descending.

        Parameters:
            kreisKey: name of the column to group by (e.g. 'Kreis' or 'gemeindeschluessel').

        Returns:
            DataFrame indexed by `kreisKey` with the single column 'median_free_beds_in_percent'.
            The frame passed to the constructor is not modified.
        """
        freeBeds = self.dataFrame['betten_frei']
        occupiedBeds = self.dataFrame['betten_belegt']
        # Work on a derived frame so that the helper column does not leak into the caller's data.
        withShare = self.dataFrame.assign(
            free_beds_divided_by_all_beds_in_percent = freeBeds / (freeBeds + occupiedBeds) * 100)
        aggregated = withShare.groupby(kreisKey).agg(
            median_free_beds_in_percent =
                pd.NamedAgg(
                    column = 'free_beds_divided_by_all_beds_in_percent',
                    aggfunc = 'median'))
        return aggregated.sort_values(by = 'median_free_beds_in_percent', ascending = False)

In [31]:
from pandas.testing import assert_frame_equal
import statistics

class MedianOfFreeBedsByKreisTableFactoryTest(unittest.TestCase):
    """Unit test for MedianOfFreeBedsByKreisTableFactory.createMedianOfFreeBedsByKreisTable."""

    def test_createMedianOfFreeBedsByKreisTable(self):
        """The median share of free beds is computed per Kreis and sorted descending."""
        # Given
        # Three rows for 'Flensburg, Stadt' and a single row for 'Bamberg'.
        dataFrame = TestHelper.createDataFrame(
            columns = ['date',       'betten_frei', 'betten_belegt', 'Kreis'],
            data = [  ['2020-04-24', 40,            38,              'Flensburg, Stadt'],
                      ['2020-04-24', 42,            36,              'Flensburg, Stadt'],
                      ['2020-04-24', 44,            34,              'Flensburg, Stadt'],
                      ['2020-04-24', 9,             10,              'Bamberg']],
            index = [
                0,
                1,
                2,
                3])
        medianOfFreeBedsByKreisTableFactory = MedianOfFreeBedsByKreisTableFactory(dataFrame)
        
        # When
        medianOfFreeBedsByKreisTable = medianOfFreeBedsByKreisTableFactory.createMedianOfFreeBedsByKreisTable('Kreis')

        # Then
        # Expected values are free / (free + occupied) * 100, with the median taken per Kreis.
        assert_frame_equal(
            medianOfFreeBedsByKreisTable,
            TestHelper.createDataFrame(
                columns = ['median_free_beds_in_percent'],
                data = [  [statistics.median([40/(40 + 38) * 100, 42/(42 + 36) * 100, 44/(44 + 34) * 100])],
                          [9/(9 + 10) * 100]],
                index = pd.Index(
                    name = 'Kreis',
                    data = [
                        'Flensburg, Stadt',
                        'Bamberg'
                    ])),
            check_dtype = False)

In [32]:
unittest.main(argv = [''], verbosity = 2, exit = False)

test_setKreisOptions (__main__.KreisOptionsSetterTest) ... ok
test_createMedianOfFreeBedsByKreisTable (__main__.MedianOfFreeBedsByKreisTableFactoryTest) ... ok

----------------------------------------------------------------------
Ran 2 tests in 0.019s

OK


<unittest.main.TestProgram at 0x7f7609c09640>

In [33]:
medianOfFreeBedsByKreisTableFactory = MedianOfFreeBedsByKreisTableFactory(timeSeries)
medianOfFreeBedsByKreisTable = medianOfFreeBedsByKreisTableFactory.createMedianOfFreeBedsByKreisTable('Kreis')
medianOfFreeBedsByKreisTable.reset_index().to_json('../../docs/data/intensivstationen/medianOfFreeBedsByKreisTable.json', orient = "records")

In [34]:
def createMedianOfFreeBedsByKreisTableForChoroplethMap(medianOfFreeBedsByKreisTableFactory):
    """Return the median table grouped by 'gemeindeschluessel', enriched with 'Kreis' and 'Einwohnerzahl'.

    Uses the module-level `kreise` table for the lookup.
    """
    medianByKey = medianOfFreeBedsByKreisTableFactory.createMedianOfFreeBedsByKreisTable('gemeindeschluessel')
    # Turn the group key back into a regular column so that it can be used for the lookup.
    medianByKey = medianByKey.reset_index()
    columnsAdder = ColumnsAdder(kreise)
    return columnsAdder.addKreisAndEinwohnerzahlColumns(medianByKey)
    
medianOfFreeBedsByKreisTable = createMedianOfFreeBedsByKreisTableForChoroplethMap(medianOfFreeBedsByKreisTableFactory)

In [35]:
import requests
import json


class Datawrapper:
    """Small client for the Datawrapper v3 chart API, bound to one chart.

    Parameters:
        accessToken: API token sent as a Bearer token with every request.
        chartId: id of the chart to operate on; defaults to the chart used by this notebook.
    """

    API_BASE_URL = "https://api.datawrapper.de/v3/charts"
    DEFAULT_CHART_ID = "dYmYb"

    def __init__(self, accessToken, chartId = DEFAULT_CHART_ID):
        self.authHeader = {"Authorization": f"Bearer {accessToken}"}
        # All endpoints used below are derived from this chart URL.
        self.chartUrl = f"{Datawrapper.API_BASE_URL}/{chartId}"

    def setChartTitle(self, title):
        """PATCH the chart title and return the parsed JSON response."""
        response = requests.request(
            "PATCH",
            self.chartUrl,
            json={"title": title},
            headers={
                "Accept": "*/*",
                "Content-Type": "application/json"
            } | self.authHeader)
        return json.loads(response.text)

    def uploadChartData(self, data: pd.DataFrame):
        """PUT the columns 'gemeindeschluessel', 'median_free_beds_in_percent' and 'Kreis' as CSV.

        Returns the response body as text.
        """
        response = requests.request(
            "PUT",
            f"{self.chartUrl}/data",
            data=data.to_csv(
                index=False,
                columns=['gemeindeschluessel', 'median_free_beds_in_percent', 'Kreis']).encode("utf-8"),
            headers={
                "Accept": "*/*",
                "Content-Type": "text/csv"
            } | self.authHeader)
        return response.text

    def fetchChartData(self):
        """GET the chart data as CSV and return the response body as text."""
        response = requests.request(
            "GET",
            f"{self.chartUrl}/data",
            headers={
                "Accept": "text/csv"
            } | self.authHeader)

        return response.text

    def publishChart(self):
        """POST a publish request for the chart and return the parsed JSON response."""
        response = requests.request(
            "POST",
            f"{self.chartUrl}/publish",
            headers={
                "Accept": "*/*"
            } | self.authHeader)
        return json.loads(response.text)


In [36]:
def getChartTitle(dateStart, dateEnd):
    """Return the chart title for the period from `dateStart` to `dateEnd` (dates as DD.MM.YYYY)."""
    start, end = (date.strftime("%d.%m.%Y") for date in (dateStart, dateEnd))
    return f"Median freier Intensivbetten im Zeitraum {start} bis {end}"

In [37]:
from decouple import AutoConfig

config = AutoConfig(search_path='../..')
dataWrapper = Datawrapper(config('DATAWRAPPER_API_TOKEN'))
dataWrapper.setChartTitle(
    getChartTitle(
        dateStart=timeSeries['date'].min(),
        dateEnd=timeSeries['date'].max()))
dataWrapper.uploadChartData(medianOfFreeBedsByKreisTable)
dataWrapper.publishChart()


{'data': {'publicId': 'dYmYb',
  'language': 'de-DE',
  'theme': 'datawrapper',
  'id': 'dYmYb',
  'type': 'd3-maps-choropleth',
  'title': 'Median freier Intensivbetten im Zeitraum 24.04.2020 bis 30.03.2022',
  'lastEditStep': 5,
  'publishedAt': '2022-03-31T06:50:23.522Z',
  'publicUrl': 'https://datawrapper.dwcdn.net/dYmYb/41/',
  'publicVersion': 41,
  'deleted': False,
  'deletedAt': None,
  'forkable': True,
  'isFork': False,
  'metadata': {'data': {'transpose': False,
    'vertical-header': True,
    'horizontal-header': True,
    'column-format': {'ags': {'type': 'text'},
     'Kreis': {'type': 'text'},
     'Einwohnerzahl': {'type': 'number',
      'number-append': '',
      'number-format': 'auto',
      'number-divisor': 0,
      'number-prepend': ''},
     'gemeindeschluessel': {'type': 'text'},
     'median_free_beds_in_percent': {'type': 'number'}},
    'upload-method': 'upload'},
   'describe': {'source-name': 'Landkreis-Daten',
    'source-url': 'https://www.intensivre

In [None]:
def publishGitHubPages():
    """Stage, commit and push all changes of the GitHub Pages working copy.

    The body uses IPython magics and shell escapes, so it only runs inside
    IPython/Jupyter.
    """
    # NOTE(review): absolute, machine-specific path; the working directory of the
    # kernel stays changed after this call.
    %cd /home/frankknoll/Dokumente/Corona/projects/HowBadIsMyBatch-pages
    # Stage every change in the working copy.
    ! git add -A
    ! git commit -m "updating data"
    ! git push

In [None]:
publishGitHubPages()