moving files around

2022-02-28 13:22:12 +01:00
parent 51545cdca0
commit 9281412843
6 changed files with 14160 additions and 111 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
 src/VAERS/
-src/intensivstationen/
+src/intensivstationen/zeitreihe-tagesdaten.csv
 .ipynb_checkpoints/
 .history/
 src/config/
--- a/docs/intensivstationen.html
+++ b/docs/intensivstationen.html
@@ -43,7 +43,7 @@
            label: 'Belegte Betten',
            data: data,
            parsing: {
-              yAxisKey: 'Belegte_Intensivbetten'
+              yAxisKey: 'betten_belegt'
            },
            backgroundColor: 'rgba(255, 0, 0, 1)',
          },
@@ -51,17 +51,9 @@
            label: 'Freie Betten',
            data: data,
            parsing: {
-              yAxisKey: 'Freie_Intensivbetten'
+              yAxisKey: 'betten_frei'
            },
            backgroundColor: 'rgba(0, 255, 0, 1)',
-          },
-          {
-            label: 'Notfallreserve',
-            data: data,
-            parsing: {
-              yAxisKey: '7_Tage_Notfallreserve'
-            },
-            backgroundColor: 'rgba(0, 0, 255, 1)',
          }
        ]
      };
@@ -89,7 +81,7 @@
          }
        },
        parsing: {
-          xAxisKey: 'Datum'
+          xAxisKey: 'date'
        }
      };
    }
--- a/src/Intensivstationen.ipynb
+++ b/src/Intensivstationen.ipynb
@@ -1,98 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9de5907f-18f5-4cb1-903e-26028ff1fa03",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "\n",
-    "pd.set_option('display.max_rows', 100)\n",
-    "pd.set_option('display.max_columns', None)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "997a4bdb",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def readTimeseries(bundesland = None):\n",
-    "    dataFrame = pd.read_csv(\n",
-    "        'intensivstationen/zeitreihe-bundeslaender.csv',\n",
-    "        low_memory = False,\n",
-    "        parse_dates = ['Datum'],\n",
-    "        date_parser = lambda dateStr: pd.to_datetime(dateStr, utc = True))\n",
-    "    if bundesland is not None:\n",
-    "        return dataFrame[dataFrame['Bundesland'] == bundesland][['Datum', 'Belegte_Intensivbetten', 'Freie_Intensivbetten', '7_Tage_Notfallreserve']]\n",
-    "    else:\n",
-    "        return dataFrame.groupby('Datum').agg(**{\n",
-    "                        'Belegte_Intensivbetten': pd.NamedAgg(column = 'Belegte_Intensivbetten', aggfunc = 'sum'),\n",
-    "                        'Freie_Intensivbetten':   pd.NamedAgg(column = 'Freie_Intensivbetten',   aggfunc = 'sum'),\n",
-    "                        '7_Tage_Notfallreserve':  pd.NamedAgg(column = '7_Tage_Notfallreserve',  aggfunc = 'sum'), \n",
-    "                    }).reset_index()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a97f5b2b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def readAndPersistTimeseries(bundesland = None):\n",
-    "    dataFrame = readTimeseries(bundesland)\n",
-    "    dataFrame.to_csv(_getFilename(bundesland), index =False)\n",
-    "    return dataFrame\n",
-    "\n",
-    "def _getFilename(bundesland):\n",
-    "    return '../docs/data/intensivstationen/intensivstationen{suffix}.csv'.format(suffix = '-' + bundesland if bundesland is not None else '-de')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "63ca93a4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "readAndPersistTimeseries(bundesland = 'BADEN_WUERTTEMBERG')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "349edd73",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "readAndPersistTimeseries()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
--- a/src/help.txt
+++ b/src/help.txt
@@ -1,6 +1,12 @@
 jupyter notebook

-https://www.intensivregister.de/#/aktuelle-lage/zeitreihen
+zwei Spalten darstellen:
+- linke und rechte Spalte jeweils mit Bundesland-Menu und Landkreise-Submenu des ausgewählten Bundeslandes
+- Dargestellt werden in einem Chart die freien Betten + belegten Betten des ausgewählten Landkreises
+  (analog zu "Gesamtzahl gemeldeter Intensivbetten (Betreibbare Betten und Notfallreserve)", siehe https://www.intensivregister.de/#/aktuelle-lage/zeitreihen)
+- Zugrundegelegter Datensatz: zeitreihe-tagesdaten.csv aus https://www.intensivregister.de/#/aktuelle-lage/downloads
+
+src/intensivstationen/AGS_2022-02-28.json downloaded from https://www.xrepository.de/details/urn:de:bund:destatis:bevoelkerungsstatistik:schluessel:ags

 get VAERS data:
 - download data (e.g. 2022VAERSData.zip) from https://vaers.hhs.gov/data/datasets.html and save and unzip in VAERS folder
--- a/src/intensivstationen/Intensivstationen.ipynb
+++ b/src/intensivstationen/Intensivstationen.ipynb
@@ -0,0 +1,232 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9de5907f-18f5-4cb1-903e-26028ff1fa03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "pd.set_option('display.max_rows', 100)\n",
+    "pd.set_option('display.max_columns', None)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eee150b8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Bundesland names keyed by two-character id strings ('01' to '16').\n",
+    "# The dict is handed to KreisAndBundeslandColumnAdder, which looks up the values\n",
+    "# of the 'bundesland' column in it to fill the 'Bundesland' name column.\n",
+    "bundeslandById = {\n",
+    "    '01': 'Schleswig-Holstein',\n",
+    "    '02': 'Freie Hansestadt Hamburg',\n",
+    "    '03': 'Niedersachsen',\n",
+    "    '04': 'Freie Hansestadt Bremen',\n",
+    "    '05': 'Nordrhein-Westfalen',\n",
+    "    '06': 'Hessen',\n",
+    "    '07': 'Rheinland-Pfalz',\n",
+    "    '08': 'Baden-Württemberg',\n",
+    "    '09': 'Freistaat Bayern',\n",
+    "    '10': 'Saarland',\n",
+    "    '11': 'Berlin',\n",
+    "    '12': 'Brandenburg',\n",
+    "    '13': 'Mecklenburg-Vorpommern',\n",
+    "    '14': 'Freistaat Sachsen',\n",
+    "    '15': 'Sachsen-Anhalt',\n",
+    "    '16': 'Freistaat Thüringen'}\n",
+    "# Bare expression: shows the mapping as the cell's output.\n",
+    "bundeslandById"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f7868050",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def readKreisByKreisschluessel():\n",
+    "    \"\"\"Load the Kreis lookup table from 'kreisByKreisschluessel.csv'.\n",
+    "\n",
+    "    Returns a DataFrame indexed by 'Kreisschluessel' (read as string) whose\n",
+    "    'Kreis' values have surrounding whitespace stripped; only rows whose\n",
+    "    key is exactly 5 characters long are kept.\n",
+    "    \"\"\"\n",
+    "    readOptions = dict(\n",
+    "        low_memory = False,\n",
+    "        index_col = 'Kreisschluessel',\n",
+    "        dtype = {'Kreisschluessel': 'string'})\n",
+    "    lookup = pd.read_csv('kreisByKreisschluessel.csv', **readOptions)\n",
+    "    lookup['Kreis'] = lookup['Kreis'].str.strip()\n",
+    "    hasFiveCharacterKey = lookup.index.str.len() == 5\n",
+    "    return lookup[hasFiveCharacterKey]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "712a919c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kreisByKreisschluessel = readKreisByKreisschluessel()\n",
+    "kreisByKreisschluessel"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c1dea262",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class TimeseriesReader:\n",
+    "    \"\"\"Reads the daily timeseries from 'zeitreihe-tagesdaten.csv' in the working directory.\"\"\"\n",
+    "\n",
+    "    def readTimeseries(self):\n",
+    "        \"\"\"Load the columns used for the bed charts.\n",
+    "\n",
+    "        Returns a DataFrame with 'date' parsed as datetime (format %Y-%m-%d),\n",
+    "        'bundesland' and 'gemeindeschluessel' as strings, plus 'betten_belegt'\n",
+    "        and 'betten_frei' with the dtypes pandas infers.\n",
+    "        \"\"\"\n",
+    "        timeseries = pd.read_csv(\n",
+    "            'zeitreihe-tagesdaten.csv',\n",
+    "            low_memory = False,\n",
+    "            usecols = ['date', 'bundesland', 'gemeindeschluessel', 'betten_belegt', 'betten_frei'],\n",
+    "            # Keys are read as strings so that values such as '01' are not turned into numbers.\n",
+    "            dtype = {\n",
+    "                'gemeindeschluessel': 'string',\n",
+    "                'bundesland': 'string'\n",
+    "                })\n",
+    "        # Convert the dates after loading: read_csv's date_parser argument is deprecated\n",
+    "        # since pandas 2.0, while pd.to_datetime with an explicit format works in every version.\n",
+    "        timeseries['date'] = pd.to_datetime(timeseries['date'], format = \"%Y-%m-%d\")\n",
+    "        return timeseries\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d978b069",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "timeSeries = TimeseriesReader().readTimeseries()\n",
+    "timeSeries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "af96fb11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class KreisAndBundeslandColumnAdder:\n",
+    "    \"\"\"Adds the name columns 'Kreis' and 'Bundesland' to a timeseries frame.\"\"\"\n",
+    "\n",
+    "    def __init__(self, kreisByKreisschluessel, bundeslandById):\n",
+    "        \"\"\"Keep the two lookups.\n",
+    "\n",
+    "        kreisByKreisschluessel: DataFrame indexed by Kreis key with a 'Kreis' column.\n",
+    "        bundeslandById: mapping from Bundesland id to Bundesland name.\n",
+    "        \"\"\"\n",
+    "        self.kreisByKreisschluessel = kreisByKreisschluessel\n",
+    "        self.bundeslandById = bundeslandById\n",
+    "\n",
+    "    def addKreisAndBundeslandColumn(self, dataFrame):\n",
+    "        \"\"\"Set 'Kreis' (from 'gemeindeschluessel') and 'Bundesland' (from 'bundesland').\n",
+    "\n",
+    "        The columns are written into the passed dataFrame, which is also returned.\n",
+    "        Raises KeyError if a key is missing from the respective lookup.\n",
+    "        \"\"\"\n",
+    "        # Build a plain dict once instead of doing a DataFrame.loc lookup for every row;\n",
+    "        # indexing the dict still raises KeyError for unknown keys.\n",
+    "        kreisNames = self.kreisByKreisschluessel['Kreis'].to_dict()\n",
+    "        dataFrame['Kreis'] = dataFrame['gemeindeschluessel'].map(lambda gemeindeschluessel: kreisNames[gemeindeschluessel])\n",
+    "        dataFrame['Bundesland'] = dataFrame['bundesland'].map(lambda bundesland: self.bundeslandById[bundesland])\n",
+    "        return dataFrame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62a20115",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "timeSeries = KreisAndBundeslandColumnAdder(kreisByKreisschluessel, bundeslandById).addKreisAndBundeslandColumn(timeSeries)\n",
+    "timeSeries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "997a4bdb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def readTimeseries(bundesland = None):\n",
+    "    \"\"\"Read the daily timeseries and sum the bed counts per date.\n",
+    "\n",
+    "    bundesland: a name from bundeslandById (e.g. 'Hessen') to restrict the rows\n",
+    "        to that Bundesland, or None to use all rows.\n",
+    "    Returns a DataFrame with the columns 'date', 'betten_belegt' and 'betten_frei'.\n",
+    "    Raises ValueError if bundesland is not a name contained in bundeslandById.\n",
+    "    \"\"\"\n",
+    "    dataFrame = TimeseriesReader().readTimeseries()\n",
+    "    display(dataFrame)\n",
+    "    # info() prints its report itself and returns None, so wrapping it in display() only adds a stray 'None'.\n",
+    "    dataFrame.info()\n",
+    "    if bundesland is not None:\n",
+    "        # The data read by TimeseriesReader has no 'Bundesland' name column, only the id\n",
+    "        # column 'bundesland', so translate the requested name back to its id(s).\n",
+    "        bundeslandIds = [bundeslandId for bundeslandId, name in bundeslandById.items() if name == bundesland]\n",
+    "        if not bundeslandIds:\n",
+    "            raise ValueError('unknown Bundesland: {bundesland}'.format(bundesland = bundesland))\n",
+    "        dataFrame = dataFrame[dataFrame['bundesland'].isin(bundeslandIds)]\n",
+    "    # The rows carry a 'gemeindeschluessel' each, so sum them up to one row per date in both cases.\n",
+    "    return dataFrame.groupby('date').agg(**{\n",
+    "                    'betten_belegt': pd.NamedAgg(column = 'betten_belegt', aggfunc = 'sum'),\n",
+    "                    'betten_frei':   pd.NamedAgg(column = 'betten_frei',   aggfunc = 'sum') \n",
+    "                }).reset_index()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a97f5b2b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def readAndPersistTimeseries(bundesland = None):\n",
+    "    \"\"\"Write the frame from readTimeseries(bundesland) as CSV (without index) to _getFilename(bundesland) and return it.\"\"\"\n",
+    "    timeseries = readTimeseries(bundesland)\n",
+    "    csvPath = _getFilename(bundesland)\n",
+    "    timeseries.to_csv(csvPath, index = False)\n",
+    "    return timeseries\n",
+    "\n",
+    "def _getFilename(bundesland):\n",
+    "    \"\"\"Return the CSV path below ../../docs/data/intensivstationen/, suffixed '-<bundesland>' or '-de' for None.\"\"\"\n",
+    "    if bundesland is None:\n",
+    "        suffix = '-de'\n",
+    "    else:\n",
+    "        suffix = '-' + bundesland\n",
+    "    return '../../docs/data/intensivstationen/intensivstationen{suffix}.csv'.format(suffix = suffix)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "63ca93a4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# readAndPersistTimeseries(bundesland = 'BADEN_WUERTTEMBERG')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "349edd73",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataFrame = readAndPersistTimeseries()\n",
+    "dataFrame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "21776a31",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataFrame.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cc5974cd",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/src/intensivstationen/kreisByKreisschluessel.csv
+++ b/src/intensivstationen/kreisByKreisschluessel.csv