Files
HowBadIsMyBatch/src/intensivstationen/Intensivstationen.ipynb
frankknoll d73f84e4e8 refactoring
2022-03-02 11:57:32 +01:00

289 lines
8.5 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "9de5907f-18f5-4cb1-903e-26028ff1fa03",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from urllib import request\n",
"\n",
"pd.set_option('display.max_rows', 100)\n",
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d021de84",
"metadata": {},
"outputs": [],
"source": [
"# download https://diviexchange.blob.core.windows.net/%24web/zeitreihe-tagesdaten.csv or https://www.intensivregister.de/#/aktuelle-lage/downloads\n",
"if False:\n",
" request.urlretrieve(\n",
" 'https://diviexchange.blob.core.windows.net/%24web/zeitreihe-tagesdaten.csv',\n",
" 'zeitreihe-tagesdaten.csv')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f179762b",
"metadata": {},
"outputs": [],
"source": [
"def readTimeseries():\n",
" timeseries = pd.read_csv(\n",
" 'zeitreihe-tagesdaten.csv',\n",
" low_memory = False,\n",
" usecols = ['date', 'bundesland', 'gemeindeschluessel', 'betten_belegt', 'betten_frei'],\n",
" parse_dates = ['date'],\n",
" date_parser = lambda dateStr: pd.to_datetime(dateStr, format = \"%Y-%m-%d\"),\n",
" dtype = {\n",
" 'gemeindeschluessel': 'string',\n",
" 'bundesland': 'string'\n",
" })\n",
" return timeseries.sort_values(by = 'date', ascending = True)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2d34c6a4",
"metadata": {},
"outputs": [],
"source": [
"# download https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/Administrativ/04-kreise.xlsx?__blob=publicationFile or https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/Administrativ/04-kreise.html\n",
"if False:\n",
" request.urlretrieve(\n",
" 'https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/Administrativ/04-kreise.xlsx?__blob=publicationFile',\n",
" '04-kreise.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0927a6c2",
"metadata": {},
"outputs": [],
"source": [
"def readKreise():\n",
" kreise = pd.read_excel(\n",
" '04-kreise.xlsx',\n",
" sheet_name = 'Kreisfreie Städte u. Landkreise',\n",
" header = 5,\n",
" index_col = 0)\n",
" kreise = kreise.rename(columns = {'2': 'Bundesland', 3: 'Kreis', 6: 'Einwohnerzahl'})[['Bundesland', 'Kreis', 'Einwohnerzahl']]\n",
" kreise.index.set_names(\"Key\", inplace = True)\n",
" return kreise"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "af96fb11",
"metadata": {},
"outputs": [],
"source": [
"class ColumnsAdder:\n",
"\n",
" def __init__(self, kreise):\n",
" self.kreise = kreise\n",
"\n",
" def addKreisAndBundeslandAndEinwohnerzahlColumns(self, dataFrame):\n",
" dataFrame_kreise = pd.merge(dataFrame, self.kreise, how = 'left', left_on = 'gemeindeschluessel', right_index = True)\n",
" dataFrame['Kreis'] = dataFrame_kreise['Kreis']\n",
" dataFrame['Einwohnerzahl'] = dataFrame_kreise['Einwohnerzahl']\n",
" return self._addBundeslandColumn(dataFrame)\n",
" \n",
" def _addBundeslandColumn(self, dataFrame):\n",
" return pd.merge(\n",
" dataFrame,\n",
" self._createBundeslandByKeyTable(),\n",
" how = 'left',\n",
" left_on = 'bundesland',\n",
" right_index = True)\n",
"\n",
" def _createBundeslandByKeyTable(self):\n",
" return self.kreise[self.kreise.index.str.len() == 2][['Bundesland']]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "62a20115",
"metadata": {},
"outputs": [],
"source": [
"timeSeries = ColumnsAdder(readKreise()).addKreisAndBundeslandAndEinwohnerzahlColumns(readTimeseries())\n",
"timeSeries"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "356494d3",
"metadata": {},
"outputs": [],
"source": [
"kreisValues = timeSeries['Kreis'].drop_duplicates().values\n",
"kreisValues"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "05aa0117",
"metadata": {},
"outputs": [],
"source": [
"def printKreisOptions(kreisValues):\n",
" for kreis in kreisValues:\n",
" printKreisOption(kreis)\n",
"\n",
"def printKreisOption(kreis):\n",
" print('<option value=\"{kreis}\">{kreis}</option>'.format(kreis = kreis))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "33a4b725",
"metadata": {},
"outputs": [],
"source": [
"kreisValues = sorted(kreisValues)\n",
"printKreisOptions(kreisValues)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "43c2f826",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"\n",
"class IOUtils:\n",
"\n",
" def saveDictAsJson(dict, file):\n",
" IOUtils.ensurePath(file)\n",
" with open(file, 'w') as outfile:\n",
" json.dump(dict, outfile)\n",
"\n",
" @staticmethod\n",
" def ensurePath(file):\n",
" directory = os.path.dirname(file)\n",
" if not os.path.exists(directory):\n",
" os.makedirs(directory)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "997a4bdb",
"metadata": {},
"outputs": [],
"source": [
"def getIntensiveCareBeds(timeSeries, kreis = None):\n",
" if kreis is not None:\n",
" return timeSeries[timeSeries['Kreis'] == kreis][['date', 'betten_belegt', 'betten_frei', 'Einwohnerzahl']]\n",
" else:\n",
" return timeSeries.groupby('date').agg(**{\n",
" 'betten_belegt': pd.NamedAgg(column = 'betten_belegt', aggfunc = 'sum'),\n",
" 'betten_frei': pd.NamedAgg(column = 'betten_frei', aggfunc = 'sum'),\n",
" 'Einwohnerzahl': pd.NamedAgg(column = 'Einwohnerzahl', aggfunc = 'sum') \n",
" }).reset_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a97f5b2b",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"\n",
"def getAndPersistIntensiveCareBeds(timeSeries, kreis=None):\n",
" dataFrame = getIntensiveCareBeds(timeSeries, kreis)\n",
" display(kreis)\n",
" _saveDataFrameAsJson(dataFrame, _getFilename(kreis))\n",
" return dataFrame\n",
"\n",
"\n",
"def _saveDataFrameAsJson(dataFrame, file):\n",
" IOUtils.saveDictAsJson(\n",
" {\n",
" 'population': int(dataFrame.iloc[0]['Einwohnerzahl']),\n",
" 'data': _dataFrame2Dict(dataFrame),\n",
" },\n",
" file)\n",
"\n",
"\n",
"def _dataFrame2Dict(dataFrame):\n",
" df = dataFrame[['date', 'betten_belegt', 'betten_frei']]\n",
" df['date'] = df['date'].dt.strftime('%Y-%m-%d')\n",
" return df.to_dict(orient=\"records\")\n",
"\n",
"\n",
"def _getFilename(kreis):\n",
" return '../../docs/data/intensivstationen/intensivstationen-{suffix}.json'.format(suffix=_getSuffix(kreis))\n",
"\n",
"\n",
"def _getSuffix(kreis):\n",
" return kreis if kreis is not None else 'de'\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "349edd73",
"metadata": {},
"outputs": [],
"source": [
"getAndPersistIntensiveCareBeds(timeSeries)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b97137f",
"metadata": {},
"outputs": [],
"source": [
"for kreis in kreisValues:\n",
" getAndPersistIntensiveCareBeds(timeSeries, kreis)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}