Files
HowBadIsMyBatch/src/intensivstationen/Intensivstationen.ipynb
frankknoll 88996c7ada refactoring
2022-12-14 22:07:52 +01:00

395 lines
11 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "9de5907f-18f5-4cb1-903e-26028ff1fa03",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"\n",
"# Put the parent directory on sys.path so the project modules imported by the next cell can be found.\n",
"module_path = os.path.abspath(os.path.join('..'))\n",
"if module_path not in sys.path:\n",
" sys.path.append(module_path)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "640be762",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from urllib import request\n",
"from DateProvider import DateProvider\n",
"from datetime import datetime\n",
"from HtmlTransformerUtil import HtmlTransformerUtil\n",
"from ColumnsAdder import ColumnsAdder\n",
"from KreisOptionsSetter import KreisOptionsSetter\n",
"from TestHelper import TestHelper\n",
"from IOUtils import IOUtils\n",
"from Datawrapper import Datawrapper\n",
"from MedianOfFreeBedsByKreisTableFactory import MedianOfFreeBedsByKreisTableFactory\n",
"from TimeseriesReader import readTimeseries\n",
"from IntensivstationenHtmlFileUpdater import saveKreisOptions\n",
"\n",
"# Notebook-wide pandas display settings: up to 100 rows, no limit on displayed columns.\n",
"pd.set_option('display.max_rows', 100)\n",
"pd.set_option('display.max_columns', None)\n",
"# Make chained assignment raise an error instead of only emitting a warning.\n",
"pd.set_option('mode.chained_assignment', 'raise')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e40d4c8d",
"metadata": {},
"outputs": [],
"source": [
"# Print the current local date and time of this run in German notation.\n",
"print(datetime.now().strftime(\"%d.%m.%Y, %H:%M:%S Uhr\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "336f56e6",
"metadata": {},
"outputs": [],
"source": [
"# Show the two timestamps known to DateProvider and whether it reports that an update is needed.\n",
"# NOTE(review): needsUpdate presumably compares the two timestamps - confirm in DateProvider.\n",
"dateProvider = DateProvider()\n",
"print(' lastUpdated:', dateProvider.getLastUpdated())\n",
"print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource()) \n",
"# needsUpdate is passed on as the download flag of readTimeseries further down.\n",
"needsUpdate = dateProvider.needsUpdate()\n",
"print('needsUpdate:', needsUpdate)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "af101279",
"metadata": {},
"outputs": [],
"source": [
"def saveLastUpdatedIntensivstationen(lastUpdated, toHtmlFile = \"../../docs/intensivstationen.html\"):\n",
"    \"\"\"Write lastUpdated into the element with id \"Datenstand\" of an HTML file.\n",
"\n",
"    lastUpdated: object with a strftime method; it is formatted with\n",
"        DateProvider.INTENSIVSTATIONEN_DATE_FORMAT.\n",
"    toHtmlFile: path of the HTML file to rewrite. It defaults to the page this\n",
"        notebook has always written to and mirrors the toHtmlFile argument\n",
"        of saveKreisOptions.\n",
"    \"\"\"\n",
"    def setLastUpdated(soup):\n",
"        # Replace the text of the \"Datenstand\" element with the formatted date.\n",
"        formattedDate = lastUpdated.strftime(DateProvider.INTENSIVSTATIONEN_DATE_FORMAT)\n",
"        soup.find(id = \"Datenstand\").string.replace_with(formattedDate)\n",
"        return soup\n",
"\n",
"    HtmlTransformerUtil().applySoupTransformerToFile(\n",
"        file = toHtmlFile,\n",
"        soupTransformer = setLastUpdated)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "63be303c",
"metadata": {},
"outputs": [],
"source": [
"# Stamp the HTML page with the last-updated date reported for the data source.\n",
"saveLastUpdatedIntensivstationen(dateProvider.getLastUpdatedDataSource())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3f992231",
"metadata": {},
"outputs": [],
"source": [
"# Load the time series; the download flag is the needsUpdate value computed above.\n",
"# NOTE(review): presumably download = False reuses a previously downloaded file - confirm in TimeseriesReader.\n",
"timeSeries = readTimeseries(download = needsUpdate)\n",
"timeSeries"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2d34c6a4",
"metadata": {},
"outputs": [],
"source": [
"def readKreise(download = False):\n",
"    \"\"\"Read the Kreise master data from the workbook 04-kreise.xlsx.\n",
"\n",
"    download: when true, the workbook is fetched with _downloadKreise before\n",
"        it is read; otherwise the local file is used as is.\n",
"\n",
"    Returns a DataFrame with the columns Bundesland, Kreis and Einwohnerzahl\n",
"    whose index is named \"Key\". Raises KeyError when one of the expected\n",
"    header labels 2, 3 or 6 is missing from the sheet.\n",
"    \"\"\"\n",
"    kreiseFile = '04-kreise.xlsx'\n",
"    if download:\n",
"        _downloadKreise(kreiseFile)\n",
"\n",
"    kreise = pd.read_excel(\n",
"        kreiseFile,\n",
"        sheet_name = 'Kreisfreie Städte u. Landkreise',\n",
"        header = 5,\n",
"        index_col = 0)\n",
"    # The header row holds column numbers. Depending on the cell type they are\n",
"    # read as int or as str, so the lookup is done on the string form of the\n",
"    # label instead of relying on one specific type per column.\n",
"    columnNames = {'2': 'Bundesland', '3': 'Kreis', '6': 'Einwohnerzahl'}\n",
"    kreise = kreise.rename(columns = lambda label: columnNames.get(str(label), label))\n",
"    kreise = kreise[list(columnNames.values())]\n",
"    return kreise.rename_axis('Key')\n",
"\n",
"def _downloadKreise(kreiseFile):\n",
"    \"\"\"Download the destatis Kreise workbook and store it at kreiseFile.\n",
"\n",
"    The workbook can also be fetched by hand, either from the URL used below or via\n",
"    https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/Administrativ/04-kreise.html\n",
"    \"\"\"\n",
"    kreiseUrl = 'https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/Administrativ/04-kreise.xlsx?__blob=publicationFile'\n",
"    request.urlretrieve(kreiseUrl, kreiseFile)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "74ea4d55",
"metadata": {},
"outputs": [],
"source": [
"# Read the Kreise master data from the local 04-kreise.xlsx; with download = True readKreise fetches the workbook first.\n",
"kreise = readKreise(download = False)\n",
"kreise"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "62a20115",
"metadata": {},
"outputs": [],
"source": [
"# Enrich the time series using the kreise frame; later cells read its 'Kreis' and 'Einwohnerzahl' columns.\n",
"# NOTE(review): timeSeries is rebound to the enriched frame, so re-running only this cell feeds\n",
"# already-enriched data back in - re-run from the readTimeseries cell instead.\n",
"timeSeries = ColumnsAdder(kreise).addKreisAndBundeslandAndEinwohnerzahlColumns(timeSeries)\n",
"timeSeries"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "356494d3",
"metadata": {},
"outputs": [],
"source": [
"# Distinct Kreis names in sorted order; they drive both the <option> list and the per-Kreis JSON files below.\n",
"kreisValues = sorted(timeSeries['Kreis'].drop_duplicates().values)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "05aa0117",
"metadata": {},
"outputs": [],
"source": [
"def getKreisOptions(kreisValues):\n",
"    \"\"\"Return one HTML <option> element string per entry of kreisValues, in the same order.\"\"\"\n",
"    return [getKreisOption(kreis) for kreis in kreisValues]\n",
"\n",
"def getKreisOption(kreis):\n",
"    \"\"\"Return an HTML <option> element whose value and label are both kreis.\n",
"\n",
"    The name comes from an external data file, so it is HTML-escaped: characters\n",
"    such as &, <, > or quotes would otherwise break the generated markup.\n",
"    \"\"\"\n",
"    from html import escape\n",
"\n",
"    escapedKreis = escape(str(kreis), quote = True)\n",
"    return f'<option value=\"{escapedKreis}\">{escapedKreis}</option>'\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c38ca16",
"metadata": {},
"outputs": [],
"source": [
"# The preselected first entry has the value 'de', the same suffix _getSuffix uses for the file covering all Kreise.\n",
"kreisOptions = ['<option selected=\"\" value=\"de\">Alle Landkreise</option>'] + getKreisOptions(kreisValues)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5dd8d864",
"metadata": {},
"outputs": [],
"source": [
"# Hand the options to saveKreisOptions together with the target HTML file.\n",
"# NOTE(review): presumably this replaces the options of the Kreis select in that page - confirm in IntensivstationenHtmlFileUpdater.\n",
"saveKreisOptions(kreisOptions, toHtmlFile=\"../../docs/intensivstationen.html\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "997a4bdb",
"metadata": {},
"outputs": [],
"source": [
"def getIntensiveCareBeds(timeSeries, kreis = None):\n",
"    \"\"\"Return date, occupied beds, free beds and population for one Kreis or for all rows.\n",
"\n",
"    With a kreis, the rows of timeSeries belonging to that Kreis are returned\n",
"    as they are. Without one, the three value columns are summed over all\n",
"    rows that share a date, giving one row per date.\n",
"    \"\"\"\n",
"    valueColumns = ['betten_belegt', 'betten_frei', 'Einwohnerzahl']\n",
"    if kreis is None:\n",
"        # Totals per date over everything contained in timeSeries.\n",
"        return timeSeries.groupby('date')[valueColumns].sum().reset_index()\n",
"\n",
"    rowsOfKreis = timeSeries['Kreis'] == kreis\n",
"    return timeSeries.loc[rowsOfKreis, ['date'] + valueColumns]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a97f5b2b",
"metadata": {},
"outputs": [],
"source": [
"def getAndPersistIntensiveCareBeds(timeSeries, kreis = None):\n",
"    \"\"\"Select the bed data for kreis (all rows combined when None), save it as JSON and return it.\"\"\"\n",
"    beds = getIntensiveCareBeds(timeSeries, kreis)\n",
"    # Progress output: shows which Kreis is currently being written.\n",
"    display(kreis)\n",
"    jsonFile = _getFilename(kreis)\n",
"    _saveAsJson(beds, jsonFile)\n",
"    return beds\n",
"\n",
"\n",
"def _saveAsJson(intensiveCareBeds, file):\n",
"    \"\"\"Save the population and the per-date bed records of intensiveCareBeds as JSON to file.\n",
"\n",
"    The population is the Einwohnerzahl value of the first row, converted to int.\n",
"    \"\"\"\n",
"    firstRow = intensiveCareBeds.iloc[0]\n",
"    payload = {\n",
"        'population': int(firstRow['Einwohnerzahl']),\n",
"        'data': _intensiveCareBeds2Dict(intensiveCareBeds),\n",
"    }\n",
"    IOUtils.saveDictAsJson(payload, file)\n",
"\n",
"\n",
"def _intensiveCareBeds2Dict(intensiveCareBeds):\n",
"    \"\"\"Return one dict per row holding date (as 'YYYY-MM-DD' string), betten_belegt and betten_frei.\n",
"\n",
"    The frame passed in is left unmodified.\n",
"    \"\"\"\n",
"    bedsPerDate = intensiveCareBeds[['date', 'betten_belegt', 'betten_frei']]\n",
"    isoDates = bedsPerDate['date'].dt.strftime('%Y-%m-%d')\n",
"    return bedsPerDate.assign(date = isoDates).to_dict(orient = 'records')\n",
"\n",
"\n",
"def _getFilename(kreis):\n",
"    \"\"\"Return the path of the JSON data file for kreis (suffix 'de' when kreis is None).\"\"\"\n",
"    suffix = _getSuffix(kreis)\n",
"    return f'../../docs/data/intensivstationen/intensivstationen-{suffix}.json'\n",
"\n",
"\n",
"def _getSuffix(kreis):\n",
"    \"\"\"Return kreis itself, or 'de' as the stand-in when no kreis is given.\"\"\"\n",
"    if kreis is None:\n",
"        return 'de'\n",
"    return kreis\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "349edd73",
"metadata": {},
"outputs": [],
"source": [
"# No kreis given: writes the file with suffix 'de', holding the per-date sums over all rows.\n",
"getAndPersistIntensiveCareBeds(timeSeries)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b97137f",
"metadata": {},
"outputs": [],
"source": [
"# Write one JSON file per Kreis; the Kreis name becomes part of the file name.\n",
"for kreis in kreisValues:\n",
" getAndPersistIntensiveCareBeds(timeSeries, kreis)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f10704f3",
"metadata": {},
"outputs": [],
"source": [
"# Build the table via the factory with 'Kreis' as argument and export it as a JSON list of records.\n",
"medianOfFreeBedsByKreisTableFactory = MedianOfFreeBedsByKreisTableFactory(timeSeries)\n",
"medianOfFreeBedsByKreisTable = medianOfFreeBedsByKreisTableFactory.createMedianOfFreeBedsByKreisTable('Kreis')\n",
"medianOfFreeBedsByKreisTable.reset_index().to_json('../../docs/data/intensivstationen/medianOfFreeBedsByKreisTable.json', orient = \"records\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "396cb4f8",
"metadata": {},
"outputs": [],
"source": [
"def createMedianOfFreeBedsByKreisTableForChoroplethMap(medianOfFreeBedsByKreisTableFactory):\n",
"    \"\"\"Build the median table for 'gemeindeschluessel' and add the Kreis and Einwohnerzahl columns.\n",
"\n",
"    The added columns are derived from the notebook-level kreise frame.\n",
"    \"\"\"\n",
"    tableByGemeindeschluessel = medianOfFreeBedsByKreisTableFactory.createMedianOfFreeBedsByKreisTable('gemeindeschluessel')\n",
"    flatTable = tableByGemeindeschluessel.reset_index()\n",
"    columnsAdder = ColumnsAdder(kreise)\n",
"    return columnsAdder.addKreisAndEinwohnerzahlColumns(flatTable)\n",
" \n",
"# NOTE(review): this rebinds medianOfFreeBedsByKreisTable (until here the table built for 'Kreis')\n",
"# to the 'gemeindeschluessel' variant, which is what gets uploaded to Datawrapper below.\n",
"medianOfFreeBedsByKreisTable = createMedianOfFreeBedsByKreisTableForChoroplethMap(medianOfFreeBedsByKreisTableFactory)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1b75357",
"metadata": {},
"outputs": [],
"source": [
"def getChartTitle(dateStart, dateEnd):\n",
"    \"\"\"Return the German chart title covering the period from dateStart to dateEnd.\n",
"\n",
"    Both arguments need a strftime method; they are rendered as DD.MM.YYYY.\n",
"    \"\"\"\n",
"    dateFormat = '%d.%m.%Y'\n",
"    start = dateStart.strftime(dateFormat)\n",
"    end = dateEnd.strftime(dateFormat)\n",
"    return f\"Median freier Intensivbetten im Zeitraum {start} bis {end}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9de2f34e",
"metadata": {},
"outputs": [],
"source": [
"from decouple import AutoConfig\n",
"\n",
"# The Datawrapper API token is looked up through decouple (search path '../..') rather than written into the notebook.\n",
"config = AutoConfig(search_path='../..')\n",
"dataWrapper = Datawrapper(config('DATAWRAPPER_API_TOKEN'))\n",
"# The title spans the earliest to the latest date present in timeSeries.\n",
"dataWrapper.setChartTitle(\n",
" getChartTitle(\n",
" dateStart=timeSeries['date'].min(),\n",
" dateEnd=timeSeries['date'].max()))\n",
"# Upload the table created by createMedianOfFreeBedsByKreisTableForChoroplethMap, then publish the chart.\n",
"dataWrapper.uploadChartData(medianOfFreeBedsByKreisTable)\n",
"dataWrapper.publishChart()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0218cdb4",
"metadata": {},
"outputs": [],
"source": [
"def publishGitHubPages(repoDir = '/home/frankknoll/Dokumente/Corona/projects/HowBadIsMyBatch-pages'):\n",
"    \"\"\"Stage, commit and push every change in the GitHub Pages working copy at repoDir.\n",
"\n",
"    repoDir: path of the git working copy to publish; defaults to the\n",
"        location this notebook has always used.\n",
"\n",
"    git is pointed at repoDir with -C instead of changing into it with %cd, so\n",
"    the kernel's working directory - and with it the relative paths used by the\n",
"    other cells - stays valid when cells are re-run after publishing.\n",
"    \"\"\"\n",
"    ! git -C \"{repoDir}\" add -A\n",
"    ! git -C \"{repoDir}\" commit -m \"updating data for Intensivstationen\"\n",
"    ! git -C \"{repoDir}\" push"
]
},
{
"cell_type": "markdown",
"id": "a1e83b07",
"metadata": {},
"source": [
"### See the published page: https://knollfrank.github.io/HowBadIsMyBatch/intensivstationen.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5f173c2b",
"metadata": {},
"outputs": [],
"source": [
"# Commit and push the regenerated files so the published page picks them up.\n",
"publishGitHubPages()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "howbadismybatch-venv-kernel",
"language": "python",
"name": "howbadismybatch-venv-kernel"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
},
"vscode": {
"interpreter": {
"hash": "1bce2b9b19ce5f16d695ff75ac05095b3e564c169ff454b58b87cb796c0695b8"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}