From 3c425e46cfe705a4fa9f959f1de905e587ffa1dd Mon Sep 17 00:00:00 2001 From: frankknoll Date: Mon, 7 Mar 2022 01:05:59 +0100 Subject: [PATCH] needsUpdate for DIVI --- .gitignore | 1 + src/HowBadIsMyBatch.ipynb | 4 +- src/intensivstationen/Intensivstationen.ipynb | 48 +++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 90cec823798..292bff8a8f9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ src/VAERS/ src/intensivstationen/zeitreihe-tagesdaten.csv +src/intensivstationen/geckodriver.log .ipynb_checkpoints/ .history/ src/config/ diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index 5ecd236e50e..8e793853ea5 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -27,9 +27,9 @@ "from dateutil.parser import parse\n", "\n", "def needsUpdate():\n", - " return _getLastUpdatedHowbadismybatch() < _getLastUpdatedOriginal()\n", + " return _getLastUpdatedBatchCodeTable() < _getLastUpdatedOriginal()\n", " \n", - "def _getLastUpdatedHowbadismybatch():\n", + "def _getLastUpdatedBatchCodeTable():\n", " return _getLastUpdated(\n", " url = \"https://knollfrank.github.io/HowBadIsMyBatch/batchCodeTable.html\",\n", " getDateStr = lambda soup: soup.find(id = \"last_updated\").text)\n", diff --git a/src/intensivstationen/Intensivstationen.ipynb b/src/intensivstationen/Intensivstationen.ipynb index 63d3a38b35e..33e52e60cec 100644 --- a/src/intensivstationen/Intensivstationen.ipynb +++ b/src/intensivstationen/Intensivstationen.ipynb @@ -15,6 +15,54 @@ "pd.set_option('display.max_columns', None)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "79de4057", + "metadata": {}, + "outputs": [], + "source": [ + "from bs4 import BeautifulSoup\n", + "import requests\n", + "from datetime import datetime\n", + "from time import sleep\n", + "from selenium import webdriver\n", + "\n", + "def needsUpdate():\n", + " lastUpdatedIntensivstationen = _getLastUpdatedIntensivstationen()\n", + " print('lastUpdatedIntensivstationen:', lastUpdatedIntensivstationen)\n", + " lastUpdatedOriginal = _getLastUpdatedOriginal()\n", + " print('lastUpdatedOriginal:', lastUpdatedOriginal)\n", + " return lastUpdatedIntensivstationen < lastUpdatedOriginal\n", + " \n", + "def _getLastUpdatedIntensivstationen():\n", + " htmlContent = requests.get(\"https://knollfrank.github.io/HowBadIsMyBatch/intensivstationen.html\").text\n", + " soup = BeautifulSoup(htmlContent, \"lxml\")\n", + " dateStr = soup.find(id = \"Datenstand\").text\n", + " return datetime.strptime(dateStr, \"%d.%m.%Y, %H:%M Uhr\")\n", + "\n", + "def _getLastUpdatedOriginal():\n", + " html = _getOriginalHtml()\n", + " dataFrame = _asDataFrame(html)\n", + " return dataFrame.loc['Landkreis-Daten', 'Letzte Änderung'].to_pydatetime()\n", + "\n", + "def _getOriginalHtml():\n", + " driver = webdriver.Firefox()\n", + " driver.get('https://www.intensivregister.de/#/aktuelle-lage/downloads')\n", + " sleep(5)\n", + " innerHTML = driver.execute_script(\"return document.body.innerHTML\")\n", + " driver.quit()\n", + " return innerHTML\n", + "\n", + "def _asDataFrame(html):\n", + " dataFrame = pd.read_html(html, parse_dates = ['Letzte Änderung'])[0]\n", + " dataFrame.set_index('Name', inplace = True)\n", + " dataFrame['Letzte Änderung'] = pd.to_datetime(dataFrame['Letzte Änderung'], format = \"%d.%m.%Y %H:%M Uhr\")\n", + " return dataFrame\n", + "\n", + "print('needsUpdate: ', needsUpdate())" + ] + }, { "cell_type": "code", "execution_count": null,