refactoring
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -14,3 +14,4 @@ src/captchaImage.jpeg
|
|||||||
src/HowBadIsMyBatch.nbconvert.ipynb
|
src/HowBadIsMyBatch.nbconvert.ipynb
|
||||||
src/HowBadIsMyBatch.nbconvert.html
|
src/HowBadIsMyBatch.nbconvert.html
|
||||||
src/__pycache__/
|
src/__pycache__/
|
||||||
|
src/intensivstationen/__pycache__/
|
||||||
|
|||||||
@@ -138,7 +138,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.10.8"
|
"version": "3.10.8 (main, Nov 24 2022, 14:13:03) [GCC 11.2.0]"
|
||||||
},
|
},
|
||||||
"vscode": {
|
"vscode": {
|
||||||
"interpreter": {
|
"interpreter": {
|
||||||
|
|||||||
55
src/intensivstationen/DateProvider.py
Normal file
55
src/intensivstationen/DateProvider.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import requests
|
||||||
|
from datetime import datetime
|
||||||
|
from time import sleep
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.firefox.options import Options
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
class DateProvider:
    """Compare the date of the published page with the date of its data source.

    Two timestamps are fetched lazily and cached on the instance:

    * ``lastUpdated`` - the "Datenstand" date shown on the published
      intensivstationen.html page.
    * ``lastUpdatedDataSource`` - the 'Letzte Änderung' date of the
      'Landkreis-Daten' row in the first table of the intensivregister.de
      downloads page.
    """

    # Format of the "Datenstand" text on the published page.
    INTENSIVSTATIONEN_DATE_FORMAT = "%d.%m.%Y, %H:%M Uhr"

    # Upper bound in seconds for the HTTP request to the published page, so
    # that an unresponsive server cannot block the caller forever.
    REQUEST_TIMEOUT_SECONDS = 30

    # Seconds to wait after navigating before the DOM is read.
    # NOTE(review): presumably gives the single-page app time to render its
    # table - confirm that 10 seconds is still sufficient.
    PAGE_LOAD_WAIT_SECONDS = 10

    # Root directory of the Firefox profile that Selenium starts with. It is
    # machine specific: open Firefox and enter 'about:profiles' in the URL
    # field to look up the root directory of your default profile.
    FIREFOX_PROFILE_PATH = "/home/frankknoll/snap/firefox/common/.mozilla/firefox/1j6r2yp6.default"

    def __init__(self):
        # Both values are filled on first access by the getters below.
        self.lastUpdated = None
        self.lastUpdatedDataSource = None

    def needsUpdate(self):
        """Return True if the data source is newer than the published page."""
        return self.getLastUpdated() < self.getLastUpdatedDataSource()

    def getLastUpdated(self):
        """Return the "Datenstand" date of the published page as a datetime.

        The page is downloaded on the first call only; later calls return the
        cached value.

        Raises:
            requests.HTTPError: if the page request returns an error status.
        """
        if self.lastUpdated is None:
            response = requests.get(
                "https://knollfrank.github.io/HowBadIsMyBatch/intensivstationen.html",
                timeout = DateProvider.REQUEST_TIMEOUT_SECONDS)
            # Fail with a clear HTTP error instead of parsing an error page.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "lxml")
            dateStr = soup.find(id = "Datenstand").text
            self.lastUpdated = datetime.strptime(dateStr, DateProvider.INTENSIVSTATIONEN_DATE_FORMAT)

        return self.lastUpdated

    def getLastUpdatedDataSource(self):
        """Return the 'Letzte Änderung' date of the 'Landkreis-Daten' row.

        The downloads page is rendered in a browser on the first call only;
        later calls return the cached value.
        """
        if self.lastUpdatedDataSource is None:
            html = self._getOriginalHtml()
            lastUpdatedColumn = 'Letzte Änderung'
            dataFrame = self._asDataFrame(html, lastUpdatedColumn)
            self.lastUpdatedDataSource = dataFrame.loc['Landkreis-Daten', lastUpdatedColumn].to_pydatetime()

        return self.lastUpdatedDataSource

    def _getOriginalHtml(self):
        """Return the body HTML of the intensivregister.de downloads page.

        The page is opened in a headless Firefox, and the DOM is read after
        a fixed wait.
        """
        options = Options()
        # Pass the command line flag directly rather than assigning
        # `options.headless`, a setter that newer Selenium releases dropped.
        options.add_argument("-headless")
        options.add_argument("-profile")
        options.add_argument(DateProvider.FIREFOX_PROFILE_PATH)
        driver = webdriver.Firefox(options = options)
        try:
            driver.get('https://www.intensivregister.de/#/aktuelle-lage/downloads')
            sleep(DateProvider.PAGE_LOAD_WAIT_SECONDS)
            return driver.execute_script("return document.body.innerHTML")
        finally:
            # Always shut the browser down, even if loading or scripting fails.
            driver.quit()

    def _asDataFrame(self, html, lastUpdatedColumn):
        """Parse the first HTML table into a DataFrame indexed by 'Name'.

        Args:
            html: HTML markup containing at least one table.
            lastUpdatedColumn: name of the column holding dates formatted
                like "%d.%m.%Y %H:%M Uhr"; it is converted to datetimes.
        """
        from io import StringIO

        # Wrap the markup in a file-like object: handing a literal HTML
        # string to read_html is deprecated in newer pandas versions.
        dataFrame = pd.read_html(StringIO(html), parse_dates = [lastUpdatedColumn])[0]
        dataFrame[lastUpdatedColumn] = pd.to_datetime(dataFrame[lastUpdatedColumn], format = "%d.%m.%Y %H:%M Uhr")
        dataFrame.set_index('Name', inplace = True)
        return dataFrame
|
||||||
@@ -9,10 +9,13 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"from urllib import request\n",
|
"from urllib import request\n",
|
||||||
|
"import os\n",
|
||||||
|
"from DateProvider import DateProvider\n",
|
||||||
|
"from datetime import datetime\n",
|
||||||
"\n",
|
"\n",
|
||||||
"pd.set_option('display.max_rows', 100)\n",
|
"pd.set_option('display.max_rows', 100)\n",
|
||||||
"pd.set_option('display.max_columns', None)\n",
|
"pd.set_option('display.max_columns', None)\n",
|
||||||
"pd.set_option('mode.chained_assignment', 'raise')"
|
"pd.set_option('mode.chained_assignment', 'raise')\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -22,92 +25,17 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from datetime import datetime\n",
|
|
||||||
"\n",
|
|
||||||
"print(datetime.now().strftime(\"%d.%m.%Y, %H:%M:%S Uhr\"))"
|
"print(datetime.now().strftime(\"%d.%m.%Y, %H:%M:%S Uhr\"))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "579c0911",
|
"id": "98981ab9",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"! pwd"
|
"os.getcwd()"
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "dfa836ec",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"needsUpdate = False"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "79de4057",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from bs4 import BeautifulSoup\n",
|
|
||||||
"import requests\n",
|
|
||||||
"from datetime import datetime\n",
|
|
||||||
"from time import sleep\n",
|
|
||||||
"from selenium import webdriver\n",
|
|
||||||
"from selenium.webdriver.firefox.options import Options\n",
|
|
||||||
"\n",
|
|
||||||
"class DateProvider:\n",
|
|
||||||
" \n",
|
|
||||||
" INTENSIVSTATIONEN_DATE_FORMAT = \"%d.%m.%Y, %H:%M Uhr\"\n",
|
|
||||||
"\n",
|
|
||||||
" def __init__(self):\n",
|
|
||||||
" self.lastUpdated = None\n",
|
|
||||||
" self.lastUpdatedDataSource = None\n",
|
|
||||||
"\n",
|
|
||||||
" def needsUpdate(self):\n",
|
|
||||||
" return self.getLastUpdated() < self.getLastUpdatedDataSource()\n",
|
|
||||||
" \n",
|
|
||||||
" def getLastUpdated(self):\n",
|
|
||||||
" if self.lastUpdated is None:\n",
|
|
||||||
" htmlContent = requests.get(\"https://knollfrank.github.io/HowBadIsMyBatch/intensivstationen.html\").text\n",
|
|
||||||
" soup = BeautifulSoup(htmlContent, \"lxml\")\n",
|
|
||||||
" dateStr = soup.find(id = \"Datenstand\").text\n",
|
|
||||||
" self.lastUpdated = datetime.strptime(dateStr, DateProvider.INTENSIVSTATIONEN_DATE_FORMAT)\n",
|
|
||||||
" \n",
|
|
||||||
" return self.lastUpdated\n",
|
|
||||||
"\n",
|
|
||||||
" def getLastUpdatedDataSource(self):\n",
|
|
||||||
" if self.lastUpdatedDataSource is None:\n",
|
|
||||||
" html = self._getOriginalHtml()\n",
|
|
||||||
" lastUpdatedColumn = 'Letzte Änderung'\n",
|
|
||||||
" dataFrame = self._asDataFrame(html, lastUpdatedColumn)\n",
|
|
||||||
" self.lastUpdatedDataSource = dataFrame.loc['Landkreis-Daten', lastUpdatedColumn].to_pydatetime()\n",
|
|
||||||
"\n",
|
|
||||||
" return self.lastUpdatedDataSource\n",
|
|
||||||
"\n",
|
|
||||||
" def _getOriginalHtml(self):\n",
|
|
||||||
" options = Options()\n",
|
|
||||||
" options.headless = True\n",
|
|
||||||
" options.add_argument(\"-profile\")\n",
|
|
||||||
" # put the root directory your default profile path here, you can check it by opening Firefox and then pasting 'about:profiles' into the url field \n",
|
|
||||||
" options.add_argument(\"/home/frankknoll/snap/firefox/common/.mozilla/firefox/1j6r2yp6.default\")\n",
|
|
||||||
" driver = webdriver.Firefox(options = options)\n",
|
|
||||||
" driver.get('https://www.intensivregister.de/#/aktuelle-lage/downloads')\n",
|
|
||||||
" sleep(10)\n",
|
|
||||||
" innerHTML = driver.execute_script(\"return document.body.innerHTML\")\n",
|
|
||||||
" driver.quit()\n",
|
|
||||||
" return innerHTML\n",
|
|
||||||
"\n",
|
|
||||||
" def _asDataFrame(self, html, lastUpdatedColumn):\n",
|
|
||||||
" dataFrame = pd.read_html(html, parse_dates = [lastUpdatedColumn])[0]\n",
|
|
||||||
" dataFrame[lastUpdatedColumn] = pd.to_datetime(dataFrame[lastUpdatedColumn], format = \"%d.%m.%Y %H:%M Uhr\")\n",
|
|
||||||
" dataFrame.set_index('Name', inplace = True)\n",
|
|
||||||
" return dataFrame\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -831,9 +759,9 @@
|
|||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3.10.8 ('howbadismybatch-venv')",
|
"display_name": "howbadismybatch-venv-kernel",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "howbadismybatch-venv-kernel"
|
||||||
},
|
},
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
|
|||||||
Reference in New Issue
Block a user