refactoring

This commit is contained in:
frankknoll
2023-01-17 16:04:38 +01:00
parent 9b3f2ae559
commit e581bf759c
2 changed files with 15 additions and 4 deletions

View File

@@ -15,6 +15,7 @@ dependencies:
- tensorflow - tensorflow
- nb_conda_kernels - nb_conda_kernels
- pillow - pillow
- python-decouple
# - selenium # - selenium
# - webdriver-manager # - webdriver-manager
# - pycountry # - pycountry

View File

@@ -3,7 +3,9 @@ import requests
from datetime import datetime from datetime import datetime
from time import sleep from time import sleep
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.firefox.options import Options from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
import pandas as pd import pandas as pd
class DateProvider: class DateProvider:
@@ -39,15 +41,23 @@ class DateProvider:
options = Options() options = Options()
options.headless = True options.headless = True
options.add_argument("-profile") options.add_argument("-profile")
# put the root directory your default profile path here, you can check it by opening Firefox and then pasting 'about:profiles' into the url field driver = self._getWebDriver()
options.add_argument("/home/frankknoll/snap/firefox/common/.mozilla/firefox/1j6r2yp6.default")
driver = webdriver.Firefox(options = options)
driver.get('https://www.intensivregister.de/#/aktuelle-lage/downloads') driver.get('https://www.intensivregister.de/#/aktuelle-lage/downloads')
sleep(10) sleep(10)
innerHTML = driver.execute_script("return document.body.innerHTML") innerHTML = driver.execute_script("return document.body.innerHTML")
driver.quit() driver.quit()
return innerHTML return innerHTML
def _getWebDriver(self):
    """Create the Chrome WebDriver instance used by this class.

    The value returned by ``ChromeDriverManager().install()`` is handed to
    ``ChromeService`` as its ``executable_path``, and the browser options
    are taken from ``self._getOptions()``.

    Returns:
        The object produced by ``webdriver.Chrome(...)``.
    """
    driverPath = ChromeDriverManager().install()
    chromeService = ChromeService(executable_path = driverPath)
    chromeOptions = self._getOptions()
    return webdriver.Chrome(service = chromeService, options = chromeOptions)
def _getOptions(self):
    """Build the Chrome options for a headless browser session.

    Returns:
        A selenium Chrome ``Options`` object carrying the ``--headless``
        command-line argument.
    """
    options = Options()
    # Request headless mode through the command-line switch rather than the
    # ``options.headless`` property: that setter was deprecated and later
    # removed in Selenium 4, after which assigning to it has no effect and
    # Chrome would start with a visible window.
    options.add_argument("--headless")
    return options
def _asDataFrame(self, html, lastUpdatedColumn): def _asDataFrame(self, html, lastUpdatedColumn):
dataFrame = pd.read_html(html, parse_dates = [lastUpdatedColumn])[0] dataFrame = pd.read_html(html, parse_dates = [lastUpdatedColumn])[0]
dataFrame[lastUpdatedColumn] = pd.to_datetime(dataFrame[lastUpdatedColumn], format = "%d.%m.%Y %H:%M Uhr") dataFrame[lastUpdatedColumn] = pd.to_datetime(dataFrame[lastUpdatedColumn], format = "%d.%m.%Y %H:%M Uhr")