refactoring

This commit is contained in:
frankknoll
2022-11-22 12:40:26 +01:00
parent 134a133da1
commit e87fe0c8ba
21 changed files with 692 additions and 731 deletions

42
src/DateProvider.py Normal file
View File

@@ -0,0 +1,42 @@
from bs4 import BeautifulSoup
import requests
import re
from datetime import datetime
class DateProvider:
DATE_FORMAT = "%B %d, %Y"
def __init__(self):
self.lastUpdated = None
self.lastUpdatedDataSource = None
def needsUpdate(self):
return self.getLastUpdated() < self.getLastUpdatedDataSource()
def getLastUpdated(self):
if self.lastUpdated is None:
self.lastUpdated = self.__getLastUpdated(
url="https://knollfrank.github.io/HowBadIsMyBatch/batchCodeTable.html",
getDateStr=lambda soup: soup.find(id="last_updated").text)
return self.lastUpdated
def getLastUpdatedDataSource(self):
if self.lastUpdatedDataSource is None:
def getDateStr(soup):
lastUpdated = soup.find(string=re.compile("Last updated"))
return re.search('Last updated: (.+).', lastUpdated).group(1)
self.lastUpdatedDataSource = self.__getLastUpdated(
url="https://vaers.hhs.gov/data/datasets.html",
getDateStr=getDateStr)
return self.lastUpdatedDataSource
def __getLastUpdated(self, url, getDateStr):
htmlContent = requests.get(url).text
soup = BeautifulSoup(htmlContent, "lxml")
dateStr = getDateStr(soup)
return datetime.strptime(dateStr, DateProvider.DATE_FORMAT)