diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index b4f373d9c2f..db1e05c0f8a 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -24,36 +24,58 @@ "from bs4 import BeautifulSoup\n", "import requests\n", "import re\n", - "from dateutil.parser import parse\n", + "from datetime import datetime\n", "\n", - "def needsUpdate():\n", - " lastUpdated = _getLastUpdated()\n", - " print(' lastUpdated:', lastUpdated)\n", - "\n", - " lastUpdatedDataSource = _getLastUpdatedDataSource()\n", - " print('lastUpdatedDataSource:', lastUpdatedDataSource)\n", - "\n", - " return lastUpdated < lastUpdatedDataSource\n", + "class DateProvider:\n", " \n", - "def _getLastUpdated():\n", - " return __getLastUpdated(\n", - " url = \"https://knollfrank.github.io/HowBadIsMyBatch/batchCodeTable.html\",\n", - " getDateStr = lambda soup: soup.find(id = \"last_updated\").text)\n", + " DATE_FORMAT = \"%B %d, %Y\"\n", "\n", - "def _getLastUpdatedDataSource():\n", - " def getDateStr(soup):\n", - " lastUpdated = soup.find(string = re.compile(\"Last updated\"))\n", - " return re.search('Last updated: (.+).', lastUpdated).group(1)\n", + " def __init__(self):\n", + " self.lastUpdated = None\n", + " self.lastUpdatedDataSource = None\n", "\n", - " return __getLastUpdated(url = \"https://vaers.hhs.gov/data/datasets.html\", getDateStr = getDateStr)\n", + " def needsUpdate(self):\n", + " return self.getLastUpdated() < self.getLastUpdatedDataSource()\n", + " \n", + " def getLastUpdated(self):\n", + " if self.lastUpdated is None:\n", + " self.lastUpdated = self.__getLastUpdated(\n", + " url = \"https://knollfrank.github.io/HowBadIsMyBatch/batchCodeTable.html\",\n", + " getDateStr = lambda soup: soup.find(id = \"last_updated\").text)\n", + " \n", + " return self.lastUpdated\n", "\n", - "def __getLastUpdated(url, getDateStr):\n", - " htmlContent = requests.get(url).text\n", - " soup = BeautifulSoup(htmlContent, \"lxml\")\n", - " dateStr = getDateStr(soup)\n", - " return parse(dateStr).date()\n", + " def getLastUpdatedDataSource(self):\n", + " if self.lastUpdatedDataSource is None:\n", + " def getDateStr(soup):\n", + " lastUpdated = soup.find(string = re.compile(\"Last updated\"))\n", + " return re.search('Last updated: (.+).', lastUpdated).group(1)\n", "\n", - "print('needsUpdate:', needsUpdate())" + " self.lastUpdatedDataSource = self.__getLastUpdated(\n", + " url = \"https://vaers.hhs.gov/data/datasets.html\",\n", + " getDateStr = getDateStr)\n", + "\n", + " return self.lastUpdatedDataSource\n", + "\n", + " def __getLastUpdated(self, url, getDateStr):\n", + " htmlContent = requests.get(url).text\n", + " soup = BeautifulSoup(htmlContent, \"lxml\")\n", + " dateStr = getDateStr(soup)\n", + " return datetime.strptime(dateStr, DateProvider.DATE_FORMAT)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffad1c04", + "metadata": {}, + "outputs": [], + "source": [ + "dateProvider = DateProvider()\n", + "print(' lastUpdated:', dateProvider.getLastUpdated())\n", + "print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource()) \n", + "needsUpdate = dateProvider.needsUpdate()\n", + "print('needsUpdate:', needsUpdate)" ] }, { @@ -439,6 +461,23 @@ " 'lxml'))\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f02dddfe", + "metadata": {}, + "outputs": [], + "source": [ + "def saveLastUpdatedBatchCodeTable(lastUpdated):\n", + " def setLastUpdated(soup):\n", + " soup.find(id = \"last_updated\").string.replace_with(lastUpdated.strftime(DateProvider.DATE_FORMAT))\n", + " return soup\n", + "\n", + " HtmlTransformerUtil().applySoupTransformerToFile(\n", + " file = \"../docs/batchCodeTable.html\",\n", + " soupTransformer = setLastUpdated)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -933,16 +972,7 @@ "metadata": {}, "outputs": [], "source": [ - "countries = sorted(internationalVaersCovid19['COUNTRY'].unique())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1c518028", - "metadata": {}, - "outputs": [], - "source": [ + "countries = sorted(internationalVaersCovid19['COUNTRY'].unique())\n", "countryOptions = [''] + getCountryOptions(countries)" ] }, @@ -956,6 +986,16 @@ "saveCountryOptions(countryOptions)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c7485b5", + "metadata": {}, + "outputs": [], + "source": [ + "saveLastUpdatedBatchCodeTable(dateProvider.getLastUpdatedDataSource())" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/src/intensivstationen/Intensivstationen.ipynb b/src/intensivstationen/Intensivstationen.ipynb index 76c9933125d..b629eace8eb 100644 --- a/src/intensivstationen/Intensivstationen.ipynb +++ b/src/intensivstationen/Intensivstationen.ipynb @@ -118,7 +118,7 @@ "print(' lastUpdated:', dateProvider.getLastUpdated())\n", "print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource()) \n", "needsUpdate = dateProvider.needsUpdate()\n", - "print('needsUpdate: ', needsUpdate)" + "print('needsUpdate:', needsUpdate)" ] }, {