From 6c67bd601a1ad3a96972954f99fe571e20f7ab34 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Thu, 24 Mar 2022 10:30:27 +0100 Subject: [PATCH 1/3] refactoring --- src/HowBadIsMyBatch.ipynb | 57 +++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index b4f373d9c2f..7b9e84c10f1 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -26,34 +26,45 @@ "import re\n", "from dateutil.parser import parse\n", "\n", - "def needsUpdate():\n", - " lastUpdated = _getLastUpdated()\n", - " print(' lastUpdated:', lastUpdated)\n", - "\n", - " lastUpdatedDataSource = _getLastUpdatedDataSource()\n", - " print('lastUpdatedDataSource:', lastUpdatedDataSource)\n", - "\n", - " return lastUpdated < lastUpdatedDataSource\n", + "class DateProvider:\n", " \n", - "def _getLastUpdated():\n", - " return __getLastUpdated(\n", - " url = \"https://knollfrank.github.io/HowBadIsMyBatch/batchCodeTable.html\",\n", - " getDateStr = lambda soup: soup.find(id = \"last_updated\").text)\n", + " def needsUpdate(self):\n", + " lastUpdated = self._getLastUpdated()\n", + " print(' lastUpdated:', lastUpdated)\n", "\n", - "def _getLastUpdatedDataSource():\n", - " def getDateStr(soup):\n", - " lastUpdated = soup.find(string = re.compile(\"Last updated\"))\n", - " return re.search('Last updated: (.+).', lastUpdated).group(1)\n", + " lastUpdatedDataSource = self._getLastUpdatedDataSource()\n", + " print('lastUpdatedDataSource:', lastUpdatedDataSource)\n", "\n", - " return __getLastUpdated(url = \"https://vaers.hhs.gov/data/datasets.html\", getDateStr = getDateStr)\n", + " return lastUpdated < lastUpdatedDataSource\n", + " \n", + " def _getLastUpdated(self):\n", + " return self.__getLastUpdated(\n", + " url = \"https://knollfrank.github.io/HowBadIsMyBatch/batchCodeTable.html\",\n", + " getDateStr = lambda soup: soup.find(id = \"last_updated\").text)\n", "\n", - "def __getLastUpdated(url, getDateStr):\n", - " htmlContent = requests.get(url).text\n", - " soup = BeautifulSoup(htmlContent, \"lxml\")\n", - " dateStr = getDateStr(soup)\n", - " return parse(dateStr).date()\n", + " def _getLastUpdatedDataSource(self):\n", + " def getDateStr(soup):\n", + " lastUpdated = soup.find(string = re.compile(\"Last updated\"))\n", + " return re.search('Last updated: (.+).', lastUpdated).group(1)\n", "\n", - "print('needsUpdate:', needsUpdate())" + " return self.__getLastUpdated(url = \"https://vaers.hhs.gov/data/datasets.html\", getDateStr = getDateStr)\n", + "\n", + " def __getLastUpdated(self, url, getDateStr):\n", + " htmlContent = requests.get(url).text\n", + " soup = BeautifulSoup(htmlContent, \"lxml\")\n", + " dateStr = getDateStr(soup)\n", + " return parse(dateStr).date()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffad1c04", + "metadata": {}, + "outputs": [], + "source": [ + "dateProvider = DateProvider()\n", + "print('needsUpdate:', dateProvider.needsUpdate())" ] }, { From 3f4c1bdc7b075cdf375736f2f2db74340d533821 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Thu, 24 Mar 2022 10:43:18 +0100 Subject: [PATCH 2/3] refactoring --- src/HowBadIsMyBatch.ipynb | 43 +++++++++++-------- src/intensivstationen/Intensivstationen.ipynb | 2 +- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index 7b9e84c10f1..ffb29cfc145 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -28,26 +28,32 @@ "\n", "class DateProvider:\n", " \n", + " def __init__(self):\n", + " self.lastUpdated = None\n", + " self.lastUpdatedDataSource = None\n", + "\n", " def needsUpdate(self):\n", - " lastUpdated = self._getLastUpdated()\n", - " print(' lastUpdated:', lastUpdated)\n", - "\n", - " lastUpdatedDataSource = self._getLastUpdatedDataSource()\n", - " print('lastUpdatedDataSource:', lastUpdatedDataSource)\n", - "\n", - " return lastUpdated < lastUpdatedDataSource\n", + " return self.getLastUpdated() < self.getLastUpdatedDataSource()\n", " \n", - " def _getLastUpdated(self):\n", - " return self.__getLastUpdated(\n", - " url = \"https://knollfrank.github.io/HowBadIsMyBatch/batchCodeTable.html\",\n", - " getDateStr = lambda soup: soup.find(id = \"last_updated\").text)\n", + " def getLastUpdated(self):\n", + " if self.lastUpdated is None:\n", + " self.lastUpdated = self.__getLastUpdated(\n", + " url = \"https://knollfrank.github.io/HowBadIsMyBatch/batchCodeTable.html\",\n", + " getDateStr = lambda soup: soup.find(id = \"last_updated\").text)\n", + " \n", + " return self.lastUpdated\n", "\n", - " def _getLastUpdatedDataSource(self):\n", - " def getDateStr(soup):\n", - " lastUpdated = soup.find(string = re.compile(\"Last updated\"))\n", - " return re.search('Last updated: (.+).', lastUpdated).group(1)\n", + " def getLastUpdatedDataSource(self):\n", + " if self.lastUpdatedDataSource is None:\n", + " def getDateStr(soup):\n", + " lastUpdated = soup.find(string = re.compile(\"Last updated\"))\n", + " return re.search('Last updated: (.+).', lastUpdated).group(1)\n", "\n", - " return self.__getLastUpdated(url = \"https://vaers.hhs.gov/data/datasets.html\", getDateStr = getDateStr)\n", + " self.lastUpdatedDataSource = self.__getLastUpdated(\n", + " url = \"https://vaers.hhs.gov/data/datasets.html\",\n", + " getDateStr = getDateStr)\n", + "\n", + " return self.lastUpdatedDataSource\n", "\n", " def __getLastUpdated(self, url, getDateStr):\n", " htmlContent = requests.get(url).text\n", @@ -64,7 +70,10 @@ "outputs": [], "source": [ "dateProvider = DateProvider()\n", - "print('needsUpdate:', dateProvider.needsUpdate())" + "print(' lastUpdated:', dateProvider.getLastUpdated())\n", + "print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource()) \n", + "needsUpdate = dateProvider.needsUpdate()\n", + "print('needsUpdate:', needsUpdate)" ] }, { diff --git a/src/intensivstationen/Intensivstationen.ipynb b/src/intensivstationen/Intensivstationen.ipynb index 76c9933125d..b629eace8eb 100644 --- a/src/intensivstationen/Intensivstationen.ipynb +++ b/src/intensivstationen/Intensivstationen.ipynb @@ -118,7 +118,7 @@ "print(' lastUpdated:', dateProvider.getLastUpdated())\n", "print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource()) \n", "needsUpdate = dateProvider.needsUpdate()\n", - "print('needsUpdate: ', needsUpdate)" + "print('needsUpdate:', needsUpdate)" ] }, { From f83c1f298c1cd949acbb1775fa543838bc8d2627 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Thu, 24 Mar 2022 11:30:06 +0100 Subject: [PATCH 3/3] refactoring --- src/HowBadIsMyBatch.ipynb | 44 ++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index ffb29cfc145..db1e05c0f8a 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -24,10 +24,12 @@ "from bs4 import BeautifulSoup\n", "import requests\n", "import re\n", - "from dateutil.parser import parse\n", + "from datetime import datetime\n", "\n", "class DateProvider:\n", " \n", + " DATE_FORMAT = \"%B %d, %Y\"\n", + "\n", " def __init__(self):\n", " self.lastUpdated = None\n", " self.lastUpdatedDataSource = None\n", @@ -59,7 +61,7 @@ " htmlContent = requests.get(url).text\n", " soup = BeautifulSoup(htmlContent, \"lxml\")\n", " dateStr = getDateStr(soup)\n", - " return parse(dateStr).date()" + " return datetime.strptime(dateStr, DateProvider.DATE_FORMAT)" ] }, { @@ -459,6 +461,23 @@ " 'lxml'))\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f02dddfe", + "metadata": {}, + "outputs": [], + "source": [ + "def saveLastUpdatedBatchCodeTable(lastUpdated):\n", + " def setLastUpdated(soup):\n", + " soup.find(id = \"last_updated\").string.replace_with(lastUpdated.strftime(DateProvider.DATE_FORMAT))\n", + " return soup\n", + "\n", + " HtmlTransformerUtil().applySoupTransformerToFile(\n", + " file = \"../docs/batchCodeTable.html\",\n", + " soupTransformer = setLastUpdated)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -953,16 +972,7 @@ "metadata": {}, "outputs": [], "source": [ - "countries = sorted(internationalVaersCovid19['COUNTRY'].unique())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1c518028", - "metadata": {}, - "outputs": [], - "source": [ + "countries = sorted(internationalVaersCovid19['COUNTRY'].unique())\n", "countryOptions = [''] + getCountryOptions(countries)" ] }, @@ -976,6 +986,16 @@ "saveCountryOptions(countryOptions)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c7485b5", + "metadata": {}, + "outputs": [], + "source": [ + "saveLastUpdatedBatchCodeTable(dateProvider.getLastUpdatedDataSource())" + ] + }, { "cell_type": "code", "execution_count": null,