Merge branch 'main' into pages

This commit is contained in:
frankknoll
2022-03-24 11:35:09 +01:00
2 changed files with 75 additions and 35 deletions

View File

@@ -24,36 +24,58 @@
"from bs4 import BeautifulSoup\n", "from bs4 import BeautifulSoup\n",
"import requests\n", "import requests\n",
"import re\n", "import re\n",
"from dateutil.parser import parse\n", "from datetime import datetime\n",
"\n", "\n",
"def needsUpdate():\n", "class DateProvider:\n",
" lastUpdated = _getLastUpdated()\n",
" print(' lastUpdated:', lastUpdated)\n",
" \n", " \n",
" lastUpdatedDataSource = _getLastUpdatedDataSource()\n", " DATE_FORMAT = \"%B %d, %Y\"\n",
" print('lastUpdatedDataSource:', lastUpdatedDataSource)\n",
"\n", "\n",
" return lastUpdated < lastUpdatedDataSource\n", " def __init__(self):\n",
" self.lastUpdated = None\n",
" self.lastUpdatedDataSource = None\n",
"\n", "\n",
"def _getLastUpdated():\n", " def needsUpdate(self):\n",
" return __getLastUpdated(\n", " return self.getLastUpdated() < self.getLastUpdatedDataSource()\n",
" \n",
" def getLastUpdated(self):\n",
" if self.lastUpdated is None:\n",
" self.lastUpdated = self.__getLastUpdated(\n",
" url = \"https://knollfrank.github.io/HowBadIsMyBatch/batchCodeTable.html\",\n", " url = \"https://knollfrank.github.io/HowBadIsMyBatch/batchCodeTable.html\",\n",
" getDateStr = lambda soup: soup.find(id = \"last_updated\").text)\n", " getDateStr = lambda soup: soup.find(id = \"last_updated\").text)\n",
" \n", " \n",
"def _getLastUpdatedDataSource():\n", " return self.lastUpdated\n",
"\n",
" def getLastUpdatedDataSource(self):\n",
" if self.lastUpdatedDataSource is None:\n",
" def getDateStr(soup):\n", " def getDateStr(soup):\n",
" lastUpdated = soup.find(string = re.compile(\"Last updated\"))\n", " lastUpdated = soup.find(string = re.compile(\"Last updated\"))\n",
" return re.search('Last updated: (.+).', lastUpdated).group(1)\n", " return re.search('Last updated: (.+).', lastUpdated).group(1)\n",
"\n", "\n",
" return __getLastUpdated(url = \"https://vaers.hhs.gov/data/datasets.html\", getDateStr = getDateStr)\n", " self.lastUpdatedDataSource = self.__getLastUpdated(\n",
" url = \"https://vaers.hhs.gov/data/datasets.html\",\n",
" getDateStr = getDateStr)\n",
"\n", "\n",
"def __getLastUpdated(url, getDateStr):\n", " return self.lastUpdatedDataSource\n",
"\n",
" def __getLastUpdated(self, url, getDateStr):\n",
" htmlContent = requests.get(url).text\n", " htmlContent = requests.get(url).text\n",
" soup = BeautifulSoup(htmlContent, \"lxml\")\n", " soup = BeautifulSoup(htmlContent, \"lxml\")\n",
" dateStr = getDateStr(soup)\n", " dateStr = getDateStr(soup)\n",
" return parse(dateStr).date()\n", " return datetime.strptime(dateStr, DateProvider.DATE_FORMAT)"
"\n", ]
"print('needsUpdate:', needsUpdate())" },
{
"cell_type": "code",
"execution_count": null,
"id": "ffad1c04",
"metadata": {},
"outputs": [],
"source": [
"dateProvider = DateProvider()\n",
"print(' lastUpdated:', dateProvider.getLastUpdated())\n",
"print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource()) \n",
"needsUpdate = dateProvider.needsUpdate()\n",
"print('needsUpdate:', needsUpdate)"
] ]
}, },
{ {
@@ -439,6 +461,23 @@
" 'lxml'))\n" " 'lxml'))\n"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "f02dddfe",
"metadata": {},
"outputs": [],
"source": [
"def saveLastUpdatedBatchCodeTable(lastUpdated):\n",
" def setLastUpdated(soup):\n",
" soup.find(id = \"last_updated\").string.replace_with(lastUpdated.strftime(DateProvider.DATE_FORMAT))\n",
" return soup\n",
"\n",
" HtmlTransformerUtil().applySoupTransformerToFile(\n",
" file = \"../docs/batchCodeTable.html\",\n",
" soupTransformer = setLastUpdated)"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -933,16 +972,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"countries = sorted(internationalVaersCovid19['COUNTRY'].unique())" "countries = sorted(internationalVaersCovid19['COUNTRY'].unique())\n",
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1c518028",
"metadata": {},
"outputs": [],
"source": [
"countryOptions = ['<option value=\"Global\" selected>Global</option>'] + getCountryOptions(countries)" "countryOptions = ['<option value=\"Global\" selected>Global</option>'] + getCountryOptions(countries)"
] ]
}, },
@@ -956,6 +986,16 @@
"saveCountryOptions(countryOptions)" "saveCountryOptions(countryOptions)"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "9c7485b5",
"metadata": {},
"outputs": [],
"source": [
"saveLastUpdatedBatchCodeTable(dateProvider.getLastUpdatedDataSource())"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,