Merge branch 'main' into pages
This commit is contained in:
@@ -24,36 +24,58 @@
|
|||||||
"from bs4 import BeautifulSoup\n",
|
"from bs4 import BeautifulSoup\n",
|
||||||
"import requests\n",
|
"import requests\n",
|
||||||
"import re\n",
|
"import re\n",
|
||||||
"from dateutil.parser import parse\n",
|
"from datetime import datetime\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def needsUpdate():\n",
|
"class DateProvider:\n",
|
||||||
" lastUpdated = _getLastUpdated()\n",
|
|
||||||
" print(' lastUpdated:', lastUpdated)\n",
|
|
||||||
"\n",
|
|
||||||
" lastUpdatedDataSource = _getLastUpdatedDataSource()\n",
|
|
||||||
" print('lastUpdatedDataSource:', lastUpdatedDataSource)\n",
|
|
||||||
"\n",
|
|
||||||
" return lastUpdated < lastUpdatedDataSource\n",
|
|
||||||
" \n",
|
" \n",
|
||||||
"def _getLastUpdated():\n",
|
" DATE_FORMAT = \"%B %d, %Y\"\n",
|
||||||
" return __getLastUpdated(\n",
|
|
||||||
" url = \"https://knollfrank.github.io/HowBadIsMyBatch/batchCodeTable.html\",\n",
|
|
||||||
" getDateStr = lambda soup: soup.find(id = \"last_updated\").text)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"def _getLastUpdatedDataSource():\n",
|
" def __init__(self):\n",
|
||||||
" def getDateStr(soup):\n",
|
" self.lastUpdated = None\n",
|
||||||
" lastUpdated = soup.find(string = re.compile(\"Last updated\"))\n",
|
" self.lastUpdatedDataSource = None\n",
|
||||||
" return re.search('Last updated: (.+).', lastUpdated).group(1)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" return __getLastUpdated(url = \"https://vaers.hhs.gov/data/datasets.html\", getDateStr = getDateStr)\n",
|
" def needsUpdate(self):\n",
|
||||||
|
" return self.getLastUpdated() < self.getLastUpdatedDataSource()\n",
|
||||||
|
" \n",
|
||||||
|
" def getLastUpdated(self):\n",
|
||||||
|
" if self.lastUpdated is None:\n",
|
||||||
|
" self.lastUpdated = self.__getLastUpdated(\n",
|
||||||
|
" url = \"https://knollfrank.github.io/HowBadIsMyBatch/batchCodeTable.html\",\n",
|
||||||
|
" getDateStr = lambda soup: soup.find(id = \"last_updated\").text)\n",
|
||||||
|
" \n",
|
||||||
|
" return self.lastUpdated\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def __getLastUpdated(url, getDateStr):\n",
|
" def getLastUpdatedDataSource(self):\n",
|
||||||
" htmlContent = requests.get(url).text\n",
|
" if self.lastUpdatedDataSource is None:\n",
|
||||||
" soup = BeautifulSoup(htmlContent, \"lxml\")\n",
|
" def getDateStr(soup):\n",
|
||||||
" dateStr = getDateStr(soup)\n",
|
" lastUpdated = soup.find(string = re.compile(\"Last updated\"))\n",
|
||||||
" return parse(dateStr).date()\n",
|
" return re.search('Last updated: (.+).', lastUpdated).group(1)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print('needsUpdate:', needsUpdate())"
|
" self.lastUpdatedDataSource = self.__getLastUpdated(\n",
|
||||||
|
" url = \"https://vaers.hhs.gov/data/datasets.html\",\n",
|
||||||
|
" getDateStr = getDateStr)\n",
|
||||||
|
"\n",
|
||||||
|
" return self.lastUpdatedDataSource\n",
|
||||||
|
"\n",
|
||||||
|
" def __getLastUpdated(self, url, getDateStr):\n",
|
||||||
|
" htmlContent = requests.get(url).text\n",
|
||||||
|
" soup = BeautifulSoup(htmlContent, \"lxml\")\n",
|
||||||
|
" dateStr = getDateStr(soup)\n",
|
||||||
|
" return datetime.strptime(dateStr, DateProvider.DATE_FORMAT)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "ffad1c04",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dateProvider = DateProvider()\n",
|
||||||
|
"print(' lastUpdated:', dateProvider.getLastUpdated())\n",
|
||||||
|
"print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource()) \n",
|
||||||
|
"needsUpdate = dateProvider.needsUpdate()\n",
|
||||||
|
"print('needsUpdate:', needsUpdate)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -439,6 +461,23 @@
|
|||||||
" 'lxml'))\n"
|
" 'lxml'))\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "f02dddfe",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def saveLastUpdatedBatchCodeTable(lastUpdated):\n",
|
||||||
|
" def setLastUpdated(soup):\n",
|
||||||
|
" soup.find(id = \"last_updated\").string.replace_with(lastUpdated.strftime(DateProvider.DATE_FORMAT))\n",
|
||||||
|
" return soup\n",
|
||||||
|
"\n",
|
||||||
|
" HtmlTransformerUtil().applySoupTransformerToFile(\n",
|
||||||
|
" file = \"../docs/batchCodeTable.html\",\n",
|
||||||
|
" soupTransformer = setLastUpdated)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -933,16 +972,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"countries = sorted(internationalVaersCovid19['COUNTRY'].unique())"
|
"countries = sorted(internationalVaersCovid19['COUNTRY'].unique())\n",
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "1c518028",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"countryOptions = ['<option value=\"Global\" selected>Global</option>'] + getCountryOptions(countries)"
|
"countryOptions = ['<option value=\"Global\" selected>Global</option>'] + getCountryOptions(countries)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -956,6 +986,16 @@
|
|||||||
"saveCountryOptions(countryOptions)"
|
"saveCountryOptions(countryOptions)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "9c7485b5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"saveLastUpdatedBatchCodeTable(dateProvider.getLastUpdatedDataSource())"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
|
|||||||
@@ -118,7 +118,7 @@
|
|||||||
"print(' lastUpdated:', dateProvider.getLastUpdated())\n",
|
"print(' lastUpdated:', dateProvider.getLastUpdated())\n",
|
||||||
"print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource()) \n",
|
"print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource()) \n",
|
||||||
"needsUpdate = dateProvider.needsUpdate()\n",
|
"needsUpdate = dateProvider.needsUpdate()\n",
|
||||||
"print('needsUpdate: ', needsUpdate)"
|
"print('needsUpdate:', needsUpdate)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user