Merge branch 'Download-VAERS-Data'
This commit is contained in:
@@ -78,6 +78,190 @@
|
|||||||
"print('needsUpdate:', needsUpdate)"
|
"print('needsUpdate:', needsUpdate)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "9514f5be",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Download VAERS-Data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "8f0bfb9c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"class AndroidEmulator:\n",
|
||||||
|
" \n",
|
||||||
|
" @staticmethod\n",
|
||||||
|
" def run(runnable):\n",
|
||||||
|
" AndroidEmulator._start()\n",
|
||||||
|
" result = runnable()\n",
|
||||||
|
" AndroidEmulator._stop()\n",
|
||||||
|
" return result\n",
|
||||||
|
" \n",
|
||||||
|
" @staticmethod\n",
|
||||||
|
" def _start():\n",
|
||||||
|
" os.system(\"/home/frankknoll/Android/Sdk/emulator/emulator -avd Pixel_2_API_30 -no-window &\")\n",
|
||||||
|
" AndroidEmulator._waitUntilStarted()\n",
|
||||||
|
" \n",
|
||||||
|
" @staticmethod\n",
|
||||||
|
" def _waitUntilStarted():\n",
|
||||||
|
" while not AndroidEmulator._isStarted():\n",
|
||||||
|
" time.sleep(1)\n",
|
||||||
|
"\n",
|
||||||
|
" @staticmethod\n",
|
||||||
|
" def _isStarted():\n",
|
||||||
|
" boot_completed = ! adb shell getprop sys.boot_completed\n",
|
||||||
|
" return boot_completed[0] == '1'\n",
|
||||||
|
"\n",
|
||||||
|
" @staticmethod\n",
|
||||||
|
" def _stop():\n",
|
||||||
|
" ! adb emu kill"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "777ff543",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from selenium import webdriver\n",
|
||||||
|
"from webdriver_manager.chrome import ChromeDriverManager\n",
|
||||||
|
"from selenium.webdriver.chrome.service import Service as ChromeService\n",
|
||||||
|
"from selenium.webdriver.chrome.options import Options\n",
|
||||||
|
"from selenium.webdriver.common.by import By\n",
|
||||||
|
"\n",
|
||||||
|
"def _getOptions(downloadDir, isHeadless):\n",
|
||||||
|
" options = Options()\n",
|
||||||
|
" options.headless = isHeadless\n",
|
||||||
|
" options.add_experimental_option(\"prefs\", {\"download.default_directory\" : downloadDir})\n",
|
||||||
|
" return options\n",
|
||||||
|
"\n",
|
||||||
|
"def getWebDriver(downloadDir, isHeadless):\n",
|
||||||
|
" return webdriver.Chrome(\n",
|
||||||
|
" service = ChromeService(executable_path = ChromeDriverManager().install()),\n",
|
||||||
|
" options = _getOptions(downloadDir, isHeadless))\n",
|
||||||
|
"\n",
|
||||||
|
"def saveCaptchaImageAs(driver, captchaImageFile):\n",
|
||||||
|
" captchaImage = driver.find_element(By.CSS_SELECTOR, \"img[src='captchaImage']\")\n",
|
||||||
|
" with open(captchaImageFile, 'wb') as file:\n",
|
||||||
|
" file.write(captchaImage.screenshot_as_png)\n",
|
||||||
|
"\n",
|
||||||
|
"def existsElementWithId(driver, id):\n",
|
||||||
|
" return len(driver.find_elements(By.ID, id)) > 0\n",
|
||||||
|
"\n",
|
||||||
|
"def isCaptchaSolved(driver):\n",
|
||||||
|
" return not existsElementWithId(driver, \"wordverify\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "da7c965a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import time\n",
|
||||||
|
"import os.path\n",
|
||||||
|
"\n",
|
||||||
|
"def waitUntilDownloadHasFinished(file):\n",
|
||||||
|
" while not os.path.exists(file):\n",
|
||||||
|
" time.sleep(2)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "918d088d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def getTextInCaptchaImage(captchaImageFile):\n",
|
||||||
|
" baseDir = \"~/AndroidStudioProjects/TextRecognizer\"\n",
|
||||||
|
" ! cp $captchaImageFile $baseDir/app/src/main/assets/captchas/captcha_image.jpeg\n",
|
||||||
|
" ! cd $baseDir;./gradlew connectedAndroidTest\n",
|
||||||
|
" textInCaptchaImage = ! adb shell \"run-as org.textrecognizer cat /data/data/org.textrecognizer/files/captcha_image.txt\"\n",
|
||||||
|
" return textInCaptchaImage[0]\n",
|
||||||
|
" \n",
|
||||||
|
"def solveCaptchaAndStartFileDownload(driver, captchaImageFile):\n",
|
||||||
|
" saveCaptchaImageAs(driver, captchaImageFile)\n",
|
||||||
|
" textInCaptchaImage = AndroidEmulator.run(lambda: getTextInCaptchaImage(captchaImageFile))\n",
|
||||||
|
" display('textInCaptchaImage: ', textInCaptchaImage)\n",
|
||||||
|
" driver.find_element(By.ID, \"verificationCode\").send_keys(textInCaptchaImage)\n",
|
||||||
|
" driver.find_element(By.CSS_SELECTOR, '[name=\"downloadbut\"]').click()\n",
|
||||||
|
"\n",
|
||||||
|
"# FK-TODO: file is part of absoluteFile, so remove file\n",
|
||||||
|
"def downloadFile(file, absoluteFile, driver, maxTries):\n",
|
||||||
|
" def _downloadFile():\n",
|
||||||
|
" driver.get('https://vaers.hhs.gov/eSubDownload/index.jsp?fn=' + file)\n",
|
||||||
|
" solveCaptchaAndStartFileDownload(driver, 'captchaImage.jpeg')\n",
|
||||||
|
"\n",
|
||||||
|
" numTries = 1\n",
|
||||||
|
" _downloadFile()\n",
|
||||||
|
" while(not isCaptchaSolved(driver) and (maxTries is None or numTries < maxTries)):\n",
|
||||||
|
" _downloadFile()\n",
|
||||||
|
" numTries = numTries + 1\n",
|
||||||
|
"\n",
|
||||||
|
" if isCaptchaSolved(driver):\n",
|
||||||
|
" waitUntilDownloadHasFinished(absoluteFile)\n",
|
||||||
|
" return absoluteFile\n",
|
||||||
|
" else:\n",
|
||||||
|
" return None\n",
|
||||||
|
"\n",
|
||||||
|
"def downloadVAERSFile(file, downloadDir):\n",
|
||||||
|
" driver = getWebDriver(downloadDir, isHeadless = False)\n",
|
||||||
|
" downloadedFile = downloadFile(\n",
|
||||||
|
" file = file,\n",
|
||||||
|
" absoluteFile = downloadDir + \"/\" + file,\n",
|
||||||
|
" driver = driver,\n",
|
||||||
|
" maxTries = None)\n",
|
||||||
|
" driver.quit()\n",
|
||||||
|
" return downloadedFile"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "9ccc73b3",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import zipfile\n",
|
||||||
|
"\n",
|
||||||
|
"def unzip(zipFile, dstDir):\n",
|
||||||
|
" with zipfile.ZipFile(zipFile, 'r') as zip_ref:\n",
|
||||||
|
" zip_ref.extractall(dstDir)\n",
|
||||||
|
"\n",
|
||||||
|
"def unzipAndRemove(zipFile, dstDir):\n",
|
||||||
|
" unzip(zipFile, dstDir)\n",
|
||||||
|
" os.remove(zipFile)\n",
|
||||||
|
"\n",
|
||||||
|
"def downloadVAERSFileAndUnzip(file):\n",
|
||||||
|
" downloadedFile = downloadVAERSFile(file, \"/home/frankknoll/Dokumente/Corona/projects/HowBadIsMyBatch/src/VAERS/tmp\")\n",
|
||||||
|
" unzipAndRemove(\n",
|
||||||
|
" zipFile = downloadedFile,\n",
|
||||||
|
" dstDir = '/home/frankknoll/Dokumente/Corona/projects/HowBadIsMyBatch/src/VAERS/')\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "9a9e4d1e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"if needsUpdate:\n",
|
||||||
|
" downloadVAERSFileAndUnzip('2022VAERSData.zip')\n",
|
||||||
|
" downloadVAERSFileAndUnzip('NonDomesticVAERSData.zip')"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
|
|||||||
Reference in New Issue
Block a user