Merge branch 'Download-VAERS-Data'

This commit is contained in:
frankknoll
2022-05-13 20:02:35 +02:00

View File

@@ -78,6 +78,190 @@
"print('needsUpdate:', needsUpdate)" "print('needsUpdate:', needsUpdate)"
] ]
}, },
{
"cell_type": "markdown",
"id": "9514f5be",
"metadata": {},
"source": [
"## Download VAERS-Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8f0bfb9c",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import time\n",
"\n",
"class AndroidEmulator:\n",
"    \"\"\"Runs a callable while a headless Android emulator is up.\n",
"\n",
"    The emulator binary path and the AVD name are hard-coded below.\n",
"    `_isStarted` and `_stop` rely on IPython `!` shell magics, so this class\n",
"    only works inside an IPython/Jupyter kernel.\n",
"    \"\"\"\n",
"\n",
"    @staticmethod\n",
"    def run(runnable):\n",
"        \"\"\"Start the emulator, call `runnable()` and return its result.\n",
"\n",
"        The emulator is stopped afterwards even if starting it is interrupted\n",
"        or `runnable` raises, so no emulator process is left behind.\n",
"        \"\"\"\n",
"        try:\n",
"            AndroidEmulator._start()\n",
"            return runnable()\n",
"        finally:\n",
"            AndroidEmulator._stop()\n",
"\n",
"    @staticmethod\n",
"    def _start():\n",
"        \"\"\"Launch the emulator in the background (trailing '&') and wait until it has booted.\"\"\"\n",
"        os.system(\"/home/frankknoll/Android/Sdk/emulator/emulator -avd Pixel_2_API_30 -no-window &\")\n",
"        AndroidEmulator._waitUntilStarted()\n",
"\n",
"    @staticmethod\n",
"    def _waitUntilStarted():\n",
"        \"\"\"Poll `_isStarted()` once per second until it returns True.\"\"\"\n",
"        while not AndroidEmulator._isStarted():\n",
"            time.sleep(1)\n",
"\n",
"    @staticmethod\n",
"    def _isStarted():\n",
"        \"\"\"Return True when `adb shell getprop sys.boot_completed` reports '1'.\"\"\"\n",
"        boot_completed = ! adb shell getprop sys.boot_completed\n",
"        # adb may print nothing at all while the device is still booting;\n",
"        # treat empty output as 'not started' instead of failing on [0].\n",
"        return bool(boot_completed) and boot_completed[0].strip() == '1'\n",
"\n",
"    @staticmethod\n",
"    def _stop():\n",
"        \"\"\"Shut the emulator down via `adb emu kill`.\"\"\"\n",
"        ! adb emu kill"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "777ff543",
"metadata": {},
"outputs": [],
"source": [
"from selenium import webdriver\n",
"from webdriver_manager.chrome import ChromeDriverManager\n",
"from selenium.webdriver.chrome.service import Service as ChromeService\n",
"from selenium.webdriver.chrome.options import Options\n",
"from selenium.webdriver.common.by import By\n",
"\n",
"def _getOptions(downloadDir, isHeadless):\n",
"    \"\"\"Build the Chrome options used by `getWebDriver`.\n",
"\n",
"    downloadDir: value for Chrome's `download.default_directory` preference.\n",
"    isHeadless: when true, Chrome is started with the `--headless` argument.\n",
"    \"\"\"\n",
"    options = Options()\n",
"    # The Options.headless setter is deprecated/removed in newer Selenium 4\n",
"    # releases; the command-line argument works with all of them.\n",
"    if isHeadless:\n",
"        options.add_argument(\"--headless\")\n",
"    options.add_experimental_option(\"prefs\", {\"download.default_directory\": downloadDir})\n",
"    return options\n",
"\n",
"def getWebDriver(downloadDir, isHeadless):\n",
"    \"\"\"Create a Chrome WebDriver configured through `_getOptions(downloadDir, isHeadless)`.\n",
"\n",
"    The chromedriver executable path comes from `ChromeDriverManager().install()`.\n",
"    \"\"\"\n",
"    driverPath = ChromeDriverManager().install()\n",
"    chromeService = ChromeService(executable_path = driverPath)\n",
"    chromeOptions = _getOptions(downloadDir, isHeadless)\n",
"    return webdriver.Chrome(service = chromeService, options = chromeOptions)\n",
"\n",
"def saveCaptchaImageAs(driver, captchaImageFile):\n",
"    \"\"\"Write a PNG screenshot of the page's captcha image element to `captchaImageFile`.\"\"\"\n",
"    imageElement = driver.find_element(By.CSS_SELECTOR, \"img[src='captchaImage']\")\n",
"    with open(captchaImageFile, 'wb') as imageOut:\n",
"        pngBytes = imageElement.screenshot_as_png\n",
"        imageOut.write(pngBytes)\n",
"\n",
"def existsElementWithId(driver, id):\n",
"    \"\"\"Return True if the current page has at least one element with the given id.\"\"\"\n",
"    matches = driver.find_elements(By.ID, id)\n",
"    return bool(matches)\n",
"\n",
"def isCaptchaSolved(driver):\n",
"    \"\"\"Return True when the page no longer contains an element with id `wordverify`.\"\"\"\n",
"    captchaStillShown = existsElementWithId(driver, \"wordverify\")\n",
"    return not captchaStillShown\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da7c965a",
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"import os.path\n",
"\n",
"def waitUntilDownloadHasFinished(file, timeoutSeconds = None, pollIntervalSeconds = 2):\n",
"    \"\"\"Block until `file` exists on disk.\n",
"\n",
"    file: path of the file to wait for.\n",
"    timeoutSeconds: maximum time to wait; None (the default) waits indefinitely.\n",
"    pollIntervalSeconds: pause between two existence checks.\n",
"\n",
"    Raises TimeoutError if `timeoutSeconds` elapse before the file exists.\n",
"    \"\"\"\n",
"    deadline = None if timeoutSeconds is None else time.monotonic() + timeoutSeconds\n",
"    while not os.path.exists(file):\n",
"        if deadline is not None and time.monotonic() >= deadline:\n",
"            raise TimeoutError('download did not finish in time: ' + str(file))\n",
"        time.sleep(pollIntervalSeconds)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "918d088d",
"metadata": {},
"outputs": [],
"source": [
"def getTextInCaptchaImage(captchaImageFile):\n",
"    \"\"\"Return the text found in `captchaImageFile`, using the TextRecognizer Android project.\n",
"\n",
"    Copies the image into the project's assets, runs `./gradlew connectedAndroidTest`\n",
"    in the project directory and returns the first line of `captcha_image.txt`\n",
"    read from the connected device via adb.\n",
"    NOTE(review): presumably the instrumented test writes the recognized text to\n",
"    that file; this is not visible here.\n",
"    \"\"\"\n",
"    baseDir = \"~/AndroidStudioProjects/TextRecognizer\"\n",
"    ! cp $captchaImageFile $baseDir/app/src/main/assets/captchas/captcha_image.jpeg\n",
"    # '&&' instead of ';': do not run ./gradlew in the wrong directory if the cd fails\n",
"    ! cd $baseDir && ./gradlew connectedAndroidTest\n",
"    textInCaptchaImage = ! adb shell \"run-as org.textrecognizer cat /data/data/org.textrecognizer/files/captcha_image.txt\"\n",
"    return textInCaptchaImage[0]\n",
" \n",
"def solveCaptchaAndStartFileDownload(driver, captchaImageFile):\n",
"    \"\"\"Read the captcha on the current page, type it in and click the download button.\n",
"\n",
"    The captcha image is saved to `captchaImageFile` and its text is recognized\n",
"    while the Android emulator is running.\n",
"    \"\"\"\n",
"    def _recognizeCaptchaText():\n",
"        return getTextInCaptchaImage(captchaImageFile)\n",
"\n",
"    saveCaptchaImageAs(driver, captchaImageFile)\n",
"    captchaText = AndroidEmulator.run(_recognizeCaptchaText)\n",
"    display('textInCaptchaImage: ', captchaText)\n",
"    codeInput = driver.find_element(By.ID, \"verificationCode\")\n",
"    codeInput.send_keys(captchaText)\n",
"    downloadButton = driver.find_element(By.CSS_SELECTOR, '[name=\"downloadbut\"]')\n",
"    downloadButton.click()\n",
"\n",
"# FK-TODO: file is part of absoluteFile, so remove file\n",
"def downloadFile(file, absoluteFile, driver, maxTries):\n",
"    \"\"\"Request `file` from the VAERS download page, retrying until the captcha is solved.\n",
"\n",
"    file: file name appended to the download URL.\n",
"    absoluteFile: path that is waited for once the captcha has been solved.\n",
"    driver: WebDriver used to load the page.\n",
"    maxTries: upper bound on attempts; None retries without limit. At least one\n",
"        attempt is always made.\n",
"\n",
"    Returns `absoluteFile` after it exists, or None if the captcha is still\n",
"    unsolved after the last attempt.\n",
"    \"\"\"\n",
"    def _attemptDownload():\n",
"        driver.get('https://vaers.hhs.gov/eSubDownload/index.jsp?fn=' + file)\n",
"        solveCaptchaAndStartFileDownload(driver, 'captchaImage.jpeg')\n",
"\n",
"    attempts = 1\n",
"    _attemptDownload()\n",
"    while True:\n",
"        if isCaptchaSolved(driver):\n",
"            break\n",
"        if maxTries is not None and attempts >= maxTries:\n",
"            break\n",
"        _attemptDownload()\n",
"        attempts += 1\n",
"\n",
"    if not isCaptchaSolved(driver):\n",
"        return None\n",
"    waitUntilDownloadHasFinished(absoluteFile)\n",
"    return absoluteFile\n",
"\n",
"def downloadVAERSFile(file, downloadDir):\n",
"    \"\"\"Download `file` from VAERS into `downloadDir` with a non-headless Chrome.\n",
"\n",
"    Returns whatever `downloadFile` returns: the path `downloadDir + \"/\" + file`,\n",
"    or None. The browser is closed in every case.\n",
"    \"\"\"\n",
"    driver = getWebDriver(downloadDir, isHeadless = False)\n",
"    try:\n",
"        return downloadFile(\n",
"            file = file,\n",
"            absoluteFile = downloadDir + \"/\" + file,\n",
"            driver = driver,\n",
"            maxTries = None)\n",
"    finally:\n",
"        # always close the browser, even when the download raises\n",
"        driver.quit()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9ccc73b3",
"metadata": {},
"outputs": [],
"source": [
"import zipfile\n",
"\n",
"def unzip(zipFile, dstDir):\n",
"    \"\"\"Extract every member of the archive `zipFile` into `dstDir`.\"\"\"\n",
"    archive = zipfile.ZipFile(zipFile, 'r')\n",
"    with archive:\n",
"        archive.extractall(dstDir)\n",
"\n",
"def unzipAndRemove(zipFile, dstDir):\n",
"    \"\"\"Extract `zipFile` into `dstDir`, then delete the archive.\"\"\"\n",
"    with zipfile.ZipFile(zipFile, 'r') as archive:\n",
"        archive.extractall(dstDir)\n",
"    os.remove(zipFile)\n",
"\n",
"def downloadVAERSFileAndUnzip(file):\n",
"    \"\"\"Download the VAERS archive `file` into the tmp directory and unpack it into the VAERS directory.\n",
"\n",
"    The downloaded archive is removed after extraction.\n",
"    \"\"\"\n",
"    tmpDir = \"/home/frankknoll/Dokumente/Corona/projects/HowBadIsMyBatch/src/VAERS/tmp\"\n",
"    vaersDir = '/home/frankknoll/Dokumente/Corona/projects/HowBadIsMyBatch/src/VAERS/'\n",
"    zipFile = downloadVAERSFile(file, tmpDir)\n",
"    unzipAndRemove(zipFile = zipFile, dstDir = vaersDir)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9a9e4d1e",
"metadata": {},
"outputs": [],
"source": [
"if needsUpdate:\n",
"    # fetch the 2022 archive first, then the non-domestic one\n",
"    for zipFileName in ('2022VAERSData.zip', 'NonDomesticVAERSData.zip'):\n",
"        downloadVAERSFileAndUnzip(zipFileName)"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,