# %%
# Tail of the preceding notebook cell; `needsUpdate` is defined earlier in the notebook.
print('needsUpdate:', needsUpdate)

# %% [markdown]
# ## Download VAERS-Data

# %%
import os
import shutil
import subprocess
import time


def _runAndCaptureLines(command, cwd = None):
    """Run `command` (an argument list, executed without a shell) and return its stdout as a list of lines."""
    completed = subprocess.run(command, cwd = cwd, capture_output = True, text = True)
    return completed.stdout.splitlines()


class AndroidEmulator:
    """Runs a callable while a headless Android emulator is up and kills the emulator afterwards."""

    @staticmethod
    def run(runnable):
        """Start the emulator, call `runnable()` and return its result.

        The emulator is stopped even if starting it or `runnable` raises,
        so a failed run does not leave an emulator process behind.
        """
        try:
            AndroidEmulator._start()
            return runnable()
        finally:
            AndroidEmulator._stop()

    @staticmethod
    def _start():
        """Launch the emulator in the background and block until it reports a completed boot."""
        # The trailing '&' makes the shell return immediately; readiness is polled below.
        os.system("/home/frankknoll/Android/Sdk/emulator/emulator -avd Pixel_2_API_30 -no-window &")
        AndroidEmulator._waitUntilStarted()

    @staticmethod
    def _waitUntilStarted():
        """Poll once per second until `_isStarted()` is true."""
        while not AndroidEmulator._isStarted():
            time.sleep(1)

    @staticmethod
    def _isStarted():
        """Return True when `adb shell getprop sys.boot_completed` prints '1'."""
        lines = _runAndCaptureLines(["adb", "shell", "getprop", "sys.boot_completed"])
        # The command can print nothing (presumably while the property is still
        # unset), so guard against an empty result instead of indexing blindly.
        return bool(lines) and lines[0].strip() == '1'

    @staticmethod
    def _stop():
        """Ask the running emulator to shut down; a failure of the command is ignored."""
        subprocess.run(["adb", "emu", "kill"], check = False)


# %%
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By


def _getOptions(downloadDir, isHeadless):
    """Build Chrome options that download into `downloadDir`, optionally headless."""
    options = Options()
    # NOTE(review): the `Options.headless` setter was deprecated and later removed
    # in Selenium 4; passing the argument directly behaves the same on old and
    # new releases - confirm against the installed Selenium version.
    if isHeadless:
        options.add_argument("--headless")
    options.add_experimental_option("prefs", {"download.default_directory" : downloadDir})
    return options


def getWebDriver(downloadDir, isHeadless):
    """Create a Chrome WebDriver whose chromedriver binary is provided by webdriver_manager."""
    return webdriver.Chrome(
        service = ChromeService(executable_path = ChromeDriverManager().install()),
        options = _getOptions(downloadDir, isHeadless))


def saveCaptchaImageAs(driver, captchaImageFile):
    """Write a PNG screenshot of the page's captcha image element to `captchaImageFile`."""
    captchaImage = driver.find_element(By.CSS_SELECTOR, "img[src='captchaImage']")
    with open(captchaImageFile, 'wb') as file:
        file.write(captchaImage.screenshot_as_png)


def existsElementWithId(driver, id):
    """Return True if the current page contains at least one element with the given id."""
    # `id` shadows the builtin, but it is part of the signature callers may use by keyword.
    return len(driver.find_elements(By.ID, id)) > 0


def isCaptchaSolved(driver):
    """Return True when the page no longer contains the element with id 'wordverify'."""
    return not existsElementWithId(driver, "wordverify")


# %%
import time
import os.path


def waitUntilDownloadHasFinished(file, timeout = None, pollInterval = 2):
    """Block until `file` exists.

    file: path that is expected to appear.
    timeout: maximum number of seconds to wait; None (the default) waits forever.
    pollInterval: seconds to sleep between existence checks.
    Raises TimeoutError when `timeout` elapses before the file appears.

    NOTE(review): this treats "the file exists" as "the download is complete",
    i.e. it relies on the browser creating the final file name only at the end
    of the download - confirm for the browser in use.
    """
    deadline = None if timeout is None else time.monotonic() + timeout
    while not os.path.exists(file):
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError('download did not finish in time: ' + str(file))
        time.sleep(pollInterval)


# %%
def getTextInCaptchaImage(captchaImageFile):
    """Run the TextRecognizer Android project on the captcha image and return the recognized text.

    The image is copied into the project's assets, the project's connected
    Android test is executed, and the first line of the text file it leaves
    on the device is returned. Returns '' when no text could be read.
    """
    baseDir = os.path.expanduser("~/AndroidStudioProjects/TextRecognizer")
    shutil.copyfile(
        captchaImageFile,
        os.path.join(baseDir, "app/src/main/assets/captchas/captcha_image.jpeg"))
    # A failing Gradle run is tolerated: it yields no (or wrong) text, the captcha
    # is rejected, and the caller's retry loop makes another attempt.
    subprocess.run(
        [os.path.join(baseDir, "gradlew"), "connectedAndroidTest"],
        cwd = baseDir,
        check = False)
    lines = _runAndCaptureLines(
        ["adb", "shell", "run-as org.textrecognizer cat /data/data/org.textrecognizer/files/captcha_image.txt"])
    return lines[0] if lines else ''


def solveCaptchaAndStartFileDownload(driver, captchaImageFile):
    """Recognize the captcha shown on the current page, type it in and press the download button."""
    saveCaptchaImageAs(driver, captchaImageFile)
    textInCaptchaImage = AndroidEmulator.run(lambda: getTextInCaptchaImage(captchaImageFile))
    display('textInCaptchaImage: ', textInCaptchaImage)
    driver.find_element(By.ID, "verificationCode").send_keys(textInCaptchaImage)
    driver.find_element(By.CSS_SELECTOR, '[name="downloadbut"]').click()


# FK-TODO: file is part of absoluteFile, so remove file
def downloadFile(file, absoluteFile, driver, maxTries):
    """Download `file` from the VAERS site, retrying while the captcha is not solved.

    file: name of the file on the server (appended to the download URL).
    absoluteFile: local path at which the finished download is expected.
    driver: Selenium WebDriver used for the session.
    maxTries: maximum number of attempts, or None to retry without limit.
        At least one attempt is always made.
    Returns `absoluteFile` once the download has finished, or None when the
    captcha was still unsolved after `maxTries` attempts.
    """
    numTries = 0
    while True:
        driver.get('https://vaers.hhs.gov/eSubDownload/index.jsp?fn=' + file)
        solveCaptchaAndStartFileDownload(driver, 'captchaImage.jpeg')
        numTries += 1

        if isCaptchaSolved(driver):
            waitUntilDownloadHasFinished(absoluteFile)
            return absoluteFile
        if maxTries is not None and numTries >= maxTries:
            return None


def downloadVAERSFile(file, downloadDir):
    """Download `file` into `downloadDir` with a fresh, visible Chrome session.

    Retries without limit until the captcha is solved. The browser is closed
    even if the download raises. Returns the path of the downloaded file.
    """
    driver = getWebDriver(downloadDir, isHeadless = False)
    try:
        return downloadFile(
            file = file,
            absoluteFile = os.path.join(downloadDir, file),
            driver = driver,
            maxTries = None)
    finally:
        driver.quit()


# %%
import zipfile

_VAERS_DOWNLOAD_DIR = "/home/frankknoll/Dokumente/Corona/projects/HowBadIsMyBatch/src/VAERS/tmp"
_VAERS_DATA_DIR = '/home/frankknoll/Dokumente/Corona/projects/HowBadIsMyBatch/src/VAERS/'


def unzip(zipFile, dstDir):
    """Extract every member of `zipFile` into `dstDir`."""
    with zipfile.ZipFile(zipFile, 'r') as zip_ref:
        zip_ref.extractall(dstDir)


def unzipAndRemove(zipFile, dstDir):
    """Extract `zipFile` into `dstDir`, then delete the archive."""
    unzip(zipFile, dstDir)
    os.remove(zipFile)


def downloadVAERSFileAndUnzip(file, downloadDir = _VAERS_DOWNLOAD_DIR, dstDir = _VAERS_DATA_DIR):
    """Download the VAERS archive `file`, extract it into `dstDir` and delete the archive.

    file: archive name on the VAERS server, e.g. '2022VAERSData.zip'.
    downloadDir: directory the browser downloads into.
    dstDir: directory the archive is extracted into.
    Raises RuntimeError when the download did not produce a file.
    """
    downloadedFile = downloadVAERSFile(file, downloadDir)
    if downloadedFile is None:
        # Fail with a clear message instead of handing None to zipfile.ZipFile.
        raise RuntimeError('download failed: ' + file)
    unzipAndRemove(
        zipFile = downloadedFile,
        dstDir = dstDir)


# %%
if needsUpdate:
    downloadVAERSFileAndUnzip('2022VAERSData.zip')
    downloadVAERSFileAndUnzip('NonDomesticVAERSData.zip')