downloading VAERSBeforeDeletion.7z
This commit is contained in:
7
.github/workflows/buildAndDeployWebsite.yml
vendored
7
.github/workflows/buildAndDeployWebsite.yml
vendored
@@ -25,19 +25,20 @@ jobs:
|
|||||||
- name: Installing Google Chrome
|
- name: Installing Google Chrome
|
||||||
shell: bash -el {0}
|
shell: bash -el {0}
|
||||||
run: |
|
run: |
|
||||||
|
conda activate howbadismybatch-venv
|
||||||
wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
|
wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
|
||||||
sudo dpkg -i google-chrome-stable_current_amd64.deb
|
sudo dpkg -i google-chrome-stable_current_amd64.deb
|
||||||
sudo apt install google-chrome-stable
|
sudo apt install google-chrome-stable
|
||||||
pip install selenium webdriver-manager
|
pip install --upgrade selenium webdriver-manager
|
||||||
pip install pycountry
|
|
||||||
pip install python-decouple
|
|
||||||
- name: Installing ipython kernel
|
- name: Installing ipython kernel
|
||||||
shell: bash -el {0}
|
shell: bash -el {0}
|
||||||
run: |
|
run: |
|
||||||
|
conda activate howbadismybatch-venv
|
||||||
ipython kernel install --user --name=howbadismybatch-venv-kernel
|
ipython kernel install --user --name=howbadismybatch-venv-kernel
|
||||||
- name: Executing HowBadIsMyBatch.ipynb
|
- name: Executing HowBadIsMyBatch.ipynb
|
||||||
shell: bash -el {0}
|
shell: bash -el {0}
|
||||||
run: |
|
run: |
|
||||||
|
conda activate howbadismybatch-venv
|
||||||
cd src
|
cd src
|
||||||
jupyter nbconvert --to script HowBadIsMyBatch.ipynb
|
jupyter nbconvert --to script HowBadIsMyBatch.ipynb
|
||||||
python ./HowBadIsMyBatch.py
|
python ./HowBadIsMyBatch.py
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -17,3 +17,4 @@ google-chrome-stable_current_amd64*
|
|||||||
src/captcha/__pycache__
|
src/captcha/__pycache__
|
||||||
src/GoogleAnalytics/__pycache__
|
src/GoogleAnalytics/__pycache__
|
||||||
src/SymptomsCausedByVaccines/__pycache__
|
src/SymptomsCausedByVaccines/__pycache__
|
||||||
|
src/HowBadIsMyBatch.py
|
||||||
|
|||||||
@@ -9,6 +9,8 @@ dependencies:
|
|||||||
- pandas=1.4.0
|
- pandas=1.4.0
|
||||||
- urllib3
|
- urllib3
|
||||||
- requests
|
- requests
|
||||||
|
- gdown
|
||||||
|
- py7zr
|
||||||
- bs4
|
- bs4
|
||||||
- lxml
|
- lxml
|
||||||
- jupyter
|
- jupyter
|
||||||
|
|||||||
17
src/GoogleDriveDownloader.py
Normal file
17
src/GoogleDriveDownloader.py
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
import gdown
|
||||||
|
import py7zr
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class GoogleDriveDownloader:
    """Download files from Google Drive via gdown and optionally unpack 7z archives."""

    @staticmethod
    def downloadIfNotYetDownloaded(remoteSrcFile, localDstFile):
        """Download remoteSrcFile to localDstFile unless localDstFile already exists.

        Args:
            remoteSrcFile: URL of the file, handed to gdown.download with fuzzy=True.
            localDstFile: Local path the downloaded file is written to.

        Raises:
            RuntimeError: If no file exists at localDstFile after the download attempt.
        """
        if os.path.exists(localDstFile):
            return

        # Create the destination directory up front so the download does not
        # depend on it already being there.
        dstDir = os.path.dirname(localDstFile)
        if dstDir:
            os.makedirs(dstDir, exist_ok=True)

        downloadedFile = gdown.download(url=remoteSrcFile, output=localDstFile, fuzzy=True)
        # NOTE(review): some gdown versions appear to report failure by returning
        # None instead of raising - confirm against the pinned gdown version.
        if downloadedFile is None or not os.path.exists(localDstFile):
            raise RuntimeError(f'Could not download {remoteSrcFile} to {localDstFile}')

    @staticmethod
    def downloadSevenZipFileAndExtract(remoteSevenZipSrcFile, localSevenZipDstFile):
        """Download a 7z archive (if not yet present) and extract it next to itself.

        The archive is extracted into the directory containing
        localSevenZipDstFile on every call, even when the download was skipped.

        Args:
            remoteSevenZipSrcFile: URL of the 7z archive.
            localSevenZipDstFile: Local path of the downloaded archive.
        """
        GoogleDriveDownloader.downloadIfNotYetDownloaded(remoteSevenZipSrcFile, localSevenZipDstFile)
        # os.path.dirname yields '' for a bare file name; extract into the
        # current directory in that case.
        dstDir = os.path.dirname(localSevenZipDstFile) or '.'
        with py7zr.SevenZipFile(localSevenZipDstFile, mode='r') as sevenZipFile:
            sevenZipFile.extractall(path=dstDir)
|
||||||
52
src/GoogleDriveDownloaderTest.py
Normal file
52
src/GoogleDriveDownloaderTest.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
import os
|
||||||
|
from IOUtils import IOUtils
|
||||||
|
from GoogleDriveDownloader import GoogleDriveDownloader
|
||||||
|
|
||||||
|
|
||||||
|
class GoogleDriveDownloaderTest(unittest.TestCase):
    """Tests that exercise GoogleDriveDownloader against Google Drive URLs.

    All paths are relative ('src/tmp/...'), so the tests depend on the
    current working directory they are started from.
    """

    def test_downloadIfNotYetDownloaded_notYetDownloaded(self):
        # Given
        remoteSrcFile = "https://drive.google.com/file/d/1LstnMvxW4LVxgNvfk5h4AnbvPktMeNSd/view?usp=drive_link"
        localDstFile = 'src/tmp/test.txt'
        IOUtils.silentlyRemoveFile(localDstFile)

        # When
        GoogleDriveDownloader.downloadIfNotYetDownloaded(remoteSrcFile, localDstFile)

        # Then
        self.assertEqual(Path(localDstFile).read_text(), 'test')

    def test_downloadIfNotYetDownloaded_alreadyDownloaded(self):
        # Given
        remoteSrcFile = "https://drive.google.com/file/d/1LstnMvxW4LVxgNvfk5h4AnbvPktMeNSd/view?usp=drive_link"
        localDstFile = 'src/tmp/test.txt'
        content = 'local file content'
        self._createFileWithContent(localDstFile, content)

        # When
        GoogleDriveDownloader.downloadIfNotYetDownloaded(remoteSrcFile, localDstFile)

        # Then: the pre-existing local file must not have been overwritten
        self.assertEqual(Path(localDstFile).read_text(), content)

    def test_downloadSevenZipFileAndExtract(self):
        # Given
        remoteSevenZipSrcFile = "https://drive.google.com/file/d/14hFKlt48dzDnEjHS_7vYVca5elfzX0l1/view?usp=drive_link"
        localSevenZipDstFile = 'src/tmp/test.7z'
        localDstFolder = os.path.dirname(localSevenZipDstFile)
        IOUtils.silentlyRemoveFile(localSevenZipDstFile)
        IOUtils.silentlyRemoveFolder(localDstFolder + '/test')

        # When
        GoogleDriveDownloader.downloadSevenZipFileAndExtract(remoteSevenZipSrcFile, localSevenZipDstFile)

        # Then
        self.assertEqual(Path(localDstFolder + '/test/test.txt').read_text(), 'test')

    def _createFileWithContent(self, file, content):
        """Write content to file, creating the parent directory when it is missing."""
        directory = os.path.dirname(file)
        if directory:
            # Without this the helper fails when 'src/tmp' does not exist yet.
            os.makedirs(directory, exist_ok=True)
        # Use a separate name for the handle instead of rebinding the 'file' parameter.
        with open(file, 'w') as fileHandle:
            fileHandle.write(content)
|
||||||
|
|
||||||
@@ -1,6 +1,9 @@
|
|||||||
import os
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import shutil
|
||||||
import simplejson as json
|
import simplejson as json
|
||||||
|
|
||||||
|
|
||||||
class IOUtils:
|
class IOUtils:
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -13,19 +16,19 @@ class IOUtils:
|
|||||||
IOUtils.ensurePath(file)
|
IOUtils.ensurePath(file)
|
||||||
dataFrame.to_html(
|
dataFrame.to_html(
|
||||||
file,
|
file,
|
||||||
index = False,
|
index=False,
|
||||||
table_id = 'batchCodeTable',
|
table_id='batchCodeTable',
|
||||||
classes = 'display',
|
classes='display',
|
||||||
justify = 'unset',
|
justify='unset',
|
||||||
border = 0)
|
border=0)
|
||||||
|
|
||||||
@staticmethod
def saveDataFrameAsJson(dataFrame, file):
    """Write dataFrame to file as JSON using orient="split" and index=False.

    IOUtils.ensurePath(file) is called first to prepare the target location.
    """
    IOUtils.ensurePath(file)
    jsonOptions = {"orient": "split", "index": False}
    dataFrame.to_json(file, **jsonOptions)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def saveDictAsJson(dict, file):
|
def saveDictAsJson(dict, file):
|
||||||
@@ -38,3 +41,11 @@ class IOUtils:
|
|||||||
directory = os.path.dirname(file)
|
directory = os.path.dirname(file)
|
||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
os.makedirs(directory)
|
os.makedirs(directory)
|
||||||
|
|
||||||
|
@staticmethod
def silentlyRemoveFile(file):
    """Delete file; a file that does not exist is ignored without error."""
    target = Path(file)
    try:
        target.unlink()
    except FileNotFoundError:
        # A missing file is the "silent" case: nothing to remove.
        pass
|
||||||
|
|
||||||
|
@staticmethod
def silentlyRemoveFolder(folder):
    """Recursively delete folder; errors raised during removal are ignored."""
    ignoreErrors = True
    shutil.rmtree(folder, ignore_errors=ignoreErrors)
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import VaersReader
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from VaersDescrReader import VaersDescrReader
|
from VaersDescrReader import VaersDescrReader
|
||||||
from CountryColumnAdder import CountryColumnAdder
|
from CountryColumnAdder import CountryColumnAdder
|
||||||
|
from GoogleDriveDownloader import GoogleDriveDownloader
|
||||||
|
|
||||||
def getInternationalVaersCovid19(dataDir, years):
|
def getInternationalVaersCovid19(dataDir, years):
|
||||||
internationalVaers = pd.concat(
|
internationalVaers = pd.concat(
|
||||||
@@ -16,6 +16,9 @@ def getInternationalVaersCovid19(dataDir, years):
|
|||||||
|
|
||||||
|
|
||||||
def getInternationalVaersCovid19BeforeDeletion():
    """Return the international COVID-19 VAERS data of the "before deletion" snapshot.

    Downloads and extracts VAERS/VAERSBeforeDeletion.7z through
    GoogleDriveDownloader, then reads the years 2020-2022 from
    VAERS/VAERSBeforeDeletion via getInternationalVaersCovid19.
    """
    archiveUrl = "https://drive.google.com/file/d/1Rb-lfxNxw_WwvRDVLEhvqOyv_a2f8ern/view?usp=drive_link"
    archiveFile = 'VAERS/VAERSBeforeDeletion.7z'
    GoogleDriveDownloader.downloadSevenZipFileAndExtract(
        remoteSevenZipSrcFile=archiveUrl,
        localSevenZipDstFile=archiveFile)

    # presumably the archive unpacks into VAERS/VAERSBeforeDeletion - verify against the archive layout
    snapshotYears = [2020, 2021, 2022]
    return getInternationalVaersCovid19(dataDir='VAERS/VAERSBeforeDeletion', years=snapshotYears)
|
||||||
|
|
||||||
def get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years):
|
def get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years):
|
||||||
|
|||||||
@@ -16,34 +16,34 @@ from captcha.CaptchaShape import CaptchaShape
|
|||||||
|
|
||||||
def updateVAERSFiles(years, workingDirectory):
    """Download and unzip the VAERS archive of every year plus the non-domestic archive.

    The yearly archives ('<year>VAERSData.zip') are processed first, in the
    order given by years, followed by 'NonDomesticVAERSData.zip'.
    """
    archives = [f'{year}VAERSData.zip' for year in years]
    archives.append('NonDomesticVAERSData.zip')
    for archive in archives:
        _downloadVAERSFileAndUnzip(archive, workingDirectory)
|
||||||
|
|
||||||
def _downloadVAERSFileAndUnzip(file, workingDirectory):
    """Download file into <workingDirectory>/VAERS/tmp and unzip it into <workingDirectory>/VAERS/.

    The downloaded archive is passed to unzipAndRemove together with the
    destination directory.
    """
    tmpDir = workingDirectory + "/VAERS/tmp"
    vaersDir = workingDirectory + '/VAERS/'
    zipFile = _downloadVAERSFile(file, tmpDir)
    unzipAndRemove(zipFile=zipFile, dstDir=vaersDir)
|
||||||
|
|
||||||
def _downloadVAERSFile(file, downloadDir):
    """Download a single VAERS file into downloadDir using a headless web driver.

    Args:
        file: Name of the file to download (e.g. '2021VAERSData.zip').
        downloadDir: Directory the web driver is configured to download into.

    Returns:
        Whatever _downloadFile returns for downloadDir + "/" + file.
    """
    driver = getWebDriver(downloadDir, isHeadless=True)
    try:
        # maxTries=None makes _downloadFile retry until the captcha is solved.
        return _downloadFile(
            absoluteFile=downloadDir + "/" + file,
            driver=driver,
            maxTries=None)
    finally:
        # Always shut the browser down, even when the download raises;
        # otherwise the driver process is leaked.
        driver.quit()
|
||||||
|
|
||||||
def downloadFile(absoluteFile, driver, maxTries):
|
def _downloadFile(absoluteFile, driver, maxTries):
|
||||||
captchaReader = _createCaptchaReader()
|
captchaReader = _createCaptchaReader()
|
||||||
def _downloadFile():
|
def downloadFile():
|
||||||
driver.get('https://vaers.hhs.gov/eSubDownload/index.jsp?fn=' + os.path.basename(absoluteFile))
|
driver.get('https://vaers.hhs.gov/eSubDownload/index.jsp?fn=' + os.path.basename(absoluteFile))
|
||||||
solveCaptchaAndStartFileDownload(driver, captchaReader, 'captchaImage.jpeg')
|
_solveCaptchaAndStartFileDownload(driver, captchaReader, 'captchaImage.jpeg')
|
||||||
|
|
||||||
numTries = 1
|
numTries = 1
|
||||||
_downloadFile()
|
downloadFile()
|
||||||
while(not isCaptchaSolved(driver) and (maxTries is None or numTries < maxTries)):
|
while(not isCaptchaSolved(driver) and (maxTries is None or numTries < maxTries)):
|
||||||
_downloadFile()
|
downloadFile()
|
||||||
numTries = numTries + 1
|
numTries = numTries + 1
|
||||||
|
|
||||||
if isCaptchaSolved(driver):
|
if isCaptchaSolved(driver):
|
||||||
@@ -57,7 +57,7 @@ def _createCaptchaReader():
|
|||||||
return CaptchaReader(modelFilepath = f'{working_directory}/captcha/MobileNetV3Small',
|
return CaptchaReader(modelFilepath = f'{working_directory}/captcha/MobileNetV3Small',
|
||||||
captchaShape = CaptchaShape())
|
captchaShape = CaptchaShape())
|
||||||
|
|
||||||
def solveCaptchaAndStartFileDownload(driver, captchaReader, captchaImageFile):
|
def _solveCaptchaAndStartFileDownload(driver, captchaReader, captchaImageFile):
|
||||||
saveCaptchaImageAs(driver, captchaImageFile)
|
saveCaptchaImageAs(driver, captchaImageFile)
|
||||||
textInCaptchaImage = captchaReader.getTextInCaptchaImage(captchaImageFile)
|
textInCaptchaImage = captchaReader.getTextInCaptchaImage(captchaImageFile)
|
||||||
print('textInCaptchaImage:', textInCaptchaImage)
|
print('textInCaptchaImage:', textInCaptchaImage)
|
||||||
|
|||||||
@@ -6,7 +6,8 @@ from selenium.webdriver.common.by import By
|
|||||||
|
|
||||||
def _getOptions(downloadDir, isHeadless):
    """Build the web driver Options for downloading into downloadDir.

    When isHeadless is true the '--headless=new' argument is added. The
    "download.default_directory" preference is always set to downloadDir.
    """
    browserOptions = Options()
    if isHeadless:
        browserOptions.add_argument('--headless=new')
    downloadPrefs = {"download.default_directory": downloadDir}
    browserOptions.add_experimental_option("prefs", downloadPrefs)
    return browserOptions
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user