From 21c00eb06c56f0e4be4c35a4ffdabde6d5c2a045 Mon Sep 17 00:00:00 2001 From: Frank Knoll Date: Thu, 25 Apr 2024 12:08:21 +0200 Subject: [PATCH] restoreVAERSBeforeDeletionFolder() --- environment.yml | 1 - src/CountriesByBatchcodeProvider.py | 8 ++-- src/GoogleDriveDownloader.py | 17 -------- src/GoogleDriveDownloaderTest.py | 52 ------------------------ src/HowBadIsMyBatch.ipynb | 28 ++++++++++--- src/InternationalVaersCovid19Provider.py | 8 +--- 6 files changed, 28 insertions(+), 86 deletions(-) delete mode 100644 src/GoogleDriveDownloader.py delete mode 100644 src/GoogleDriveDownloaderTest.py diff --git a/environment.yml b/environment.yml index 9ce59672ae0..9e4f96e6060 100644 --- a/environment.yml +++ b/environment.yml @@ -10,7 +10,6 @@ dependencies: - urllib3 - requests - gdown - - py7zr - bs4 - lxml - jupyter diff --git a/src/CountriesByBatchcodeProvider.py b/src/CountriesByBatchcodeProvider.py index 979c50a31e1..47bdd40b392 100644 --- a/src/CountriesByBatchcodeProvider.py +++ b/src/CountriesByBatchcodeProvider.py @@ -3,14 +3,14 @@ from InternationalVaersCovid19Provider import getInternationalVaersCovid19Before from CountryCountsByBatchcodeTablesMerger import CountryCountsByBatchcodeTablesMerger -def getCountryCountsByBatchcodeTable(): +def getCountryCountsByBatchcodeTable(vaersBeforeDeletionDataDir): return _combineCountryCountsByBatchcodeTables( countryCountsByClickedBatchcode = CountryCountsByBatchcodeTablesMerger.getCountryCountsByClickedBatchcodeTable(), - countryCountsByBatchcodeBeforeDeletion = _getCountryCountsByBatchcodeBeforeDeletion()) + countryCountsByBatchcodeBeforeDeletion = _getCountryCountsByBatchcodeBeforeDeletion(vaersBeforeDeletionDataDir)) -def _getCountryCountsByBatchcodeBeforeDeletion(): - return (getInternationalVaersCovid19BeforeDeletion() +def _getCountryCountsByBatchcodeBeforeDeletion(vaersBeforeDeletionDataDir): + return (getInternationalVaersCovid19BeforeDeletion(vaersBeforeDeletionDataDir) .groupby('VAX_LOT') ['COUNTRY'].value_counts() .to_frame(name = 'COUNTRY_COUNT_BY_VAX_LOT')) diff --git a/src/GoogleDriveDownloader.py b/src/GoogleDriveDownloader.py deleted file mode 100644 index ab33e9a1240..00000000000 --- a/src/GoogleDriveDownloader.py +++ /dev/null @@ -1,17 +0,0 @@ -import gdown -import py7zr -import os - - -class GoogleDriveDownloader: - - @staticmethod - def downloadIfNotYetDownloaded(remoteSrcFile, localDstFile): - if not os.path.exists(localDstFile): - gdown.download(url = remoteSrcFile, output = localDstFile, fuzzy = True) - - @staticmethod - def downloadSevenZipFileAndExtract(remoteSevenZipSrcFile, localSevenZipDstFile): - GoogleDriveDownloader.downloadIfNotYetDownloaded(remoteSevenZipSrcFile, localSevenZipDstFile); - with py7zr.SevenZipFile(localSevenZipDstFile, mode='r') as sevenZipFile: - sevenZipFile.extractall(path = os.path.dirname(localSevenZipDstFile)) \ No newline at end of file diff --git a/src/GoogleDriveDownloaderTest.py b/src/GoogleDriveDownloaderTest.py deleted file mode 100644 index 927bfba65b7..00000000000 --- a/src/GoogleDriveDownloaderTest.py +++ /dev/null @@ -1,52 +0,0 @@ -import unittest -from pathlib import Path -import os -from IOUtils import IOUtils -from GoogleDriveDownloader import GoogleDriveDownloader - - -class GoogleDriveDownloaderTest(unittest.TestCase): - - def test_downloadIfNotYetDownloaded_notYetDownloaded(self): - # Given - remoteSrcFile = "https://drive.google.com/file/d/1LstnMvxW4LVxgNvfk5h4AnbvPktMeNSd/view?usp=drive_link" - localDstFile = 'src/tmp/test.txt' - IOUtils.silentlyRemoveFile(localDstFile) - - # When - GoogleDriveDownloader.downloadIfNotYetDownloaded(remoteSrcFile, localDstFile) - - # Then - self.assertEqual(Path(localDstFile).read_text(), 'test') - - def test_downloadIfNotYetDownloaded_alreadyDownloaded(self): - # Given - remoteSrcFile = "https://drive.google.com/file/d/1LstnMvxW4LVxgNvfk5h4AnbvPktMeNSd/view?usp=drive_link" - localDstFile = 'src/tmp/test.txt' - content = 'local file content' - self._createFileWithContent(localDstFile, content); - - # When - GoogleDriveDownloader.downloadIfNotYetDownloaded(remoteSrcFile, localDstFile) - - # Then - self.assertEqual(Path(localDstFile).read_text(), content) - - def test_downloadSevenZipFileAndExtract(self): - # Given - remoteSevenZipSrcFile = "https://drive.google.com/file/d/14hFKlt48dzDnEjHS_7vYVca5elfzX0l1/view?usp=drive_link" - localSevenZipDstFile = 'src/tmp/test.7z' - localDstFolder = os.path.dirname(localSevenZipDstFile) - IOUtils.silentlyRemoveFile(localSevenZipDstFile) - IOUtils.silentlyRemoveFolder(localDstFolder + '/test') - - # When - GoogleDriveDownloader.downloadSevenZipFileAndExtract(remoteSevenZipSrcFile, localSevenZipDstFile) - - # Then - self.assertEqual(Path(localDstFolder + '/test/test.txt').read_text(), 'test') - - def _createFileWithContent(self, file, content): - with open(file, 'w') as file: - file.write(content) - diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index 2b283b24ac9..6c492444239 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -30,7 +30,23 @@ "import pandas as pd\n", "\n", "pd.set_option('display.max_rows', 100)\n", - "pd.set_option('display.max_columns', None)\n" + "pd.set_option('display.max_columns', None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b89b2d6", + "metadata": {}, + "outputs": [], + "source": [ + "# split --bytes=45MiB VAERSBeforeDeletion.7z VAERSBeforeDeletion_\n", + "def restoreVAERSBeforeDeletionFolder():\n", + " !cat data/VAERSBeforeDeletion/VAERSBeforeDeletion_* > VAERS/VAERSBeforeDeletion.7z\n", + " !cd VAERS; 7z x -y VAERSBeforeDeletion.7z\n", + "\n", + "vaersBeforeDeletionDataDir = 'VAERS/VAERSBeforeDeletion'\n", + "restoreVAERSBeforeDeletionFolder()" ] }, { @@ -127,7 +143,7 @@ "outputs": [], "source": [ "internationalVaersCovid19 = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(\n", - " src = getInternationalVaersCovid19BeforeDeletion(),\n", + " src = getInternationalVaersCovid19BeforeDeletion(vaersBeforeDeletionDataDir),\n", " dst = internationalVaersCovid19)\n", "internationalVaersCovid19" ] @@ -203,7 +219,7 @@ "metadata": {}, "outputs": [], "source": [ - "countryCountsByBatchcode = filterByBatchcodes(getCountryCountsByBatchcodeTable(), batchCodeTable['Batch'].values)\n", + "countryCountsByBatchcode = filterByBatchcodes(getCountryCountsByBatchcodeTable(vaersBeforeDeletionDataDir), batchCodeTable['Batch'].values)\n", "countryCountsByBatchcode" ] }, @@ -670,9 +686,9 @@ ], "metadata": { "kernelspec": { - "display_name": "howbadismybatch-venv-kernel", + "display_name": "howbadismybatch-venv", "language": "python", - "name": "howbadismybatch-venv-kernel" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -684,7 +700,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.15" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/src/InternationalVaersCovid19Provider.py b/src/InternationalVaersCovid19Provider.py index f9b47e18905..ce0f44c06af 100644 --- a/src/InternationalVaersCovid19Provider.py +++ b/src/InternationalVaersCovid19Provider.py @@ -3,7 +3,6 @@ import VaersReader import pandas as pd from VaersDescrReader import VaersDescrReader from CountryColumnAdder import CountryColumnAdder -from GoogleDriveDownloader import GoogleDriveDownloader def getInternationalVaersCovid19(dataDir, years): internationalVaers = pd.concat( @@ -15,11 +14,8 @@ def getInternationalVaersCovid19(dataDir, years): return internationalVaersCovid19 -def getInternationalVaersCovid19BeforeDeletion(): - GoogleDriveDownloader.downloadSevenZipFileAndExtract( - remoteSevenZipSrcFile = "https://drive.google.com/file/d/1Rb-lfxNxw_WwvRDVLEhvqOyv_a2f8ern/view?usp=drive_link", - localSevenZipDstFile = 'VAERS/VAERSBeforeDeletion.7z') - return getInternationalVaersCovid19(dataDir = 'VAERS/VAERSBeforeDeletion', years = [2020, 2021, 2022]) +def getInternationalVaersCovid19BeforeDeletion(dataDir): + return getInternationalVaersCovid19(dataDir = dataDir, years = [2020, 2021, 2022]) def get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years): VAERSDATA, VAERSVAX, VAERSSYMPTOMS = _get_VAERSDATA_VAERSVAX_VAERSSYMPTOMS(years)