restoreVAERSBeforeDeletionFolder()
This commit is contained in:
@@ -10,7 +10,6 @@ dependencies:
|
||||
- urllib3
|
||||
- requests
|
||||
- gdown
|
||||
- py7zr
|
||||
- bs4
|
||||
- lxml
|
||||
- jupyter
|
||||
|
||||
@@ -3,14 +3,14 @@ from InternationalVaersCovid19Provider import getInternationalVaersCovid19Before
|
||||
from CountryCountsByBatchcodeTablesMerger import CountryCountsByBatchcodeTablesMerger
|
||||
|
||||
|
||||
def getCountryCountsByBatchcodeTable():
|
||||
def getCountryCountsByBatchcodeTable(vaersBeforeDeletionDataDir):
|
||||
return _combineCountryCountsByBatchcodeTables(
|
||||
countryCountsByClickedBatchcode = CountryCountsByBatchcodeTablesMerger.getCountryCountsByClickedBatchcodeTable(),
|
||||
countryCountsByBatchcodeBeforeDeletion = _getCountryCountsByBatchcodeBeforeDeletion())
|
||||
countryCountsByBatchcodeBeforeDeletion = _getCountryCountsByBatchcodeBeforeDeletion(vaersBeforeDeletionDataDir))
|
||||
|
||||
|
||||
def _getCountryCountsByBatchcodeBeforeDeletion():
|
||||
return (getInternationalVaersCovid19BeforeDeletion()
|
||||
def _getCountryCountsByBatchcodeBeforeDeletion(vaersBeforeDeletionDataDir):
|
||||
return (getInternationalVaersCovid19BeforeDeletion(vaersBeforeDeletionDataDir)
|
||||
.groupby('VAX_LOT')
|
||||
['COUNTRY'].value_counts()
|
||||
.to_frame(name = 'COUNTRY_COUNT_BY_VAX_LOT'))
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
import gdown
|
||||
import py7zr
|
||||
import os
|
||||
|
||||
|
||||
class GoogleDriveDownloader:
|
||||
|
||||
@staticmethod
|
||||
def downloadIfNotYetDownloaded(remoteSrcFile, localDstFile):
|
||||
if not os.path.exists(localDstFile):
|
||||
gdown.download(url = remoteSrcFile, output = localDstFile, fuzzy = True)
|
||||
|
||||
@staticmethod
|
||||
def downloadSevenZipFileAndExtract(remoteSevenZipSrcFile, localSevenZipDstFile):
|
||||
GoogleDriveDownloader.downloadIfNotYetDownloaded(remoteSevenZipSrcFile, localSevenZipDstFile);
|
||||
with py7zr.SevenZipFile(localSevenZipDstFile, mode='r') as sevenZipFile:
|
||||
sevenZipFile.extractall(path = os.path.dirname(localSevenZipDstFile))
|
||||
@@ -1,52 +0,0 @@
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
import os
|
||||
from IOUtils import IOUtils
|
||||
from GoogleDriveDownloader import GoogleDriveDownloader
|
||||
|
||||
|
||||
class GoogleDriveDownloaderTest(unittest.TestCase):
|
||||
|
||||
def test_downloadIfNotYetDownloaded_notYetDownloaded(self):
|
||||
# Given
|
||||
remoteSrcFile = "https://drive.google.com/file/d/1LstnMvxW4LVxgNvfk5h4AnbvPktMeNSd/view?usp=drive_link"
|
||||
localDstFile = 'src/tmp/test.txt'
|
||||
IOUtils.silentlyRemoveFile(localDstFile)
|
||||
|
||||
# When
|
||||
GoogleDriveDownloader.downloadIfNotYetDownloaded(remoteSrcFile, localDstFile)
|
||||
|
||||
# Then
|
||||
self.assertEqual(Path(localDstFile).read_text(), 'test')
|
||||
|
||||
def test_downloadIfNotYetDownloaded_alreadyDownloaded(self):
|
||||
# Given
|
||||
remoteSrcFile = "https://drive.google.com/file/d/1LstnMvxW4LVxgNvfk5h4AnbvPktMeNSd/view?usp=drive_link"
|
||||
localDstFile = 'src/tmp/test.txt'
|
||||
content = 'local file content'
|
||||
self._createFileWithContent(localDstFile, content);
|
||||
|
||||
# When
|
||||
GoogleDriveDownloader.downloadIfNotYetDownloaded(remoteSrcFile, localDstFile)
|
||||
|
||||
# Then
|
||||
self.assertEqual(Path(localDstFile).read_text(), content)
|
||||
|
||||
def test_downloadSevenZipFileAndExtract(self):
|
||||
# Given
|
||||
remoteSevenZipSrcFile = "https://drive.google.com/file/d/14hFKlt48dzDnEjHS_7vYVca5elfzX0l1/view?usp=drive_link"
|
||||
localSevenZipDstFile = 'src/tmp/test.7z'
|
||||
localDstFolder = os.path.dirname(localSevenZipDstFile)
|
||||
IOUtils.silentlyRemoveFile(localSevenZipDstFile)
|
||||
IOUtils.silentlyRemoveFolder(localDstFolder + '/test')
|
||||
|
||||
# When
|
||||
GoogleDriveDownloader.downloadSevenZipFileAndExtract(remoteSevenZipSrcFile, localSevenZipDstFile)
|
||||
|
||||
# Then
|
||||
self.assertEqual(Path(localDstFolder + '/test/test.txt').read_text(), 'test')
|
||||
|
||||
def _createFileWithContent(self, file, content):
|
||||
with open(file, 'w') as file:
|
||||
file.write(content)
|
||||
|
||||
@@ -30,7 +30,23 @@
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"pd.set_option('display.max_rows', 100)\n",
|
||||
"pd.set_option('display.max_columns', None)\n"
|
||||
"pd.set_option('display.max_columns', None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3b89b2d6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# split --bytes=45MiB VAERSBeforeDeletion.7z VAERSBeforeDeletion_\n",
|
||||
"def restoreVAERSBeforeDeletionFolder():\n",
|
||||
" !cat data/VAERSBeforeDeletion/VAERSBeforeDeletion_* > VAERS/VAERSBeforeDeletion.7z\n",
|
||||
" !cd VAERS; 7z x -y VAERSBeforeDeletion.7z\n",
|
||||
"\n",
|
||||
"vaersBeforeDeletionDataDir = 'VAERS/VAERSBeforeDeletion'\n",
|
||||
"restoreVAERSBeforeDeletionFolder()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -127,7 +143,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"internationalVaersCovid19 = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(\n",
|
||||
" src = getInternationalVaersCovid19BeforeDeletion(),\n",
|
||||
" src = getInternationalVaersCovid19BeforeDeletion(vaersBeforeDeletionDataDir),\n",
|
||||
" dst = internationalVaersCovid19)\n",
|
||||
"internationalVaersCovid19"
|
||||
]
|
||||
@@ -203,7 +219,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"countryCountsByBatchcode = filterByBatchcodes(getCountryCountsByBatchcodeTable(), batchCodeTable['Batch'].values)\n",
|
||||
"countryCountsByBatchcode = filterByBatchcodes(getCountryCountsByBatchcodeTable(vaersBeforeDeletionDataDir), batchCodeTable['Batch'].values)\n",
|
||||
"countryCountsByBatchcode"
|
||||
]
|
||||
},
|
||||
@@ -670,9 +686,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "howbadismybatch-venv-kernel",
|
||||
"display_name": "howbadismybatch-venv",
|
||||
"language": "python",
|
||||
"name": "howbadismybatch-venv-kernel"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -684,7 +700,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.15"
|
||||
"version": "3.9.19"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -3,7 +3,6 @@ import VaersReader
|
||||
import pandas as pd
|
||||
from VaersDescrReader import VaersDescrReader
|
||||
from CountryColumnAdder import CountryColumnAdder
|
||||
from GoogleDriveDownloader import GoogleDriveDownloader
|
||||
|
||||
def getInternationalVaersCovid19(dataDir, years):
|
||||
internationalVaers = pd.concat(
|
||||
@@ -15,11 +14,8 @@ def getInternationalVaersCovid19(dataDir, years):
|
||||
return internationalVaersCovid19
|
||||
|
||||
|
||||
def getInternationalVaersCovid19BeforeDeletion():
|
||||
GoogleDriveDownloader.downloadSevenZipFileAndExtract(
|
||||
remoteSevenZipSrcFile = "https://drive.google.com/file/d/1Rb-lfxNxw_WwvRDVLEhvqOyv_a2f8ern/view?usp=drive_link",
|
||||
localSevenZipDstFile = 'VAERS/VAERSBeforeDeletion.7z')
|
||||
return getInternationalVaersCovid19(dataDir = 'VAERS/VAERSBeforeDeletion', years = [2020, 2021, 2022])
|
||||
def getInternationalVaersCovid19BeforeDeletion(dataDir):
|
||||
return getInternationalVaersCovid19(dataDir = dataDir, years = [2020, 2021, 2022])
|
||||
|
||||
def get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years):
|
||||
VAERSDATA, VAERSVAX, VAERSSYMPTOMS = _get_VAERSDATA_VAERSVAX_VAERSSYMPTOMS(years)
|
||||
|
||||
Reference in New Issue
Block a user