restoreVAERSBeforeDeletionFolder()

This commit is contained in:
Frank Knoll
2024-04-25 12:08:21 +02:00
parent 02a82d8884
commit 21c00eb06c
6 changed files with 28 additions and 86 deletions

View File

@@ -10,7 +10,6 @@ dependencies:
- urllib3
- requests
- gdown
- py7zr
- bs4
- lxml
- jupyter

View File

@@ -3,14 +3,14 @@ from InternationalVaersCovid19Provider import getInternationalVaersCovid19Before
from CountryCountsByBatchcodeTablesMerger import CountryCountsByBatchcodeTablesMerger
def getCountryCountsByBatchcodeTable():
def getCountryCountsByBatchcodeTable(vaersBeforeDeletionDataDir):
return _combineCountryCountsByBatchcodeTables(
countryCountsByClickedBatchcode = CountryCountsByBatchcodeTablesMerger.getCountryCountsByClickedBatchcodeTable(),
countryCountsByBatchcodeBeforeDeletion = _getCountryCountsByBatchcodeBeforeDeletion())
countryCountsByBatchcodeBeforeDeletion = _getCountryCountsByBatchcodeBeforeDeletion(vaersBeforeDeletionDataDir))
def _getCountryCountsByBatchcodeBeforeDeletion():
return (getInternationalVaersCovid19BeforeDeletion()
def _getCountryCountsByBatchcodeBeforeDeletion(vaersBeforeDeletionDataDir):
return (getInternationalVaersCovid19BeforeDeletion(vaersBeforeDeletionDataDir)
.groupby('VAX_LOT')
['COUNTRY'].value_counts()
.to_frame(name = 'COUNTRY_COUNT_BY_VAX_LOT'))

View File

@@ -1,17 +0,0 @@
import gdown
import py7zr
import os
class GoogleDriveDownloader:
    """Static helpers that fetch files from Google Drive via ``gdown``."""

    @staticmethod
    def downloadIfNotYetDownloaded(remoteSrcFile, localDstFile):
        """Download ``remoteSrcFile`` to ``localDstFile`` unless that path already exists.

        Args:
            remoteSrcFile: URL of the remote file, handed to ``gdown.download``.
            localDstFile: Local path the download is written to.

        An existing ``localDstFile`` is left untouched, whatever its content.
        """
        # Guard clause: anything already present at the destination counts as downloaded.
        if os.path.exists(localDstFile):
            return
        # NOTE(review): fuzzy=True presumably lets gdown accept share-style
        # ".../view?usp=..." links - confirm against the gdown documentation.
        gdown.download(url=remoteSrcFile, output=localDstFile, fuzzy=True)

    @staticmethod
    def downloadSevenZipFileAndExtract(remoteSevenZipSrcFile, localSevenZipDstFile):
        """Download a 7z archive (if not yet present) and extract it next to itself.

        Args:
            remoteSevenZipSrcFile: URL of the remote 7z archive.
            localSevenZipDstFile: Local path of the archive; its parent
                directory is used as the extraction target.
        """
        GoogleDriveDownloader.downloadIfNotYetDownloaded(remoteSevenZipSrcFile, localSevenZipDstFile)
        # Extraction runs on every call, even when the download was skipped.
        with py7zr.SevenZipFile(localSevenZipDstFile, mode='r') as sevenZipFile:
            sevenZipFile.extractall(path=os.path.dirname(localSevenZipDstFile))

View File

@@ -1,52 +0,0 @@
import unittest
from pathlib import Path
import os
from IOUtils import IOUtils
from GoogleDriveDownloader import GoogleDriveDownloader
class GoogleDriveDownloaderTest(unittest.TestCase):
    """Tests for GoogleDriveDownloader.

    NOTE(review): the "not yet downloaded" and 7z tests remove the local file
    first and then expect specific content, so they presumably need network
    access to the referenced Google Drive files - confirm before running in CI.
    """

    def test_downloadIfNotYetDownloaded_notYetDownloaded(self):
        # Given
        remoteSrcFile = "https://drive.google.com/file/d/1LstnMvxW4LVxgNvfk5h4AnbvPktMeNSd/view?usp=drive_link"
        localDstFile = 'src/tmp/test.txt'
        IOUtils.silentlyRemoveFile(localDstFile)

        # When
        GoogleDriveDownloader.downloadIfNotYetDownloaded(remoteSrcFile, localDstFile)

        # Then
        self.assertEqual(Path(localDstFile).read_text(), 'test')

    def test_downloadIfNotYetDownloaded_alreadyDownloaded(self):
        # Given
        remoteSrcFile = "https://drive.google.com/file/d/1LstnMvxW4LVxgNvfk5h4AnbvPktMeNSd/view?usp=drive_link"
        localDstFile = 'src/tmp/test.txt'
        content = 'local file content'
        self._createFileWithContent(localDstFile, content)

        # When
        GoogleDriveDownloader.downloadIfNotYetDownloaded(remoteSrcFile, localDstFile)

        # Then
        # The pre-existing local content must survive the call.
        self.assertEqual(Path(localDstFile).read_text(), content)

    def test_downloadSevenZipFileAndExtract(self):
        # Given
        remoteSevenZipSrcFile = "https://drive.google.com/file/d/14hFKlt48dzDnEjHS_7vYVca5elfzX0l1/view?usp=drive_link"
        localSevenZipDstFile = 'src/tmp/test.7z'
        localDstFolder = os.path.dirname(localSevenZipDstFile)
        IOUtils.silentlyRemoveFile(localSevenZipDstFile)
        IOUtils.silentlyRemoveFolder(localDstFolder + '/test')

        # When
        GoogleDriveDownloader.downloadSevenZipFileAndExtract(remoteSevenZipSrcFile, localSevenZipDstFile)

        # Then
        self.assertEqual(Path(localDstFolder + '/test/test.txt').read_text(), 'test')

    def _createFileWithContent(self, file, content):
        """Write ``content`` to the path ``file``, replacing any existing file."""
        # Use a distinct name for the handle so the ``file`` path parameter is not rebound.
        with open(file, 'w') as fileHandle:
            fileHandle.write(content)

View File

@@ -30,7 +30,23 @@
"import pandas as pd\n",
"\n",
"pd.set_option('display.max_rows', 100)\n",
"pd.set_option('display.max_columns', None)\n"
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3b89b2d6",
"metadata": {},
"outputs": [],
"source": [
"# split --bytes=45MiB VAERSBeforeDeletion.7z VAERSBeforeDeletion_\n",
"def restoreVAERSBeforeDeletionFolder():\n",
" !cat data/VAERSBeforeDeletion/VAERSBeforeDeletion_* > VAERS/VAERSBeforeDeletion.7z\n",
" !cd VAERS; 7z x -y VAERSBeforeDeletion.7z\n",
"\n",
"vaersBeforeDeletionDataDir = 'VAERS/VAERSBeforeDeletion'\n",
"restoreVAERSBeforeDeletionFolder()"
]
},
{
@@ -127,7 +143,7 @@
"outputs": [],
"source": [
"internationalVaersCovid19 = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(\n",
" src = getInternationalVaersCovid19BeforeDeletion(),\n",
" src = getInternationalVaersCovid19BeforeDeletion(vaersBeforeDeletionDataDir),\n",
" dst = internationalVaersCovid19)\n",
"internationalVaersCovid19"
]
@@ -203,7 +219,7 @@
"metadata": {},
"outputs": [],
"source": [
"countryCountsByBatchcode = filterByBatchcodes(getCountryCountsByBatchcodeTable(), batchCodeTable['Batch'].values)\n",
"countryCountsByBatchcode = filterByBatchcodes(getCountryCountsByBatchcodeTable(vaersBeforeDeletionDataDir), batchCodeTable['Batch'].values)\n",
"countryCountsByBatchcode"
]
},
@@ -670,9 +686,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "howbadismybatch-venv-kernel",
"display_name": "howbadismybatch-venv",
"language": "python",
"name": "howbadismybatch-venv-kernel"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -684,7 +700,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
"version": "3.9.19"
}
},
"nbformat": 4,

View File

@@ -3,7 +3,6 @@ import VaersReader
import pandas as pd
from VaersDescrReader import VaersDescrReader
from CountryColumnAdder import CountryColumnAdder
from GoogleDriveDownloader import GoogleDriveDownloader
def getInternationalVaersCovid19(dataDir, years):
internationalVaers = pd.concat(
@@ -15,11 +14,8 @@ def getInternationalVaersCovid19(dataDir, years):
return internationalVaersCovid19
def getInternationalVaersCovid19BeforeDeletion():
GoogleDriveDownloader.downloadSevenZipFileAndExtract(
remoteSevenZipSrcFile = "https://drive.google.com/file/d/1Rb-lfxNxw_WwvRDVLEhvqOyv_a2f8ern/view?usp=drive_link",
localSevenZipDstFile = 'VAERS/VAERSBeforeDeletion.7z')
return getInternationalVaersCovid19(dataDir = 'VAERS/VAERSBeforeDeletion', years = [2020, 2021, 2022])
def getInternationalVaersCovid19BeforeDeletion(dataDir):
return getInternationalVaersCovid19(dataDir = dataDir, years = [2020, 2021, 2022])
def get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years):
VAERSDATA, VAERSVAX, VAERSSYMPTOMS = _get_VAERSDATA_VAERSVAX_VAERSSYMPTOMS(years)