From 447c2ed55293e205882f34bede0dc18abc8c1ef0 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Tue, 22 Nov 2022 15:51:53 +0100 Subject: [PATCH] refactoring --- src/BatchCodeTableHtmlUpdater.py | 15 +++- src/BatchCodeTablePersister.py | 35 +++++++-- src/HowBadIsMyBatch.ipynb | 93 ++---------------------- src/HtmlUtils.py | 4 + src/InternationalVaersCovid19Provider.py | 8 ++ 5 files changed, 60 insertions(+), 95 deletions(-) create mode 100644 src/InternationalVaersCovid19Provider.py diff --git a/src/BatchCodeTableHtmlUpdater.py b/src/BatchCodeTableHtmlUpdater.py index 9fccbff0c72..d96f25b6293 100644 --- a/src/BatchCodeTableHtmlUpdater.py +++ b/src/BatchCodeTableHtmlUpdater.py @@ -2,6 +2,17 @@ from bs4 import BeautifulSoup from HtmlTransformerUtil import HtmlTransformerUtil from CountryOptionsSetter import CountryOptionsSetter from DateProvider import DateProvider +from HtmlUtils import getCountryOptions, getCountries +from DateProvider import DateProvider +from BatchCodeTablePersister import createAndSaveBatchCodeTables + + +def updateBatchCodeTableHtmlFile(internationalVaersCovid19): + countryOptions = getCountryOptions(getCountries(internationalVaersCovid19)) + saveCountryOptions(countryOptions) + saveLastUpdatedBatchCodeTable(DateProvider().getLastUpdatedDataSource()) + createAndSaveBatchCodeTables(internationalVaersCovid19, minADRsForLethality=100) + def saveCountryOptions(countryOptions): HtmlTransformerUtil().applySoupTransformerToFile( @@ -12,6 +23,7 @@ def saveCountryOptions(countryOptions): CountryOptionsSetter().setCountryOptions(html = str(soup), options = countryOptions), 'lxml')) + def saveLastUpdatedBatchCodeTable(lastUpdated): def setLastUpdated(soup): soup.find(id = "last_updated").string.replace_with(lastUpdated.strftime(DateProvider.DATE_FORMAT)) @@ -19,4 +31,5 @@ def saveLastUpdatedBatchCodeTable(lastUpdated): HtmlTransformerUtil().applySoupTransformerToFile( file = "../docs/batchCodeTable.html", - soupTransformer = setLastUpdated) \ No newline at end of file + soupTransformer = setLastUpdated) + \ No newline at end of file diff --git a/src/BatchCodeTablePersister.py b/src/BatchCodeTablePersister.py index 3ab1df1dda9..e685e9a86bb 100644 --- a/src/BatchCodeTablePersister.py +++ b/src/BatchCodeTablePersister.py @@ -1,15 +1,38 @@ from IOUtils import IOUtils +from BatchCodeTableFactory import BatchCodeTableFactory import numpy as np +from HtmlUtils import getCountries -def createAndSaveBatchCodeTableForCountry(createBatchCodeTableForCountry, country, minADRsForLethality = None): + +def createAndSaveBatchCodeTables(internationalVaersCovid19, minADRsForLethality): + batchCodeTableFactory = BatchCodeTableFactory(internationalVaersCovid19) + _createAndSaveBatchCodeTablesForCountries( + createBatchCodeTableForCountry=lambda country: batchCodeTableFactory.createBatchCodeTableByCountry( + country), + countries=getCountries(internationalVaersCovid19), + minADRsForLethality=minADRsForLethality) + _createAndSaveBatchCodeTableForCountry( + createBatchCodeTableForCountry=lambda country: batchCodeTableFactory.createGlobalBatchCodeTable(), + country='Global', + minADRsForLethality=minADRsForLethality) + + +def _createAndSaveBatchCodeTableForCountry(createBatchCodeTableForCountry, country, minADRsForLethality=None): batchCodeTable = createBatchCodeTableForCountry(country) - batchCodeTable.index.set_names("Batch", inplace = True) + batchCodeTable.index.set_names("Batch", inplace=True) if minADRsForLethality is not None: - batchCodeTable.loc[batchCodeTable['Adverse Reaction Reports'] < minADRsForLethality, ['Severe reports', 'Lethality']] = [np.nan, np.nan] - IOUtils.saveDataFrame(batchCodeTable, '../docs/data/batchCodeTables/' + country) + batchCodeTable.loc[ + batchCodeTable['Adverse Reaction Reports'] < minADRsForLethality, + ['Severe reports', 'Lethality'] + ] = [np.nan, np.nan] + IOUtils.saveDataFrame( + batchCodeTable, + '../docs/data/batchCodeTables/' + country) # display(country + ":", batchCodeTable) display(country) -def createAndSaveBatchCodeTablesForCountries(createBatchCodeTableForCountry, countries, minADRsForLethality = None): + +def _createAndSaveBatchCodeTablesForCountries(createBatchCodeTableForCountry, countries, minADRsForLethality=None): for country in countries: - createAndSaveBatchCodeTableForCountry(createBatchCodeTableForCountry, country, minADRsForLethality) \ No newline at end of file + _createAndSaveBatchCodeTableForCountry( + createBatchCodeTableForCountry, country, minADRsForLethality) diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index 1712b872fe1..dbcd8776dd2 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -103,26 +103,6 @@ " downloadVAERSFileAndUnzip('NonDomesticVAERSData.zip', workingDirectory)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "3ebcba86", - "metadata": {}, - "outputs": [], - "source": [ - "from DataFrameFilter import DataFrameFilter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "71456a79", - "metadata": {}, - "outputs": [], - "source": [ - "from BatchCodeTableFactory import BatchCodeTableFactory" - ] - }, { "cell_type": "code", "execution_count": null, @@ -130,17 +110,17 @@ "metadata": {}, "outputs": [], "source": [ - "from BatchCodeTableHtmlUpdater import saveCountryOptions, saveLastUpdatedBatchCodeTable" + "from BatchCodeTableHtmlUpdater import updateBatchCodeTableHtmlFile" ] }, { "cell_type": "code", "execution_count": null, - "id": "86e0e4f2", + "id": "62132e68", "metadata": {}, "outputs": [], "source": [ - "from VaersReader import getVaersForYears, getNonDomesticVaers" + "from InternationalVaersCovid19Provider import getInternationalVaersCovid19" ] }, { @@ -150,42 +130,10 @@ "metadata": {}, "outputs": [], "source": [ - "internationalVaers = pd.concat([getVaersForYears([2020, 2021, 2022]), getNonDomesticVaers()])\n", - "internationalVaersCovid19 = DataFrameFilter().filterByCovid19(internationalVaers)\n", + "internationalVaersCovid19 = getInternationalVaersCovid19([2020, 2021, 2022])\n", "internationalVaersCovid19" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "ff259a35", - "metadata": {}, - "outputs": [], - "source": [ - "from BatchCodeTablePersister import createAndSaveBatchCodeTableForCountry, createAndSaveBatchCodeTablesForCountries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc1ef82a", - "metadata": {}, - "outputs": [], - "source": [ - "from HtmlUtils import getCountryOptions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0c4d04fb", - "metadata": {}, - "outputs": [], - "source": [ - "countries = sorted(internationalVaersCovid19['COUNTRY'].unique())\n", - "countryOptions = getCountryOptions(countries)\n" - ] - }, { "cell_type": "code", "execution_count": null, @@ -193,38 +141,7 @@ "metadata": {}, "outputs": [], "source": [ - "saveCountryOptions(countryOptions)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9c7485b5", - "metadata": {}, - "outputs": [], - "source": [ - "saveLastUpdatedBatchCodeTable(dateProvider.getLastUpdatedDataSource())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e7e01a5", - "metadata": {}, - "outputs": [], - "source": [ - "minADRsForLethality = 100\n", - "batchCodeTableFactory = BatchCodeTableFactory(internationalVaersCovid19)\n", - "\n", - "createAndSaveBatchCodeTablesForCountries(\n", - " createBatchCodeTableForCountry = lambda country: batchCodeTableFactory.createBatchCodeTableByCountry(country),\n", - " countries = countries,\n", - " minADRsForLethality = minADRsForLethality)\n", - "\n", - "createAndSaveBatchCodeTableForCountry(\n", - " createBatchCodeTableForCountry = lambda country: batchCodeTableFactory.createGlobalBatchCodeTable(),\n", - " country = 'Global',\n", - " minADRsForLethality = minADRsForLethality)" + "updateBatchCodeTableHtmlFile(internationalVaersCovid19)" ] }, { diff --git a/src/HtmlUtils.py b/src/HtmlUtils.py index 6bd9ca8f537..eea1b38b96e 100644 --- a/src/HtmlUtils.py +++ b/src/HtmlUtils.py @@ -1,3 +1,7 @@ +def getCountries(internationalVaersCovid19): + return sorted(internationalVaersCovid19['COUNTRY'].unique()) + + def getCountryOptions(countries): return [''] + _getCountryOptions(countries) diff --git a/src/InternationalVaersCovid19Provider.py b/src/InternationalVaersCovid19Provider.py new file mode 100644 index 00000000000..79e1897cbce --- /dev/null +++ b/src/InternationalVaersCovid19Provider.py @@ -0,0 +1,8 @@ +from DataFrameFilter import DataFrameFilter +from VaersReader import getVaersForYears, getNonDomesticVaers +import pandas as pd + +def getInternationalVaersCovid19(years): + internationalVaers = pd.concat([getVaersForYears(years), getNonDomesticVaers()]) + internationalVaersCovid19 = DataFrameFilter().filterByCovid19(internationalVaers) + return internationalVaersCovid19 \ No newline at end of file