diff --git a/src/CountriesByBatchcodeProvider.py b/src/CountriesByBatchcodeProvider.py index 4b41282348b..81e0f26a0cd 100644 --- a/src/CountriesByBatchcodeProvider.py +++ b/src/CountriesByBatchcodeProvider.py @@ -1,39 +1,50 @@ import pandas as pd -from CompletedBatchcodeColumnAdder import CompletedBatchcodeColumnAdder -from BatchcodeCompletion import BatchcodeCompletion -from CountriesColumnAdder import CountriesColumnAdder from BatchCodeTableFactory import BatchCodeTableFactory from InternationalVaersCovid19Provider import getInternationalVaersCovid19 -def getCountriesByCompletedBatchcode(internationalVaersCovid19): - result = _readExploration('data/Country By Batchcode Search Term.csv', indexName = 'Batchcode Search Term') - result = _addCompletedBatchcodeColumn(result, internationalVaersCovid19) - columnName = 'Countries' - result = CountriesColumnAdder().addCountriesColumn(result, columnName = columnName) - return result[[columnName]].droplevel('Batchcode Search Term') -def _addCompletedBatchcodeColumn(country_By_Batchcode_Search_Term, internationalVaersCovid19): - return CompletedBatchcodeColumnAdder(_getCompleteBatchcode(internationalVaersCovid19)).addCompletedBatchcodeColumn(country_By_Batchcode_Search_Term) +def getCountryCountsByBatchcodeTable(): + return _combineCountryCountsByBatchcodeTables( + countryCountsByClickedBatchcode = _getCountryCountsByClickedBatchcode(), + countryCountsByBatchcodeBeforeDeletion = _getCountryCountsByBatchcodeBeforeDeletion()) -def _getCompleteBatchcode(internationalVaersCovid19): - batchCodeTable = BatchCodeTableFactory(internationalVaersCovid19).createGlobalBatchCodeTable() - return BatchcodeCompletion(ADR_by_Batchcode = batchCodeTable).completeBatchcode - -def getCountriesByClickedBatchcode(): - result = _readExploration('data/Country By Clicked Batchcode.csv', indexName = 'Clicked Batchcode') - columnName = 'Countries' - result = CountriesColumnAdder().addCountriesColumn(result, columnName = columnName) - return result[[columnName]] -def _readExploration(csvFile, indexName): - exploration = pd.read_csv(csvFile, header = [0], index_col = 0, skiprows = 6, on_bad_lines = 'warn') - exploration.drop(index=indexName, inplace=True) - exploration.index.rename(indexName, inplace=True) - exploration.drop(columns='Totals', inplace=True) - for column in exploration.columns: - exploration[column] = exploration[column].astype('int64') +def _getCountryCountsByClickedBatchcode(): + exploration = pd.read_csv('data/Country By Clicked Batchcode.csv', index_col = 0, skiprows = [0, 1, 2, 3, 4, 5, 7]) + exploration.index.name = 'VAX_LOT' + exploration.rename( + columns = + { + 'Country': 'COUNTRY', + 'Event count': 'COUNTRY_COUNT_BY_VAX_LOT' + }, + inplace = True) + exploration.set_index('COUNTRY', append = True, inplace = True) return exploration + +def _getCountryCountsByBatchcodeBeforeDeletion(): + internationalVaersCovid19 = getInternationalVaersCovid19(dataDir = 'VAERS/VAERSBeforeDeletion', years = [2020, 2021, 2022]) + return (internationalVaersCovid19 + .groupby('VAX_LOT') + ['COUNTRY'].value_counts() + .to_frame(name = 'COUNTRY_COUNT_BY_VAX_LOT')) + + +def _combineCountryCountsByBatchcodeTables(countryCountsByClickedBatchcode, countryCountsByBatchcodeBeforeDeletion): + countryCountsByBatchcode = pd.merge( + countryCountsByClickedBatchcode, + countryCountsByBatchcodeBeforeDeletion, + how = 'outer', + left_index = True, + right_index = True, + suffixes=(' Clicked', ' Before Deletion')) + countryCountsByBatchcode.fillna(0, inplace = True) + for column in countryCountsByBatchcode.columns: + countryCountsByBatchcode[column] = countryCountsByBatchcode[column].astype('int64') + return countryCountsByBatchcode + + def getCountriesByBatchcodeBeforeDeletion(): internationalVaersCovid19 = getInternationalVaersCovid19(dataDir = 'VAERS/VAERSBeforeDeletion', years = [2020, 2021, 2022]) batchCodeTable = BatchCodeTableFactory(internationalVaersCovid19).createGlobalBatchCodeTable(countriesAsList = True) diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index 0c5244e27df..ea00af3157c 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -15,14 +15,14 @@ "import os\n", "from VAERSFileDownloader import updateVAERSFiles\n", "from datetime import datetime\n", - "from DateProvider import DateProvider\n", "from InternationalVaersCovid19Provider import getInternationalVaersCovid19, get_international_VAERSVAX_VAERSSYMPTOMS_Covid19\n", "from BatchCodeTableHtmlUpdater import updateBatchCodeTableHtmlFile\n", "from BatchCodeTablePersister import createAndSaveBatchCodeTables\n", "from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n", "from HistogramFactoryAndPersister import createAndSaveGlobalHistograms, createAndSaveHistogramsForCountries\n", "from BatchCodeTableFactory import BatchCodeTableFactory\n", - "from BatchCodeTableHavingGuessedCountriesFactory import BatchCodeTableHavingGuessedCountriesFactory\n" + "from BatchCodeTableHavingGuessedCountriesFactory import BatchCodeTableHavingGuessedCountriesFactory\n", + "from CountriesByBatchcodeProvider import getCountryCountsByBatchcodeTable\n" ] }, { @@ -35,20 +35,6 @@ "print(datetime.now().strftime(\"%d.%m.%Y, %H:%M:%S Uhr\"))" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "ffad1c04", - "metadata": {}, - "outputs": [], - "source": [ - "dateProvider = DateProvider()\n", - "print(' lastUpdated:', dateProvider.getLastUpdated())\n", - "print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource())\n", - "needsUpdate = dateProvider.needsUpdate()\n", - "print('needsUpdate:', needsUpdate)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -169,1038 +155,13 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "dc8fa52b", + "execution_count": null, + "id": "d6c2b650", "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from CountriesByBatchcodeProvider import getCountriesByClickedBatchcode, getCountriesByBatchcodeBeforeDeletion" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b01a5f94", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "b'Skipping line 9: expected 22 fields, saw 23\\n'\n" - ] - }, - { - "data": { - "text/html": [ - "
| \n", - " | Countries | \n", - "
|---|---|
| Clicked Batchcode | \n", - "\n", - " |
| (not set) | \n", - "{Germany, Poland, United Kingdom, Australia, B... | \n", - "
| FE6208 | \n", - "{Germany, Poland, United Kingdom, Australia, B... | \n", - "
| 039K20A | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "
| EN6201 | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "
| FD6840 | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "
| ... | \n", - "... | \n", - "
| ZTC540693 | \n", - "{United States} | \n", - "
| ZU5A21A | \n", - "{} | \n", - "
| ZZ3249 | \n", - "{Hungary} | \n", - "
| 026L20A | \n", - "{United States} | \n", - "
| ÂK4244 | \n", - "{Hungary} | \n", - "
5392 rows × 1 columns
\n", - "| \n", - " | Countries | \n", - "
|---|---|
| VAX_LOT | \n", - "\n", - " |
| 6362737 | \n", - "[United States] | \n", - "
| SCH2 | \n", - "[United States] | \n", - "
| NO72A | \n", - "[Philippines] | \n", - "
| EW096 | \n", - "[United States] | \n", - "
| FS1926 | \n", - "[United States] | \n", - "
| ... | \n", - "... | \n", - "
| 211221A | \n", - "[United States] | \n", - "
| 2112D21A | \n", - "[United States] | \n", - "
| 2114214 | \n", - "[United States] | \n", - "
| 211421A | \n", - "[United States] | \n", - "
| Ø94F21A | \n", - "[Norway] | \n", - "
47901 rows × 1 columns
\n", - "| \n", - " | Countries Clicked | \n", - "Countries Before Deletion | \n", - "
|---|---|---|
| VAX_LOT | \n", - "\n", - " | \n", - " |
| FE6208 | \n", - "{Germany, Poland, United Kingdom, Australia, B... | \n", - "[Austria, Croatia, Czechia, Germany, Greece, H... | \n", - "
| 039K20A | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "[United States] | \n", - "
| EN6201 | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "[United Kingdom, United States] | \n", - "
| FD6840 | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "[Austria, Croatia, Cyprus, Czechia, Germany, G... | \n", - "
| FF0680 | \n", - "{Sweden, United States, Canada, Brazil, German... | \n", - "[Austria, Croatia, Czechia, Finland, France, G... | \n", - "
| ... | \n", - "... | \n", - "... | \n", - "
| ZTC540693 | \n", - "{United States} | \n", - "[United States] | \n", - "
| ZU5A21A | \n", - "{} | \n", - "[United States] | \n", - "
| ZZ3249 | \n", - "{Hungary} | \n", - "[United States] | \n", - "
| 026L20A | \n", - "{United States} | \n", - "[United States] | \n", - "
| ÂK4244 | \n", - "{Hungary} | \n", - "[United Kingdom] | \n", - "
5188 rows × 2 columns
\n", - "| \n", - " | Countries Clicked | \n", - "Countries Before Deletion | \n", - "
|---|---|---|
| VAX_LOT | \n", - "\n", - " | \n", - " |
| FE6208 | \n", - "{Germany, Poland, United Kingdom, Australia, B... | \n", - "{Lithuania, Spain, Czechia, Greece, Germany, A... | \n", - "
| 039K20A | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "{United States} | \n", - "
| EN6201 | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "{United Kingdom, United States} | \n", - "
| FD6840 | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "{Lithuania, Cyprus, Czechia, Greece, Germany, ... | \n", - "
| FF0680 | \n", - "{Sweden, United States, Canada, Brazil, German... | \n", - "{Portugal, Sweden, Czechia, France, Finland, G... | \n", - "
| ... | \n", - "... | \n", - "... | \n", - "
| ZTC540693 | \n", - "{United States} | \n", - "{United States} | \n", - "
| ZU5A21A | \n", - "{} | \n", - "{United States} | \n", - "
| ZZ3249 | \n", - "{Hungary} | \n", - "{United States} | \n", - "
| 026L20A | \n", - "{United States} | \n", - "{United States} | \n", - "
| ÂK4244 | \n", - "{Hungary} | \n", - "{United Kingdom} | \n", - "
5188 rows × 2 columns
\n", - "| \n", - " | Countries Clicked | \n", - "Countries Before Deletion | \n", - "Countries Intersection | \n", - "
|---|---|---|---|
| VAX_LOT | \n", - "\n", - " | \n", - " | \n", - " |
| FE6208 | \n", - "{Germany, Poland, United Kingdom, Australia, B... | \n", - "{Lithuania, Spain, Czechia, Greece, Germany, A... | \n", - "[Austria, Germany, Hungary, Spain, Sweden, Uni... | \n", - "
| 039K20A | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "{United States} | \n", - "[United States] | \n", - "
| EN6201 | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "{United Kingdom, United States} | \n", - "[United Kingdom, United States] | \n", - "
| FD6840 | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "{Lithuania, Cyprus, Czechia, Greece, Germany, ... | \n", - "[Austria, Germany, Hungary, Sweden, United Kin... | \n", - "
| FF0680 | \n", - "{Sweden, United States, Canada, Brazil, German... | \n", - "{Portugal, Sweden, Czechia, France, Finland, G... | \n", - "[Austria, Germany, Hungary, Italy, Netherlands... | \n", - "
| ... | \n", - "... | \n", - "... | \n", - "... | \n", - "
| ZTC540693 | \n", - "{United States} | \n", - "{United States} | \n", - "[United States] | \n", - "
| ZU5A21A | \n", - "{} | \n", - "{United States} | \n", - "[] | \n", - "
| ZZ3249 | \n", - "{Hungary} | \n", - "{United States} | \n", - "[] | \n", - "
| 026L20A | \n", - "{United States} | \n", - "{United States} | \n", - "[United States] | \n", - "
| ÂK4244 | \n", - "{Hungary} | \n", - "{United Kingdom} | \n", - "[] | \n", - "
5188 rows × 3 columns
\n", - "| \n", - " | Countries Clicked | \n", - "Countries Before Deletion | \n", - "Countries Intersection | \n", - "
|---|---|---|---|
| VAX_LOT | \n", - "\n", - " | \n", - " | \n", - " |
| FH3253 | \n", - "{Australia, Malaysia} | \n", - "{Austria, Switzerland, Belgium} | \n", - "[] | \n", - "
| FE2090 | \n", - "{United Kingdom, Australia, Germany} | \n", - "{Estonia, Spain, France, Denmark, South Africa... | \n", - "[] | \n", - "
| FF0884 | \n", - "{Australia, Malaysia, Japan} | \n", - "{Ireland, Denmark} | \n", - "[] | \n", - "
| FG2872 | \n", - "{Malaysia, Brazil} | \n", - "{Georgia, United States, Israel} | \n", - "[] | \n", - "
| FG3716 | \n", - "{Australia, Italy, Malaysia} | \n", - "{Estonia, Spain, France, Norway, Austria, Denm... | \n", - "[] | \n", - "
| ... | \n", - "... | \n", - "... | \n", - "... | \n", - "
| YFL8095 | \n", - "{Japan} | \n", - "{United States} | \n", - "[] | \n", - "
| Z07AZ1A | \n", - "{Brazil} | \n", - "{United States} | \n", - "[] | \n", - "
| ZU5A21A | \n", - "{} | \n", - "{United States} | \n", - "[] | \n", - "
| ZZ3249 | \n", - "{Hungary} | \n", - "{United States} | \n", - "[] | \n", - "
| ÂK4244 | \n", - "{Hungary} | \n", - "{United Kingdom} | \n", - "[] | \n", - "
1345 rows × 3 columns
\n", - "| \n", - " | Countries Clicked | \n", - "Countries Before Deletion | \n", - "Countries Intersection | \n", - "
|---|---|---|---|
| VAX_LOT | \n", - "\n", - " | \n", - " | \n", - " |
| FE6208 | \n", - "{Germany, Poland, United Kingdom, Australia, B... | \n", - "{Lithuania, Spain, Czechia, Greece, Germany, A... | \n", - "[Austria, Germany, Hungary, Spain, Sweden, Uni... | \n", - "
| 039K20A | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "{United States} | \n", - "[United States] | \n", - "
| EN6201 | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "{United Kingdom, United States} | \n", - "[United Kingdom, United States] | \n", - "
| FD6840 | \n", - "{Sweden, United States, Spain, France, Austral... | \n", - "{Lithuania, Cyprus, Czechia, Greece, Germany, ... | \n", - "[Austria, Germany, Hungary, Sweden, United Kin... | \n", - "
| FF0680 | \n", - "{Sweden, United States, Canada, Brazil, German... | \n", - "{Portugal, Sweden, Czechia, France, Finland, G... | \n", - "[Austria, Germany, Hungary, Italy, Netherlands... | \n", - "
| ... | \n", - "... | \n", - "... | \n", - "... | \n", - "
| XXXXXX | \n", - "{United States} | \n", - "{United States} | \n", - "[United States] | \n", - "
| XYZ98765 | \n", - "{Germany} | \n", - "{Germany} | \n", - "[Germany] | \n", - "
| Z01AZ1A | \n", - "{United States} | \n", - "{United States} | \n", - "[United States] | \n", - "
| ZTC540693 | \n", - "{United States} | \n", - "{United States} | \n", - "[United States] | \n", - "
| 026L20A | \n", - "{United States} | \n", - "{United States} | \n", - "[United States] | \n", - "
3843 rows × 3 columns
\n", - "