Files
HowBadIsMyBatch/src/HowBadIsMyBatch.ipynb
2023-10-03 23:50:46 +02:00

449 lines
13 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "9de5907f-18f5-4cb1-903e-26028ff1fa03",
"metadata": {},
"outputs": [],
"source": [
"from CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter import CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter\n",
"from JensenShannonDistance2BarChartDescriptionColumnAdder import JensenShannonDistance2BarChartDescriptionColumnAdder\n",
"from CountriesByBatchcodeProvider import getCountryCountsByBatchcodeTable, filterByBatchcodes\n",
"from Column2DataframeAdder import addColumn2Dataframe\n",
"from BatchCodeTableFactory import BatchCodeTableFactory\n",
"from HistogramFactoryAndPersister import createAndSaveGlobalHistograms\n",
"from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n",
"from BatchCodeTablePersister import createGlobalBatchCodeTable\n",
"from BatchCodeTableHtmlUpdater import updateBatchCodeTableHtmlFile, saveLastUpdated2HtmlFile\n",
"from InternationalVaersCovid19Provider import getInternationalVaersCovid19, getInternationalVaersCovid19BeforeDeletion, get_international_VAERSVAX_VAERSSYMPTOMS_Covid19\n",
"from DateProvider import DateProvider\n",
"from BarChartDescriptionTable2DictionaryConverter import BarChartDescriptionTable2DictionaryConverter\n",
"from CountryColumnsMerger import CountryColumnsMerger\n",
"from datetime import datetime\n",
"from VAERSFileDownloader import updateVAERSFiles\n",
"from GoogleAnalytics.RegionCountsByBatchcodeTablesMerger import RegionCountsByBatchcodeTablesMerger\n",
"from IOUtils import IOUtils\n",
"import os\n",
"import pandas as pd\n",
"\n",
"pd.set_option('display.max_rows', 100)\n",
"pd.set_option('display.max_columns', None)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1e4fa9e",
"metadata": {},
"outputs": [],
"source": [
"# Print the current local time as day.month.year, hour:minute:second, followed by the literal 'Uhr'.\n",
"now = datetime.now()\n",
"print(now.strftime(\"%d.%m.%Y, %H:%M:%S Uhr\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ffad1c04",
"metadata": {},
"outputs": [],
"source": [
"# Show both dates reported by DateProvider and whether it says an update is needed.\n",
"dateProvider = DateProvider()\n",
"\n",
"lastUpdated = dateProvider.getLastUpdated()\n",
"print(' lastUpdated:', lastUpdated)\n",
"\n",
"lastUpdatedDataSource = dateProvider.getLastUpdatedDataSource()\n",
"print('lastUpdatedDataSource:', lastUpdatedDataSource)\n",
"\n",
"needsUpdate = dateProvider.needsUpdate()\n",
"print('needsUpdate:', needsUpdate)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "194b7357",
"metadata": {},
"outputs": [],
"source": [
"# Every calendar year from 2020 through the current year, inclusive.\n",
"firstYear = 2020\n",
"currentYear = datetime.now().year\n",
"years_from_start_of_COVID_vaccination_to_present = list(range(firstYear, currentYear + 1))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a793dff0",
"metadata": {},
"outputs": [],
"source": [
"# Run updateVAERSFiles for all covered years, relative to the current working directory.\n",
"# NOTE(review): needsUpdate (computed in an earlier cell) is not consulted here - confirm this is meant to run unconditionally.\n",
"workingDirectory = os.getcwd()\n",
"updateVAERSFiles(\n",
"    years = years_from_start_of_COVID_vaccination_to_present,\n",
"    workingDirectory = workingDirectory)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58333a19",
"metadata": {},
"outputs": [],
"source": [
"# The provider returns two tables, unpacked here as (VAERSVAX, VAERSSYMPTOMS).\n",
"international_VAERSVAX_Covid19, international_VAERSSYMPTOMS = get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(\n",
"    years = years_from_start_of_COVID_vaccination_to_present)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f10b558f",
"metadata": {},
"outputs": [],
"source": [
"# Preview of the VAERSVAX table loaded above.\n",
"international_VAERSVAX_Covid19\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e0908fe",
"metadata": {},
"outputs": [],
"source": [
"# Preview of the VAERSSYMPTOMS table loaded above.\n",
"international_VAERSSYMPTOMS\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "781ac80e",
"metadata": {},
"outputs": [],
"source": [
"# Load the international VAERS COVID-19 table from the 'VAERS' data directory for all covered years.\n",
"internationalVaersCovid19 = getInternationalVaersCovid19(\n",
"    dataDir = 'VAERS',\n",
"    years = years_from_start_of_COVID_vaccination_to_present)\n",
"internationalVaersCovid19"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29366235",
"metadata": {},
"outputs": [],
"source": [
"# Merge the country column of the before-deletion data (src) into the table loaded above (dst).\n",
"vaersCovid19BeforeDeletion = getInternationalVaersCovid19BeforeDeletion()\n",
"internationalVaersCovid19 = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(\n",
"    src = vaersCovid19BeforeDeletion,\n",
"    dst = internationalVaersCovid19)\n",
"internationalVaersCovid19"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0915aa5a",
"metadata": {},
"outputs": [],
"source": [
"# Build the global batch code table from the merged VAERS data.\n",
"batchCodeTableFactory = BatchCodeTableFactory(internationalVaersCovid19)\n",
"batchCodeTable = createGlobalBatchCodeTable(\n",
"    deleteEntriesWithADRsLessThanOrEqual = 2,\n",
"    minADRsForLethality = 100,\n",
"    batchCodeTableFactory = batchCodeTableFactory)\n",
"batchCodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8e81ffc",
"metadata": {},
"outputs": [],
"source": [
"# Persist the global batch code table as JSON, then write the data-source date into batchCodes.html.\n",
"globalBatchCodeTableJsonFile = '../docs/data/batchCodeTables/Global.json'\n",
"IOUtils.saveDataFrameAsJson(batchCodeTable, globalBatchCodeTableJsonFile)\n",
"\n",
"lastUpdatedDataSource = dateProvider.getLastUpdatedDataSource()\n",
"saveLastUpdated2HtmlFile(lastUpdatedDataSource, \"../docs/batchCodes.html\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "189a639e",
"metadata": {},
"outputs": [],
"source": [
"# Update HowBadIsMyBatch.html from the batch code table, passing the data-source date as lastUpdated.\n",
"lastUpdatedDataSource = dateProvider.getLastUpdatedDataSource()\n",
"updateBatchCodeTableHtmlFile(\n",
"    batchCodeTable,\n",
"    batchCodeTableHtmlFile = \"../docs/HowBadIsMyBatch.html\",\n",
"    lastUpdated = lastUpdatedDataSource)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "19fa050b",
"metadata": {},
"outputs": [],
"source": [
"# Keep only the VAERSVAX rows whose VAX_LOT appears in the 'Batch' column of batchCodeTable.\n",
"isLotInBatchCodeTable = international_VAERSVAX_Covid19['VAX_LOT'].isin(batchCodeTable['Batch'])\n",
"symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(\n",
"    VAERSVAX = international_VAERSVAX_Covid19[isLotInBatchCodeTable],\n",
"    VAERSSYMPTOMS = international_VAERSSYMPTOMS)\n",
"symptomByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec2a692e",
"metadata": {},
"outputs": [],
"source": [
"# Create and save the global histograms from the symptom table and the batch code table.\n",
"createAndSaveGlobalHistograms(\n",
"    symptomByBatchcodeTable,\n",
"    batchCodeTable)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70fcc420",
"metadata": {},
"outputs": [],
"source": [
"# Restrict the country counts to the batch codes listed in batchCodeTable.\n",
"allCountryCountsByBatchcode = getCountryCountsByBatchcodeTable()\n",
"countryCountsByBatchcode = filterByBatchcodes(\n",
"    allCountryCountsByBatchcode,\n",
"    batchCodeTable['Batch'].values)\n",
"countryCountsByBatchcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0257505b",
"metadata": {},
"outputs": [],
"source": [
"# Region counts by clicked batch code, read from the Google Analytics data directory.\n",
"googleAnalyticsDataDir = 'data/GoogleAnalytics'\n",
"regionCountsByClickedBatchcodeTable = RegionCountsByBatchcodeTablesMerger.getRegionCountsByClickedBatchcode(googleAnalyticsDataDir)\n",
"regionCountsByClickedBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c276eede",
"metadata": {},
"outputs": [],
"source": [
"# Rows whose second index level is 'Germany', sorted by 'VAX_LOT' in descending order.\n",
"# A dedicated name (instead of the generic df) keeps the Excel export below bound to this table\n",
"# even if later cells are run before the export is re-run.\n",
"regionCountsByClickedBatchcode_Germany = regionCountsByClickedBatchcodeTable.loc[(slice(None), 'Germany', slice(None)), :].sort_values(by = 'VAX_LOT', ascending = False)\n",
"regionCountsByClickedBatchcode_Germany"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "64c57c23",
"metadata": {},
"outputs": [],
"source": [
"# DataFrame.to_excel does not create missing directories, so make sure tmp/ exists first.\n",
"os.makedirs('tmp', exist_ok = True)\n",
"regionCountsByClickedBatchcode_Germany.to_excel('tmp/germany.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d755bc8",
"metadata": {},
"outputs": [],
"source": [
"# Germany rows of countryCountsByBatchcode, sorted by 'COUNTRY_COUNT_BY_VAX_LOT Clicked' in descending order.\n",
"# Bound to its own name so it does not overwrite the df that the Excel export cell reads.\n",
"countryCountsByBatchcode_Germany = countryCountsByBatchcode.loc[(slice(None), 'Germany'), :].sort_values(by = 'COUNTRY_COUNT_BY_VAX_LOT Clicked', ascending = False)\n",
"countryCountsByBatchcode_Germany"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a67b1f6",
"metadata": {},
"outputs": [],
"source": [
"# Convert the country counts into the bar chart description table.\n",
"barChartDescriptionTable = (\n",
"    CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter\n",
"    .convert2BarChartDescriptionTable(countryCountsByBatchcode))\n",
"barChartDescriptionTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "13bdd443",
"metadata": {},
"outputs": [],
"source": [
"# Replace the table with the result of the Jensen-Shannon distance column adder.\n",
"barChartDescriptionTable = (\n",
"    JensenShannonDistance2BarChartDescriptionColumnAdder\n",
"    .addJensenShannonDistance2BarChartDescriptionColumn(barChartDescriptionTable))\n",
"barChartDescriptionTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "163830ba",
"metadata": {},
"outputs": [],
"source": [
"# Convert the bar chart description table to a dictionary and persist it as JSON.\n",
"barChartDescriptions = BarChartDescriptionTable2DictionaryConverter.convert2Dictionary(\n",
"    barChartDescriptionTable,\n",
"    internationalVaersCovid19)\n",
"IOUtils.saveDictAsJson(barChartDescriptions, '../docs/data/barChartDescriptionTable.json')\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "2d93b511",
"metadata": {},
"source": [
"# Vaccine Distribution by Zipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cfcbad44",
"metadata": {},
"outputs": [],
"source": [
"# Read only the four shipment columns used below from the Excel workbook.\n",
"shipmentColumns = ['PROVIDER_NAME', 'ZIPCODE_SHP', 'LOT_NUMBER', 'DOSES_SHIPPED']\n",
"vaccineDistributionByZipcode = pd.read_excel(\n",
"    io = 'data/Amended-22-01962-Pfizer-2022-0426-pulled-2022-0823.xlsx',\n",
"    usecols = shipmentColumns)\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "59c745d2",
"metadata": {},
"outputs": [],
"source": [
"from VaccineDistributionByZipcodeSimplifier import VaccineDistributionByZipcodeSimplifier\n",
"\n",
"# Replace the shipment table with the result of sumDoses\n",
"# (presumably one summed row per duplicate group - see VaccineDistributionByZipcodeSimplifier to confirm).\n",
"summedVaccineDistributionByZipcode = VaccineDistributionByZipcodeSimplifier.sumDoses(vaccineDistributionByZipcode)\n",
"vaccineDistributionByZipcode = summedVaccineDistributionByZipcode\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8cd250f7",
"metadata": {},
"outputs": [],
"source": [
"# Total DOSES_SHIPPED per LOT_NUMBER, stored in a column named OVERALL_DOSES_SHIPPED.\n",
"# The string alias 'sum' selects pandas' own groupby sum; passing the builtin sum\n",
"# is deprecated for aggregations in pandas 2.1+ and emits a FutureWarning.\n",
"OVERALL_DOSES_SHIPPED_by_LOT_NUMBER = (\n",
"    vaccineDistributionByZipcode\n",
"    .groupby('LOT_NUMBER')\n",
"    .agg(OVERALL_DOSES_SHIPPED = pd.NamedAgg(column = 'DOSES_SHIPPED', aggfunc = 'sum')))\n",
"OVERALL_DOSES_SHIPPED_by_LOT_NUMBER"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1a5667be",
"metadata": {},
"outputs": [],
"source": [
"# Attach the per-lot OVERALL_DOSES_SHIPPED totals to the shipment table.\n",
"vaccineDistributionByZipcode = addColumn2Dataframe(\n",
"    dataframe = vaccineDistributionByZipcode,\n",
"    column = OVERALL_DOSES_SHIPPED_by_LOT_NUMBER)\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f77505c6",
"metadata": {},
"outputs": [],
"source": [
"from ADR_by_Batchcode_Table_Factory import create_ADR_by_Batchcode_Table_4USA\n",
"\n",
"# Build the ADR-by-batch-code table for the USA from the merged VAERS data.\n",
"ADR_by_Batchcode_Table_4USA = create_ADR_by_Batchcode_Table_4USA(\n",
"    internationalVaersCovid19)\n",
"ADR_by_Batchcode_Table_4USA"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99120c77",
"metadata": {},
"outputs": [],
"source": [
"# Attach the USA ADR-by-batch-code table to the shipment table.\n",
"vaccineDistributionByZipcode = addColumn2Dataframe(\n",
"    dataframe = vaccineDistributionByZipcode,\n",
"    column = ADR_by_Batchcode_Table_4USA)\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3276cce7",
"metadata": {},
"outputs": [],
"source": [
"# Fraction of the lot's overall shipped doses that this row accounts for.\n",
"shareOfLotShipped = vaccineDistributionByZipcode['DOSES_SHIPPED'] / vaccineDistributionByZipcode['OVERALL_DOSES_SHIPPED']\n",
"\n",
"# The lot's adverse reaction reports, scaled by that fraction and rounded to two decimals.\n",
"vaccineDistributionByZipcode['Statistical Number of Adverse Reaction Reports'] = (shareOfLotShipped * vaccineDistributionByZipcode['Adverse Reaction Reports']).round(2)\n",
"\n",
"# NOTE(review): despite its name, this column is computed without 'Adverse Reaction Reports'\n",
"# (it is the lot share scaled by 100,000) - confirm the intended formula.\n",
"vaccineDistributionByZipcode['Statistical Number of Adverse Reaction Reports (per 100,000)'] = (shareOfLotShipped * 100000).round().astype(int)\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10cf731f",
"metadata": {},
"outputs": [],
"source": [
"# Keep only the columns listed here, in this order.\n",
"columnsToKeep = [\n",
"    'PROVIDER_NAME',\n",
"    'ZIPCODE_SHP',\n",
"    'LOT_NUMBER',\n",
"    'DOSES_SHIPPED',\n",
"    'Statistical Number of Adverse Reaction Reports',\n",
"    'Statistical Number of Adverse Reaction Reports (per 100,000)'\n",
"]\n",
"vaccineDistributionByZipcode = vaccineDistributionByZipcode[columnsToKeep]\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0c2020e9",
"metadata": {},
"outputs": [],
"source": [
"# Map the raw shipment column names to human-readable headings.\n",
"headingByColumn = {\n",
"    'PROVIDER_NAME': 'Provider',\n",
"    'ZIPCODE_SHP': 'ZIP Code',\n",
"    'LOT_NUMBER': 'Lot Number',\n",
"    'DOSES_SHIPPED': 'Doses Shipped'\n",
"}\n",
"vaccineDistributionByZipcode = vaccineDistributionByZipcode.rename(columns = headingByColumn)\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f68c72d0",
"metadata": {},
"outputs": [],
"source": [
"# Disabled Excel export of the table; uncomment the next line to write it under tmp/.\n",
"# vaccineDistributionByZipcode.to_excel('tmp/Amended-22-01962-Pfizer-2022-0426-pulled-2022-0823_sumDoses.xlsx')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b8f0b6e",
"metadata": {},
"outputs": [],
"source": [
"# Persist the final table as JSON, then write the data-source date into VaccineDistributionByZipcode.html.\n",
"vaccineDistributionByZipcodeJsonFile = '../docs/data/vaccineDistributionByZipcode/VaccineDistributionByZipcode.json'\n",
"IOUtils.saveDataFrameAsJson(vaccineDistributionByZipcode, vaccineDistributionByZipcodeJsonFile)\n",
"\n",
"lastUpdatedDataSource = dateProvider.getLastUpdatedDataSource()\n",
"saveLastUpdated2HtmlFile(lastUpdatedDataSource, \"../docs/VaccineDistributionByZipcode.html\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "howbadismybatch-venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}