Files
HowBadIsMyBatch/src/HowBadIsMyBatch.ipynb
frankknoll c2f4182fea merging
2023-06-09 16:46:49 +02:00

392 lines
11 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "9de5907f-18f5-4cb1-903e-26028ff1fa03",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"pd.set_option('display.max_rows', 100)\n",
"pd.set_option('display.max_columns', None)\n",
"\n",
"import os\n",
"from IOUtils import IOUtils\n",
"from VAERSFileDownloader import updateVAERSFiles\n",
"from datetime import datetime\n",
"from DateProvider import DateProvider\n",
"from InternationalVaersCovid19Provider import getInternationalVaersCovid19, get_international_VAERSVAX_VAERSSYMPTOMS_Covid19\n",
"from BatchCodeTableHtmlUpdater import updateBatchCodeTableHtmlFile, saveLastUpdatedBatchCodeTable\n",
"from BatchCodeTablePersister import createGlobalBatchCodeTable\n",
"from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n",
"from HistogramFactoryAndPersister import createAndSaveGlobalHistograms\n",
"from BatchCodeTableFactory import BatchCodeTableFactory\n",
"from Column2DataframeAdder import addColumn2Dataframe\n",
"from CountriesByBatchcodeProvider import getCountryCountsByBatchcodeTable\n",
"from CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter import CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1e4fa9e",
"metadata": {},
"outputs": [],
"source": [
"print(datetime.now().strftime(\"%d.%m.%Y, %H:%M:%S Uhr\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ffad1c04",
"metadata": {},
"outputs": [],
"source": [
"dateProvider = DateProvider()\n",
"print(' lastUpdated:', dateProvider.getLastUpdated())\n",
"print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource())\n",
"needsUpdate = dateProvider.needsUpdate()\n",
"print('needsUpdate:', needsUpdate)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "194b7357",
"metadata": {},
"outputs": [],
"source": [
"years_from_start_of_COVID_vaccination_to_present = list(range(2020, datetime.now().year + 1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a793dff0",
"metadata": {},
"outputs": [],
"source": [
"updateVAERSFiles(\n",
" years = years_from_start_of_COVID_vaccination_to_present,\n",
" workingDirectory = os.getcwd())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58333a19",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSVAX_Covid19, international_VAERSSYMPTOMS = get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years = years_from_start_of_COVID_vaccination_to_present)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f10b558f",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSVAX_Covid19"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e0908fe",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSSYMPTOMS"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "781ac80e",
"metadata": {},
"outputs": [],
"source": [
"internationalVaersCovid19 = getInternationalVaersCovid19(dataDir = 'VAERS', years = years_from_start_of_COVID_vaccination_to_present)\n",
"internationalVaersCovid19"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0915aa5a",
"metadata": {},
"outputs": [],
"source": [
"batchCodeTable = createGlobalBatchCodeTable(\n",
" deleteEntriesWithADRsLessThanOrEqual = 2,\n",
" minADRsForLethality = 100,\n",
" batchCodeTableFactory = BatchCodeTableFactory(internationalVaersCovid19))\n",
"batchCodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8e81ffc",
"metadata": {},
"outputs": [],
"source": [
"IOUtils.saveDataFrameAsJson(batchCodeTable, '../docs/data/batchCodeTables/Global.json')\n",
"saveLastUpdatedBatchCodeTable(\n",
" DateProvider().getLastUpdatedDataSource(),\n",
" batchCodeTableHtmlFile=\"../docs/batchCodes.html\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "189a639e",
"metadata": {},
"outputs": [],
"source": [
"updateBatchCodeTableHtmlFile(batchCodeTable, batchCodeTableHtmlFile=\"../docs/HowBadIsMyBatch.html\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "19fa050b",
"metadata": {},
"outputs": [],
"source": [
"symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(\n",
" VAERSVAX = international_VAERSVAX_Covid19[international_VAERSVAX_Covid19['VAX_LOT'].isin(batchCodeTable['Batch'])],\n",
" VAERSSYMPTOMS = international_VAERSSYMPTOMS)\n",
"symptomByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec2a692e",
"metadata": {},
"outputs": [],
"source": [
"createAndSaveGlobalHistograms(symptomByBatchcodeTable, batchCodeTable)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9588a10c",
"metadata": {},
"outputs": [],
"source": [
"countryCountsByBatchcode = getCountryCountsByBatchcodeTable()\n",
"countryCountsByBatchcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70fcc420",
"metadata": {},
"outputs": [],
"source": [
"countryCountsByBatchcode.to_excel('tmp/countryCountsByBatchcode.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a67b1f6",
"metadata": {},
"outputs": [],
"source": [
"barChartDescriptionTable = CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter.convert2BarChartDescriptionTable(countryCountsByBatchcode)\n",
"barChartDescriptionTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8e1b471a",
"metadata": {},
"outputs": [],
"source": [
"barChartDescriptionTable['BAR_CHART_DESCRIPTION'].to_json('../docs/data/barChartDescriptionTable.json')"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "2d93b511",
"metadata": {},
"source": [
"# Vaccine Distribution by Zipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cfcbad44",
"metadata": {},
"outputs": [],
"source": [
"vaccineDistributionByZipcode = pd.read_excel(\n",
" io = 'data/Amended-22-01962-Pfizer-2022-0426-pulled-2022-0823.xlsx',\n",
" usecols = ['PROVIDER_NAME', 'ZIPCODE_SHP', 'LOT_NUMBER', 'DOSES_SHIPPED'],\n",
" dtype = {'DOSES_SHIPPED': 'int'})\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "59c745d2",
"metadata": {},
"outputs": [],
"source": [
"from VaccineDistributionByZipcodeSimplifier import VaccineDistributionByZipcodeSimplifier\n",
"\n",
"vaccineDistributionByZipcode = VaccineDistributionByZipcodeSimplifier.sumDoses(vaccineDistributionByZipcode)\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8cd250f7",
"metadata": {},
"outputs": [],
"source": [
"OVERALL_DOSES_SHIPPED_by_LOT_NUMBER = vaccineDistributionByZipcode.groupby('LOT_NUMBER').agg(OVERALL_DOSES_SHIPPED = pd.NamedAgg(column = 'DOSES_SHIPPED', aggfunc = sum))\n",
"OVERALL_DOSES_SHIPPED_by_LOT_NUMBER"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1a5667be",
"metadata": {},
"outputs": [],
"source": [
"vaccineDistributionByZipcode = addColumn2Dataframe(dataframe = vaccineDistributionByZipcode, column = OVERALL_DOSES_SHIPPED_by_LOT_NUMBER)\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f77505c6",
"metadata": {},
"outputs": [],
"source": [
"from ADR_by_Batchcode_Table_Factory import create_ADR_by_Batchcode_Table_4USA\n",
"\n",
"ADR_by_Batchcode_Table_4USA = create_ADR_by_Batchcode_Table_4USA(internationalVaersCovid19)\n",
"ADR_by_Batchcode_Table_4USA"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99120c77",
"metadata": {},
"outputs": [],
"source": [
"vaccineDistributionByZipcode = addColumn2Dataframe(dataframe = vaccineDistributionByZipcode, column = ADR_by_Batchcode_Table_4USA)\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3276cce7",
"metadata": {},
"outputs": [],
"source": [
"vaccineDistributionByZipcode['Statistical Number of Adverse Reaction Reports'] = (vaccineDistributionByZipcode['DOSES_SHIPPED'] / vaccineDistributionByZipcode['OVERALL_DOSES_SHIPPED'] * vaccineDistributionByZipcode['Adverse Reaction Reports']).round(2)\n",
"vaccineDistributionByZipcode['Statistical Number of Adverse Reaction Reports (per 100,000)'] = (vaccineDistributionByZipcode['DOSES_SHIPPED'] / vaccineDistributionByZipcode['OVERALL_DOSES_SHIPPED'] * 100000).round().astype(int)\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10cf731f",
"metadata": {},
"outputs": [],
"source": [
"vaccineDistributionByZipcode = vaccineDistributionByZipcode[['PROVIDER_NAME', 'ZIPCODE_SHP', 'LOT_NUMBER', 'DOSES_SHIPPED', 'Statistical Number of Adverse Reaction Reports', 'Statistical Number of Adverse Reaction Reports (per 100,000)']]\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0c2020e9",
"metadata": {},
"outputs": [],
"source": [
"vaccineDistributionByZipcode = vaccineDistributionByZipcode.rename(\n",
" columns = {\n",
" 'PROVIDER_NAME': 'Provider',\n",
" 'ZIPCODE_SHP': 'ZIP Code',\n",
" 'LOT_NUMBER': 'Lot Number',\n",
" 'DOSES_SHIPPED': 'Doses Shipped'\n",
" })\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f68c72d0",
"metadata": {},
"outputs": [],
"source": [
"# vaccineDistributionByZipcode.to_excel('tmp/Amended-22-01962-Pfizer-2022-0426-pulled-2022-0823_sumDoses.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b8f0b6e",
"metadata": {},
"outputs": [],
"source": [
"from IOUtils import IOUtils\n",
"\n",
"IOUtils.saveDataFrameAsJson(vaccineDistributionByZipcode, '../docs/data/vaccineDistributionByZipcode/VaccineDistributionByZipcode.json')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "howbadismybatch-venv-kernel",
"language": "python",
"name": "howbadismybatch-venv-kernel"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
},
"vscode": {
"interpreter": {
"hash": "1bce2b9b19ce5f16d695ff75ac05095b3e564c169ff454b58b87cb796c0695b8"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}