Files
HowBadIsMyBatch/src/HowBadIsMyBatch.ipynb
frankknoll 9d51ef94e7 refactoring
2023-10-11 17:26:13 +02:00

488 lines
14 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "9de5907f-18f5-4cb1-903e-26028ff1fa03",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from datetime import datetime\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"\n",
"from CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter import CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter\n",
"from JensenShannonDistance2BarChartDescriptionColumnAdder import JensenShannonDistance2BarChartDescriptionColumnAdder\n",
"from CountriesByBatchcodeProvider import getCountryCountsByBatchcodeTable, filterByBatchcodes\n",
"from Column2DataframeAdder import addColumn2Dataframe\n",
"from BatchCodeTableFactory import BatchCodeTableFactory\n",
"from HistogramFactoryAndPersister import createAndSaveGlobalHistograms\n",
"from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n",
"from BatchCodeTablePersister import createGlobalBatchCodeTable\n",
"from BatchCodeTableHtmlUpdater import updateBatchCodeTableHtmlFile, saveLastUpdated2HtmlFile\n",
"from InternationalVaersCovid19Provider import getInternationalVaersCovid19, getInternationalVaersCovid19BeforeDeletion, get_international_VAERSVAX_VAERSSYMPTOMS_Covid19\n",
"from DateProvider import DateProvider\n",
"from BarChartDescriptionTable2DictionaryConverter import BarChartDescriptionTable2DictionaryConverter\n",
"from CountryColumnsMerger import CountryColumnsMerger\n",
"from VAERSFileDownloader import updateVAERSFiles\n",
"from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger\n",
"from IOUtils import IOUtils\n",
"from GoogleAnalytics.Helper import persistCityCountsByClickedBatchcodeTables\n",
"from VaccineDistributionByZipcodeSimplifier import VaccineDistributionByZipcodeSimplifier\n",
"from ADR_by_Batchcode_Table_Factory import create_ADR_by_Batchcode_Table_4USA\n",
"\n",
"pd.set_option('display.max_rows', 100)\n",
"pd.set_option('display.max_columns', None)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1e4fa9e",
"metadata": {},
"outputs": [],
"source": [
"print(datetime.now().strftime(\"%d.%m.%Y, %H:%M:%S Uhr\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ffad1c04",
"metadata": {},
"outputs": [],
"source": [
"dateProvider = DateProvider()\n",
"print(' lastUpdated:', dateProvider.getLastUpdated())\n",
"print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource())\n",
"needsUpdate = dateProvider.needsUpdate()\n",
"print('needsUpdate:', needsUpdate)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "194b7357",
"metadata": {},
"outputs": [],
"source": [
"years_from_start_of_COVID_vaccination_to_present = list(range(2020, datetime.now().year + 1))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a793dff0",
"metadata": {},
"outputs": [],
"source": [
"updateVAERSFiles(\n",
" years = years_from_start_of_COVID_vaccination_to_present,\n",
" workingDirectory = os.getcwd())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58333a19",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSVAX_Covid19, international_VAERSSYMPTOMS = get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years = years_from_start_of_COVID_vaccination_to_present)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f10b558f",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSVAX_Covid19\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e0908fe",
"metadata": {},
"outputs": [],
"source": [
"international_VAERSSYMPTOMS\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "781ac80e",
"metadata": {},
"outputs": [],
"source": [
"internationalVaersCovid19 = getInternationalVaersCovid19(dataDir = 'VAERS', years = years_from_start_of_COVID_vaccination_to_present)\n",
"internationalVaersCovid19"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29366235",
"metadata": {},
"outputs": [],
"source": [
"internationalVaersCovid19 = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(\n",
" src = getInternationalVaersCovid19BeforeDeletion(),\n",
" dst = internationalVaersCovid19)\n",
"internationalVaersCovid19"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0915aa5a",
"metadata": {},
"outputs": [],
"source": [
"batchCodeTable = createGlobalBatchCodeTable(\n",
" deleteEntriesWithADRsLessThanOrEqual = 2,\n",
" minADRsForLethality = 100,\n",
" batchCodeTableFactory = BatchCodeTableFactory(internationalVaersCovid19))\n",
"batchCodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8e81ffc",
"metadata": {},
"outputs": [],
"source": [
"IOUtils.saveDataFrameAsJson(batchCodeTable, '../docs/data/batchCodeTables/Global.json')\n",
"saveLastUpdated2HtmlFile(dateProvider.getLastUpdatedDataSource(), \"../docs/batchCodes.html\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "189a639e",
"metadata": {},
"outputs": [],
"source": [
"updateBatchCodeTableHtmlFile(\n",
" batchCodeTable,\n",
" batchCodeTableHtmlFile = \"../docs/HowBadIsMyBatch.html\",\n",
" lastUpdated = dateProvider.getLastUpdatedDataSource())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "19fa050b",
"metadata": {},
"outputs": [],
"source": [
"symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(\n",
" VAERSVAX = international_VAERSVAX_Covid19[international_VAERSVAX_Covid19['VAX_LOT'].isin(batchCodeTable['Batch'])],\n",
" VAERSSYMPTOMS = international_VAERSSYMPTOMS)\n",
"symptomByBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec2a692e",
"metadata": {},
"outputs": [],
"source": [
"createAndSaveGlobalHistograms(symptomByBatchcodeTable, batchCodeTable)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70fcc420",
"metadata": {},
"outputs": [],
"source": [
"countryCountsByBatchcode = filterByBatchcodes(getCountryCountsByBatchcodeTable(), batchCodeTable['Batch'].values)\n",
"countryCountsByBatchcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a67b1f6",
"metadata": {},
"outputs": [],
"source": [
"barChartDescriptionTable = CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter.convert2BarChartDescriptionTable(countryCountsByBatchcode)\n",
"barChartDescriptionTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "13bdd443",
"metadata": {},
"outputs": [],
"source": [
"barChartDescriptionTable = JensenShannonDistance2BarChartDescriptionColumnAdder.addJensenShannonDistance2BarChartDescriptionColumn(barChartDescriptionTable)\n",
"barChartDescriptionTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "163830ba",
"metadata": {},
"outputs": [],
"source": [
"IOUtils.saveDictAsJson(\n",
" BarChartDescriptionTable2DictionaryConverter.convert2Dictionary(barChartDescriptionTable, internationalVaersCovid19),\n",
" '../docs/data/barChartDescriptionTable.json')\n"
]
},
{
"cell_type": "markdown",
"id": "c5a66a3b",
"metadata": {},
"source": [
"# Google Analytics"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9740c40b",
"metadata": {},
"outputs": [],
"source": [
"cityCountsByClickedBatchcodeTable = CityCountsByBatchcodeTablesMerger.getCityCountsByClickedBatchcode('data/GoogleAnalytics')\n",
"cityCountsByClickedBatchcodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0dac0ea6",
"metadata": {},
"outputs": [],
"source": [
"persistCityCountsByClickedBatchcodeTables(\n",
" dataDir = Path('tmp/cityCountsByClickedBatchcodeTables'),\n",
" n = 10,\n",
" cityCountsByClickedBatchcodeTable = cityCountsByClickedBatchcodeTable)\n",
"# import generated xlsx files in dataDir into https://www.google.com/mymaps"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "2d93b511",
"metadata": {},
"source": [
"# Vaccine Distribution by Zipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cfcbad44",
"metadata": {},
"outputs": [],
"source": [
"vaccineDistributionByZipcode = pd.read_excel(\n",
" io = 'data/Amended-22-01962-Pfizer-2022-0426-pulled-2022-0823.xlsx',\n",
" usecols = ['PROVIDER_NAME', 'ZIPCODE_SHP', 'LOT_NUMBER', 'DOSES_SHIPPED'])\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "59c745d2",
"metadata": {},
"outputs": [],
"source": [
"# VaccineDistributionByZipcodeSimplifier is imported in the notebook's first (imports) cell.\n",
"# NOTE(review): sumDoses presumably merges duplicate shipment rows by summing their doses - confirm in the helper module.\n",
"vaccineDistributionByZipcode = VaccineDistributionByZipcodeSimplifier.sumDoses(vaccineDistributionByZipcode)\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8cd250f7",
"metadata": {},
"outputs": [],
"source": [
"# Total DOSES_SHIPPED per LOT_NUMBER, summed over every row of that lot.\n",
"# The aggregation is named by the string 'sum': passing the Python builtin `sum` to a\n",
"# groupby aggregation is deprecated since pandas 2.1 and emits a FutureWarning.\n",
"OVERALL_DOSES_SHIPPED_by_LOT_NUMBER = (\n",
" vaccineDistributionByZipcode\n",
" .groupby('LOT_NUMBER')\n",
" .agg(OVERALL_DOSES_SHIPPED = pd.NamedAgg(column = 'DOSES_SHIPPED', aggfunc = 'sum')))\n",
"OVERALL_DOSES_SHIPPED_by_LOT_NUMBER"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1a5667be",
"metadata": {},
"outputs": [],
"source": [
"vaccineDistributionByZipcode = addColumn2Dataframe(dataframe = vaccineDistributionByZipcode, column = OVERALL_DOSES_SHIPPED_by_LOT_NUMBER)\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f77505c6",
"metadata": {},
"outputs": [],
"source": [
"# create_ADR_by_Batchcode_Table_4USA is imported in the notebook's first (imports) cell.\n",
"ADR_by_Batchcode_Table_4USA = create_ADR_by_Batchcode_Table_4USA(internationalVaersCovid19)\n",
"ADR_by_Batchcode_Table_4USA"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99120c77",
"metadata": {},
"outputs": [],
"source": [
"vaccineDistributionByZipcode = addColumn2Dataframe(dataframe = vaccineDistributionByZipcode, column = ADR_by_Batchcode_Table_4USA)\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3276cce7",
"metadata": {},
"outputs": [],
"source": [
"# DOSES_SHIPPED / OVERALL_DOSES_SHIPPED is the row's share of all doses shipped for its lot.\n",
"# Scaling 'Adverse Reaction Reports' by that share gives the row's statistical number of reports\n",
"# (presumably 'Adverse Reaction Reports' is the per-lot count joined in two cells above - TODO confirm).\n",
"vaccineDistributionByZipcode['Statistical Number of Adverse Reaction Reports'] = (\n",
" vaccineDistributionByZipcode['DOSES_SHIPPED']\n",
" / vaccineDistributionByZipcode['OVERALL_DOSES_SHIPPED']\n",
" * vaccineDistributionByZipcode['Adverse Reaction Reports']\n",
").round(2)\n",
"\n",
"# NOTE(review): this column scales the same share by a fixed 100000 and never reads\n",
"# 'Adverse Reaction Reports' - confirm that this is the intended meaning of 'per 100,000'.\n",
"# astype(int) raises if the ratio is NaN or infinite (e.g. OVERALL_DOSES_SHIPPED missing or 0).\n",
"vaccineDistributionByZipcode['Statistical Number of Adverse Reaction Reports (per 100,000)'] = (\n",
" vaccineDistributionByZipcode['DOSES_SHIPPED']\n",
" / vaccineDistributionByZipcode['OVERALL_DOSES_SHIPPED']\n",
" * 100000\n",
").round().astype(int)\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10cf731f",
"metadata": {},
"outputs": [],
"source": [
"# Restrict the frame to the columns kept for the output, in this order;\n",
"# helper columns such as OVERALL_DOSES_SHIPPED are dropped here.\n",
"vaccineDistributionByZipcode = vaccineDistributionByZipcode[\n",
" [\n",
" 'PROVIDER_NAME',\n",
" 'ZIPCODE_SHP',\n",
" 'LOT_NUMBER',\n",
" 'DOSES_SHIPPED',\n",
" 'Statistical Number of Adverse Reaction Reports',\n",
" 'Statistical Number of Adverse Reaction Reports (per 100,000)',\n",
" ]\n",
"]\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0c2020e9",
"metadata": {},
"outputs": [],
"source": [
"vaccineDistributionByZipcode = vaccineDistributionByZipcode.rename(\n",
" columns = {\n",
" 'PROVIDER_NAME': 'Provider',\n",
" 'ZIPCODE_SHP': 'ZIP Code',\n",
" 'LOT_NUMBER': 'Lot Number',\n",
" 'DOSES_SHIPPED': 'Doses Shipped'\n",
" })\n",
"vaccineDistributionByZipcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f68c72d0",
"metadata": {},
"outputs": [],
"source": [
"# vaccineDistributionByZipcode.to_excel('tmp/Amended-22-01962-Pfizer-2022-0426-pulled-2022-0823_sumDoses.xlsx')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b8f0b6e",
"metadata": {},
"outputs": [],
"source": [
"IOUtils.saveDataFrameAsJson(vaccineDistributionByZipcode, '../docs/data/vaccineDistributionByZipcode/VaccineDistributionByZipcode.json')\n",
"saveLastUpdated2HtmlFile(dateProvider.getLastUpdatedDataSource(), \"../docs/VaccineDistributionByZipcode.html\")"
]
},
{
"cell_type": "markdown",
"id": "3e47c62c",
"metadata": {},
"source": [
"# Symptoms caused by Vaccines\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5cd9935f",
"metadata": {},
"outputs": [],
"source": [
"from SymptomsCausedByVaccines.HtmlUpdater import updateHtmlFile\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6505ec4c",
"metadata": {},
"outputs": [],
"source": [
"prrByVaccineAndSymptom = pd.read_csv(\n",
" 'data/prr-ratios-all-vaccines.csv',\n",
" index_col = 'VAX_TYPE',\n",
" usecols = lambda columnName: columnName != 'Unnamed: 0')\n",
"prrByVaccineAndSymptom"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "803dfbef",
"metadata": {},
"outputs": [],
"source": [
"updateHtmlFile(\n",
" symptoms = list(prrByVaccineAndSymptom.columns),\n",
" htmlFile = \"../docs/SymptomsCausedByVaccines/index.html\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "howbadismybatch-venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}