847 lines
23 KiB
Plaintext
847 lines
23 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9de5907f-18f5-4cb1-903e-26028ff1fa03",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter import CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter\n",
|
|
"from JensenShannonDistance2BarChartDescriptionColumnAdder import JensenShannonDistance2BarChartDescriptionColumnAdder\n",
|
|
"from CountriesByBatchcodeProvider import getCountryCountsByBatchcodeTable, filterByBatchcodes\n",
|
|
"from Column2DataframeAdder import addColumn2Dataframe\n",
|
|
"from BatchCodeTableFactory import BatchCodeTableFactory\n",
|
|
"from HistogramFactoryAndPersister import createAndSaveGlobalHistograms\n",
|
|
"from SymptomByBatchcodeTableFactory import SymptomByBatchcodeTableFactory\n",
|
|
"from BatchCodeTablePersister import createGlobalBatchCodeTable\n",
|
|
"from BatchCodeTableHtmlUpdater import updateBatchCodeTableHtmlFile, saveLastUpdated2HtmlFile\n",
|
|
"from InternationalVaersCovid19Provider import getInternationalVaersCovid19, getInternationalVaersCovid19BeforeDeletion, get_international_VAERSVAX_VAERSSYMPTOMS_Covid19\n",
|
|
"from DateProvider import DateProvider\n",
|
|
"from BarChartDescriptionTable2DictionaryConverter import BarChartDescriptionTable2DictionaryConverter\n",
|
|
"from CountryColumnsMerger import CountryColumnsMerger\n",
|
|
"from datetime import datetime\n",
|
|
"from VAERSFileDownloader import updateVAERSFiles\n",
|
|
"from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger\n",
|
|
"from IOUtils import IOUtils\n",
|
|
"from pathlib import Path\n",
|
|
"from GoogleAnalytics.Helper import persistCityCountsByClickedBatchcodeTables\n",
|
|
"import os\n",
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"pd.set_option('display.max_rows', 100)\n",
|
|
"pd.set_option('display.max_columns', None)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d1e4fa9e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(datetime.now().strftime(\"%d.%m.%Y, %H:%M:%S Uhr\"))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ffad1c04",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"dateProvider = DateProvider()\n",
|
|
"print(' lastUpdated:', dateProvider.getLastUpdated())\n",
|
|
"print('lastUpdatedDataSource:', dateProvider.getLastUpdatedDataSource())\n",
|
|
"needsUpdate = dateProvider.needsUpdate()\n",
|
|
"print('needsUpdate:', needsUpdate)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "194b7357",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"years_from_start_of_COVID_vaccination_to_present = list(range(2020, datetime.now().year + 1))\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a793dff0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"updateVAERSFiles(\n",
|
|
" years = years_from_start_of_COVID_vaccination_to_present,\n",
|
|
" workingDirectory = os.getcwd())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "58333a19",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"international_VAERSVAX_Covid19, international_VAERSSYMPTOMS = get_international_VAERSVAX_VAERSSYMPTOMS_Covid19(years = years_from_start_of_COVID_vaccination_to_present)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f10b558f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"international_VAERSVAX_Covid19\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3e0908fe",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"international_VAERSSYMPTOMS\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "781ac80e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"internationalVaersCovid19 = getInternationalVaersCovid19(dataDir = 'VAERS', years = years_from_start_of_COVID_vaccination_to_present)\n",
|
|
"internationalVaersCovid19"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "29366235",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"internationalVaersCovid19 = CountryColumnsMerger.mergeCountryColumnOfSrcIntoDst(\n",
|
|
" src = getInternationalVaersCovid19BeforeDeletion(),\n",
|
|
" dst = internationalVaersCovid19)\n",
|
|
"internationalVaersCovid19"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0915aa5a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"batchCodeTable = createGlobalBatchCodeTable(\n",
|
|
" deleteEntriesWithADRsLessThanOrEqual = 2,\n",
|
|
" minADRsForLethality = 100,\n",
|
|
" batchCodeTableFactory = BatchCodeTableFactory(internationalVaersCovid19))\n",
|
|
"batchCodeTable"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d8e81ffc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"IOUtils.saveDataFrameAsJson(batchCodeTable, '../docs/data/batchCodeTables/Global.json')\n",
|
|
"saveLastUpdated2HtmlFile(\n",
|
|
" lastUpdated = dateProvider.getLastUpdatedDataSource(),\n",
|
|
" htmlFile = \"../docs/batchCodes.html\",\n",
|
|
" lastUpdatedElementId = 'last_updated')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "189a639e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"updateBatchCodeTableHtmlFile(\n",
|
|
" batchCodeTable,\n",
|
|
" batchCodeTableHtmlFile = \"../docs/HowBadIsMyBatch.html\",\n",
|
|
" lastUpdated = dateProvider.getLastUpdatedDataSource())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "19fa050b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(\n",
|
|
" VAERSVAX = international_VAERSVAX_Covid19[international_VAERSVAX_Covid19['VAX_LOT'].isin(batchCodeTable['Batch'])],\n",
|
|
" VAERSSYMPTOMS = international_VAERSSYMPTOMS)\n",
|
|
"symptomByBatchcodeTable"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ec2a692e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"createAndSaveGlobalHistograms(symptomByBatchcodeTable, batchCodeTable)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "70fcc420",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"countryCountsByBatchcode = filterByBatchcodes(getCountryCountsByBatchcodeTable(), batchCodeTable['Batch'].values)\n",
|
|
"countryCountsByBatchcode"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7a67b1f6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"barChartDescriptionTable = CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter.convert2BarChartDescriptionTable(countryCountsByBatchcode)\n",
|
|
"barChartDescriptionTable"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "13bdd443",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"barChartDescriptionTable = JensenShannonDistance2BarChartDescriptionColumnAdder.addJensenShannonDistance2BarChartDescriptionColumn(barChartDescriptionTable)\n",
|
|
"barChartDescriptionTable"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "163830ba",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"IOUtils.saveDictAsJson(\n",
|
|
" BarChartDescriptionTable2DictionaryConverter.convert2Dictionary(barChartDescriptionTable, internationalVaersCovid19),\n",
|
|
" '../docs/data/barChartDescriptionTable.json')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "c5a66a3b",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Google Analytics"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9740c40b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cityCountsByClickedBatchcodeTable = CityCountsByBatchcodeTablesMerger.getCityCountsByClickedBatchcode('data/GoogleAnalytics')\n",
|
|
"cityCountsByClickedBatchcodeTable"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0dac0ea6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"persistCityCountsByClickedBatchcodeTables(\n",
|
|
" dataDir = Path('tmp/cityCountsByClickedBatchcodeTables'),\n",
|
|
" n = 10,\n",
|
|
" cityCountsByClickedBatchcodeTable = cityCountsByClickedBatchcodeTable)\n",
|
|
"# import generated xlsx files in dataDir into https://www.google.com/mymaps"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"id": "2d93b511",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Vaccine Distribution by Zipcode"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "cfcbad44",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vaccineDistributionByZipcode = pd.read_excel(\n",
|
|
" io = 'data/Amended-22-01962-Pfizer-2022-0426-pulled-2022-0823.xlsx',\n",
|
|
" usecols = ['PROVIDER_NAME', 'ZIPCODE_SHP', 'LOT_NUMBER', 'DOSES_SHIPPED'])\n",
|
|
"vaccineDistributionByZipcode"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "59c745d2",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from VaccineDistributionByZipcodeSimplifier import VaccineDistributionByZipcodeSimplifier\n",
|
|
"\n",
|
|
"vaccineDistributionByZipcode = VaccineDistributionByZipcodeSimplifier.sumDoses(vaccineDistributionByZipcode)\n",
|
|
"vaccineDistributionByZipcode"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8cd250f7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Total DOSES_SHIPPED per LOT_NUMBER, summed over every row of that lot.\n",
"# The aggregation is given as the string 'sum' instead of the Python builtin `sum`:\n",
"# pandas (>= 2.1) emits a FutureWarning for builtin callables in groupby.agg and\n",
"# recommends the string form to keep the current behaviour.\n",
"OVERALL_DOSES_SHIPPED_by_LOT_NUMBER = (\n",
"    vaccineDistributionByZipcode\n",
"    .groupby('LOT_NUMBER')\n",
"    .agg(OVERALL_DOSES_SHIPPED = pd.NamedAgg(column = 'DOSES_SHIPPED', aggfunc = 'sum')))\n",
"OVERALL_DOSES_SHIPPED_by_LOT_NUMBER"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1a5667be",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vaccineDistributionByZipcode = addColumn2Dataframe(dataframe = vaccineDistributionByZipcode, column = OVERALL_DOSES_SHIPPED_by_LOT_NUMBER)\n",
|
|
"vaccineDistributionByZipcode"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f77505c6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from ADR_by_Batchcode_Table_Factory import create_ADR_by_Batchcode_Table_4USA\n",
|
|
"\n",
|
|
"ADR_by_Batchcode_Table_4USA = create_ADR_by_Batchcode_Table_4USA(internationalVaersCovid19)\n",
|
|
"ADR_by_Batchcode_Table_4USA"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "99120c77",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vaccineDistributionByZipcode = addColumn2Dataframe(dataframe = vaccineDistributionByZipcode, column = ADR_by_Batchcode_Table_4USA)\n",
|
|
"vaccineDistributionByZipcode"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3276cce7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Estimated share of a lot's adverse reaction reports attributable to one shipment:\n",
"# (doses in this shipment / all doses shipped of the lot) * reports for the lot.\n",
"vaccineDistributionByZipcode['Statistical Number of Adverse Reaction Reports'] = (vaccineDistributionByZipcode['DOSES_SHIPPED'] / vaccineDistributionByZipcode['OVERALL_DOSES_SHIPPED'] * vaccineDistributionByZipcode['Adverse Reaction Reports']).round(2)\n",
"# The same estimate normalised to 100,000 shipped doses, i.e.\n",
"# 'Statistical Number of Adverse Reaction Reports' / DOSES_SHIPPED * 100000, which simplifies to\n",
"# reports for the lot / all doses shipped of the lot * 100000.\n",
"# The previous formula (DOSES_SHIPPED / OVERALL_DOSES_SHIPPED * 100000) never used the report\n",
"# count, so the column held a dose share instead of a report rate.\n",
"# Nullable 'Int64' keeps rows without a report count as missing instead of raising on NaN.\n",
"# NOTE(review): confirm that 'per 100,000' is meant as 'per 100,000 doses shipped'.\n",
"vaccineDistributionByZipcode['Statistical Number of Adverse Reaction Reports (per 100,000)'] = (vaccineDistributionByZipcode['Adverse Reaction Reports'] / vaccineDistributionByZipcode['OVERALL_DOSES_SHIPPED'] * 100000).round().astype('Int64')\n",
"vaccineDistributionByZipcode"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "10cf731f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vaccineDistributionByZipcode = vaccineDistributionByZipcode[['PROVIDER_NAME', 'ZIPCODE_SHP', 'LOT_NUMBER', 'DOSES_SHIPPED', 'Statistical Number of Adverse Reaction Reports', 'Statistical Number of Adverse Reaction Reports (per 100,000)']]\n",
|
|
"vaccineDistributionByZipcode"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0c2020e9",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"vaccineDistributionByZipcode = vaccineDistributionByZipcode.rename(\n",
|
|
" columns = {\n",
|
|
" 'PROVIDER_NAME': 'Provider',\n",
|
|
" 'ZIPCODE_SHP': 'ZIP Code',\n",
|
|
" 'LOT_NUMBER': 'Lot Number',\n",
|
|
" 'DOSES_SHIPPED': 'Doses Shipped'\n",
|
|
" })\n",
|
|
"vaccineDistributionByZipcode"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f68c72d0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# vaccineDistributionByZipcode.to_excel('tmp/Amended-22-01962-Pfizer-2022-0426-pulled-2022-0823_sumDoses.xlsx')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9b8f0b6e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"IOUtils.saveDataFrameAsJson(vaccineDistributionByZipcode, '../docs/data/vaccineDistributionByZipcode/VaccineDistributionByZipcode.json')\n",
|
|
"saveLastUpdated2HtmlFile(\n",
|
|
" lastUpdated = dateProvider.getLastUpdatedDataSource(),\n",
|
|
" htmlFile = \"../docs/VaccineDistributionByZipcode.html\",\n",
|
|
" lastUpdatedElementId = 'last_updated')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "3e47c62c",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Symptoms caused by Vaccines\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5cd9935f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from SymptomsCausedByVaccines.HtmlUpdater import updateHtmlFile, updateHtmlFile4SymptomsCausedByCOVIDLots\n",
|
|
"from SymptomsCausedByVaccines.PrrSeriesFactory import PrrSeriesFactory\n",
|
|
"from SymptomsCausedByVaccines.PrrSeriesTransformer import PrrSeriesTransformer\n",
|
|
"from SymptomsCausedByVaccines.ProportionalReportingRatiosPersister import saveProportionalReportingRatios\n",
|
|
"import os\n",
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "eaf8fe21",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"prrByVaccineAndSymptom = pd.read_csv(\n",
|
|
" 'data/safety-signal-sym.csv',\n",
|
|
" index_col = 'VACCINE')\n",
|
|
"prrByVaccineAndSymptom"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "640868c7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"prrByVaccineBySymptom = PrrSeriesFactory.getPrrByVaccineBySymptom(prrByVaccineAndSymptom)\n",
|
|
"prrByVaccineBySymptom"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0f247c64",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"prrBySymptomByVaccine = PrrSeriesFactory.getPrrBySymptomByVaccine(prrByVaccineAndSymptom)\n",
|
|
"prrBySymptomByVaccine"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "760ac423",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"prrByVaccineBySymptomWithoutZeroPrrs = PrrSeriesTransformer.filterByNonZeroPrrs(prrByVaccineBySymptom)\n",
|
|
"prrByVaccineBySymptomWithoutZeroPrrs"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f07203e4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"prrBySymptomByVaccineWithHighPrrs = PrrSeriesTransformer.filterByHighPrrs(prrBySymptomByVaccine)\n",
|
|
"prrBySymptomByVaccineWithHighPrrs"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"webAppBaseDir = os.getcwd() + '/../docs/SymptomsCausedByVaccines'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "0b40071c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"saveProportionalReportingRatios(\n",
|
|
" prrByVaccineBySymptomWithoutZeroPrrs,\n",
|
|
" directory = os.path.normpath(webAppBaseDir + '/data/ProportionalReportingRatios/symptoms'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "fac4b34f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"saveProportionalReportingRatios(\n",
|
|
" prrBySymptomByVaccineWithHighPrrs,\n",
|
|
" directory = os.path.normpath(webAppBaseDir + '/data/ProportionalReportingRatios/vaccines'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "803dfbef",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"updateHtmlFile(\n",
|
|
" symptoms = list(prrByVaccineAndSymptom.columns),\n",
|
|
" vaccines = list(prrByVaccineAndSymptom.index),\n",
|
|
" htmlFile = os.path.normpath(webAppBaseDir + '/index.html'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Symptoms caused by COVID Lots"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"prrByLotAndSymptom = pd.read_excel(\n",
|
|
" io = 'data/symcolumns500.xlsx',\n",
|
|
" index_col = 'VAX_LOT')\n",
|
|
"prrByLotAndSymptom\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"prrByLotBySymptom = PrrSeriesFactory.getPrrByVaccineBySymptom(prrByLotAndSymptom)\n",
|
|
"prrByLotBySymptom"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"prrBySymptomByLot = PrrSeriesFactory.getPrrBySymptomByVaccine(prrByLotAndSymptom)\n",
|
|
"prrBySymptomByLot"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"prrByLotBySymptomWithoutZeroPrrs = PrrSeriesTransformer.filterByNonZeroPrrs(prrByLotBySymptom)\n",
|
|
"prrByLotBySymptomWithoutZeroPrrs"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"prrBySymptomByLotWithHighPrrs = PrrSeriesTransformer.filterByHighPrrs(prrBySymptomByLot)\n",
|
|
"prrBySymptomByLotWithHighPrrs"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"webAppBaseDir = os.getcwd() + '/../docs/SymptomsCausedByCOVIDLots'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"saveProportionalReportingRatios(\n",
|
|
" prrByLotBySymptomWithoutZeroPrrs,\n",
|
|
" directory = os.path.normpath(webAppBaseDir + '/data/ProportionalReportingRatios/symptoms'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"saveProportionalReportingRatios(\n",
|
|
" prrBySymptomByLotWithHighPrrs,\n",
|
|
" directory = os.path.normpath(webAppBaseDir + '/data/ProportionalReportingRatios/vaccines'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"updateHtmlFile4SymptomsCausedByCOVIDLots(\n",
|
|
" symptoms = list(prrByLotAndSymptom.columns),\n",
|
|
" batches = list(prrByLotAndSymptom.index),\n",
|
|
" htmlFile = os.path.normpath(webAppBaseDir + '/index.html'))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Multi Line Fitting"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from SymptomsCausedByVaccines.MultiLineFitting.MultiLineFitter import MultiLineFitter\n",
|
|
"from SymptomsCausedByVaccines.MultiLineFitting.SymptomCombinationsProvider import SymptomCombinationsProvider\n",
|
|
"from matplotlib import pyplot as plt\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# symptomX = 'Abdominal discomfort' # 'HIV test' # 'Immunosuppression'\n",
|
|
"# symptomY = 'Abdominal distension' # 'Infection' # 'Immunoglobulin therapy'"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# df = prrByLotAndSymptom[[symptomX, symptomY]]\n",
|
|
"# df = df[(df[symptomX] != 0) & (df[symptomY] != 0)]\n",
|
|
"# df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# retain only those columns of prrByLotAndSymptom that have at least 400 PRRs != 0\n",
|
|
"# prrByLotAndSymptom2 = prrByLotAndSymptom.loc[:, (prrByLotAndSymptom != 0).sum() >= 400]\n",
|
|
"# prrByLotAndSymptom2"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"symptomCombinations = SymptomCombinationsProvider.generateSymptomCombinations(\n",
|
|
" prrByLotAndSymptom,\n",
|
|
" dataFramePredicate = lambda df: 40 <= len(df) <= 50)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from SymptomsCausedByVaccines.MultiLineFitting.Utils import take\n",
|
|
"\n",
|
|
"df = take(symptomCombinations, 1)[0]\n",
|
|
"df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"symptomX, symptomY = df.columns"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"points = [(x, y) for [x, y] in df.values]\n",
|
|
"points"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"def draw(points, clusters, lines, symptomX, symptomY, minClusterSize):\n",
"    \"\"\"Scatter-plot all points and overlay every sufficiently large cluster with its line.\n",
"\n",
"    points: iterable of (x, y) pairs, plotted in blue.\n",
"    clusters, lines: parallel sequences; each line is paired with the cluster at the same position.\n",
"    symptomX, symptomY: labels for the x and y axis.\n",
"    minClusterSize: clusters with fewer points than this are not highlighted.\n",
"    \"\"\"\n",
"    # Draw through the explicit Axes instead of the implicit pyplot current-axes state.\n",
"    _, ax = plt.subplots()\n",
"    ax.scatter(_getXs(points), _getYs(points), color = \"blue\", marker = \".\", s = 100)\n",
"    for cluster, line in zip(clusters, lines):\n",
"        if len(cluster) >= minClusterSize:\n",
"            _drawLine(line, cluster, ax)\n",
"            # NOTE(review): nesting reconstructed; assumes only clusters that get a line are re-coloured -- confirm.\n",
"            ax.scatter(_getXs(cluster), _getYs(cluster), marker = \".\", s = 100)\n",
"    ax.set_xlabel(symptomX)\n",
"    ax.set_ylabel(symptomY)\n",
"    plt.show()\n",
"\n",
"def _drawLine(line, cluster, ax):\n",
"    \"\"\"Draw `line` on `ax`, limited to the extent of the points in `cluster`.\"\"\"\n",
"    # NOTE(review): presumably transform_points yields signed distances along the line, so\n",
"    # dividing by the direction's norm converts them to the parameters plot_2d expects -- confirm.\n",
"    coords = line.transform_points(cluster)\n",
"    magnitude = line.direction.norm()\n",
"    line.plot_2d(ax, t_1 = min(coords) / magnitude, t_2 = max(coords) / magnitude)\n",
"\n",
"def _getXs(xys):\n",
"    \"\"\"Return the first component of every (x, y) pair.\"\"\"\n",
"    return [x for (x, _) in xys]\n",
"\n",
"def _getYs(xys):\n",
"    \"\"\"Return the second component of every (x, y) pair.\"\"\"\n",
"    return [y for (_, y) in xys]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"clustersAscending, linesAscending = MultiLineFitter.fitPointsByAscendingLines(\n",
|
|
" points,\n",
|
|
" consensusThreshold = 0.01,\n",
|
|
" maxNumLines = None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"draw(points, clustersAscending, linesAscending, symptomX, symptomY, minClusterSize = 5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"clusters, lines = MultiLineFitter.fitPointsByLines(\n",
|
|
" points,\n",
|
|
" consensusThreshold = 0.01,\n",
|
|
" maxNumLines = None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"draw(points, clusters, lines, symptomX, symptomY, minClusterSize = 5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "howbadismybatch-venv-kernel",
|
|
"language": "python",
|
|
"name": "howbadismybatch-venv-kernel"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.15"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|