diff --git a/.gitignore b/.gitignore index 836dc6ca842..fb12341fb23 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,5 @@ src/HowBadIsMyBatch.nbconvert.html src/__pycache__/ src/intensivstationen/__pycache__/ google-chrome-stable_current_amd64* -src/captcha/__pycache__ \ No newline at end of file +src/captcha/__pycache__ +src/GoogleAnalytics/__pycache__ diff --git a/src/GoogleAnalytics/ResolutionProvider.py b/src/GoogleAnalytics/ResolutionProvider.py new file mode 100644 index 00000000000..ea9b87dac24 --- /dev/null +++ b/src/GoogleAnalytics/ResolutionProvider.py @@ -0,0 +1,15 @@ +from enum import Enum +import linecache + + +class Resolution(Enum): + CITY = 1 + COUNTRY = 2 + + +class ResolutionProvider: + + @staticmethod + def getResolution(file): + columns = linecache.getline(file, 7) + return Resolution.CITY if 'City' in columns else Resolution.COUNTRY diff --git a/src/GoogleAnalytics/ResolutionProviderTest.py b/src/GoogleAnalytics/ResolutionProviderTest.py new file mode 100644 index 00000000000..ddf7d49b007 --- /dev/null +++ b/src/GoogleAnalytics/ResolutionProviderTest.py @@ -0,0 +1,23 @@ +import unittest +from GoogleAnalytics.ResolutionProvider import Resolution, ResolutionProvider + + +class ResolutionProviderTest(unittest.TestCase): + + def test_getResolution_COUNTRY(self): + # Given + + # When + resolution = ResolutionProvider.getResolution('src/testdata/GoogleAnalytics/CountryByBatchcode 20230302-20230430.csv') + + # Then + self.assertEqual(resolution, Resolution.COUNTRY) + + def test_getResolution_CITY(self): + # Given + + # When + resolution = ResolutionProvider.getResolution('src/testdata/GoogleAnalytics/CountryByBatchcode 20230730-20230929.csv') + + # Then + self.assertEqual(resolution, Resolution.CITY) diff --git a/src/GoogleAnalytics/__init__.py b/src/GoogleAnalytics/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/GoogleAnalyticsReaderTest.py b/src/GoogleAnalyticsReaderTest.py index afd28f534be..13ac554bb1d 100644 --- a/src/GoogleAnalyticsReaderTest.py +++ b/src/GoogleAnalyticsReaderTest.py @@ -12,4 +12,4 @@ class GoogleAnalyticsReaderTest(unittest.TestCase): dateRange = googleAnalyticsReader.getDateRange() # Then - self.assertEqual(dateRange, (date(2023, 3, 2), date(2023, 5, 31))) + self.assertEqual(dateRange, (date(2023, 3, 2), date(2023, 9, 29))) diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index e07504bd44f..e8ea731aa03 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -2,10 +2,19 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "9de5907f-18f5-4cb1-903e-26028ff1fa03", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-10-03 00:48:04.102691: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + } + ], "source": [ "from CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter import CountryCountsByBatchcodeTable2BarChartDescriptionTableConverter\n", "from JensenShannonDistance2BarChartDescriptionColumnAdder import JensenShannonDistance2BarChartDescriptionColumnAdder\n", @@ -32,20 +41,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "d1e4fa9e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "03.10.2023, 00:48:07 Uhr\n" + ] + } + ], "source": [ "print(datetime.now().strftime(\"%d.%m.%Y, %H:%M:%S Uhr\"))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "ffad1c04", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " lastUpdated: 2023-09-29 00:00:00\n", + "lastUpdatedDataSource: 2023-09-29 00:00:00\n", + "needsUpdate: False\n" + ] + } + ], "source": [ "dateProvider = DateProvider()\n", "print(' lastUpdated:', dateProvider.getLastUpdated())\n", @@ -56,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "194b7357", "metadata": {}, "outputs": [], @@ -78,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "58333a19", "metadata": {}, "outputs": [], @@ -88,30 +115,649 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "f10b558f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | VAX_TYPE | \n", + "VAX_MANU | \n", + "VAX_LOT | \n", + "VAX_DOSE_SERIES | \n", + "COUNTRY | \n", + "
|---|---|---|---|---|---|
| VAERS_ID | \n", + "\n", + " | \n", + " | \n", + " | \n", + " | \n", + " |
| 902418 | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH9899 | \n", + "1 | \n", + "<NA> | \n", + "
| 902440 | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH 9899 | \n", + "1 | \n", + "<NA> | \n", + "
| 902446 | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH9899 | \n", + "1 | \n", + "<NA> | \n", + "
| 902464 | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH9899 | \n", + "UNK | \n", + "<NA> | \n", + "
| 902465 | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH9899 | \n", + "1 | \n", + "<NA> | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 2684965 | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "HD9835 | \n", + "1 | \n", + "<NA> | \n", + "
| 2684975 | \n", + "COVID19 | \n", + "MODERNA | \n", + "216C45 | \n", + "3 | \n", + "<NA> | \n", + "
| 2684979 | \n", + "COVID19 | \n", + "MODERNA | \n", + "037A22B | \n", + "1 | \n", + "<NA> | \n", + "
| 2684982 | \n", + "COVID19 | \n", + "MODERNA | \n", + "3004675 | \n", + "3 | \n", + "<NA> | \n", + "
| 2684990 | \n", + "COVID19 | \n", + "MODERNA | \n", + "NDC:80777-0102- | \n", + "1 | \n", + "<NA> | \n", + "
1076871 rows × 5 columns
\n", + "| \n", + " | SYMPTOM1 | \n", + "SYMPTOM2 | \n", + "SYMPTOM3 | \n", + "SYMPTOM4 | \n", + "SYMPTOM5 | \n", + "
|---|---|---|---|---|---|
| VAERS_ID | \n", + "\n", + " | \n", + " | \n", + " | \n", + " | \n", + " |
| 855017 | \n", + "Arthralgia | \n", + "Chills | \n", + "Injection site pain | \n", + "Pyrexia | \n", + "NaN | \n", + "
| 855018 | \n", + "Chills | \n", + "Fatigue | \n", + "Hypertension | \n", + "Hypoaesthesia | \n", + "Injected limb mobility decreased | \n", + "
| 855018 | \n", + "Muscular weakness | \n", + "Pain in extremity | \n", + "Pyrexia | \n", + "Tremor | \n", + "Vertigo | \n", + "
| 855019 | \n", + "Pain | \n", + "Pruritus | \n", + "Rash | \n", + "NaN | \n", + "NaN | \n", + "
| 855020 | \n", + "Chills | \n", + "Influenza like illness | \n", + "Myalgia | \n", + "Pain in extremity | \n", + "Pyrexia | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 2684975 | \n", + "Haemodynamic test | \n", + "Hypoaesthesia | \n", + "Laboratory test | \n", + "Magnetic resonance imaging abdominal | \n", + "Magnetic resonance imaging thoracic | \n", + "
| 2684975 | \n", + "Nervous system disorder | \n", + "Rash macular | \n", + "Renal cyst | \n", + "Respiratory distress | \n", + "Respiratory failure | \n", + "
| 2684975 | \n", + "Vomiting | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
| 2684982 | \n", + "Anaemia macrocytic | \n", + "Blood magnesium | \n", + "Hypercalcaemia | \n", + "Hypomagnesaemia | \n", + "Magnesium deficiency | \n", + "
| 2684982 | \n", + "Nerve injury | \n", + "Paraesthesia | \n", + "SARS-CoV-2 test | \n", + "NaN | \n", + "NaN | \n", + "
2584689 rows × 5 columns
\n", + "| \n", + " | RECVDATE | \n", + "DIED | \n", + "L_THREAT | \n", + "ER_VISIT | \n", + "HOSPITAL | \n", + "DISABLE | \n", + "SPLTTYPE | \n", + "VAX_TYPE | \n", + "VAX_MANU | \n", + "VAX_LOT | \n", + "VAX_DOSE_SERIES | \n", + "COUNTRY | \n", + "SEVERE | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VAERS_ID | \n", + "\n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " |
| 902418 | \n", + "2020-12-15 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH9899 | \n", + "1 | \n", + "United States | \n", + "0 | \n", + "
| 902440 | \n", + "2020-12-15 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH 9899 | \n", + "1 | \n", + "United States | \n", + "0 | \n", + "
| 902446 | \n", + "2020-12-15 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH9899 | \n", + "1 | \n", + "United States | \n", + "0 | \n", + "
| 902464 | \n", + "2020-12-15 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH9899 | \n", + "UNK | \n", + "United States | \n", + "0 | \n", + "
| 902465 | \n", + "2020-12-15 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH9899 | \n", + "1 | \n", + "United States | \n", + "0 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 2684915 | \n", + "2023-09-21 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "NZPFIZER INCPV20230015804 | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "<NA> | \n", + "1 | \n", + "New Zealand | \n", + "1 | \n", + "
| 2684916 | \n", + "2023-09-21 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "NZPFIZER INCPV20230015804 | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "<NA> | \n", + "1 | \n", + "New Zealand | \n", + "0 | \n", + "
| 2684919 | \n", + "2023-09-21 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "NaN | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "FA4632 | \n", + "1 | \n", + "<NA> | \n", + "1 | \n", + "
| 2684975 | \n", + "2023-09-22 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "MODERNA | \n", + "216C45 | \n", + "3 | \n", + "<NA> | \n", + "0 | \n", + "
| 2684982 | \n", + "2023-09-22 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "MODERNA | \n", + "3004675 | \n", + "3 | \n", + "<NA> | \n", + "0 | \n", + "
1596698 rows × 13 columns
\n", + "| \n", + " | RECVDATE | \n", + "DIED | \n", + "L_THREAT | \n", + "ER_VISIT | \n", + "HOSPITAL | \n", + "DISABLE | \n", + "SPLTTYPE | \n", + "VAX_TYPE | \n", + "VAX_MANU | \n", + "VAX_LOT | \n", + "VAX_DOSE_SERIES | \n", + "SEVERE | \n", + "COUNTRY | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| VAERS_ID | \n", + "\n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " |
| 902418 | \n", + "2020-12-15 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH9899 | \n", + "1 | \n", + "0 | \n", + "United States | \n", + "
| 902440 | \n", + "2020-12-15 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH 9899 | \n", + "1 | \n", + "0 | \n", + "United States | \n", + "
| 902446 | \n", + "2020-12-15 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH9899 | \n", + "1 | \n", + "0 | \n", + "United States | \n", + "
| 902464 | \n", + "2020-12-15 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH9899 | \n", + "UNK | \n", + "0 | \n", + "United States | \n", + "
| 902465 | \n", + "2020-12-15 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "PFIZER\\BIONTECH | \n", + "EH9899 | \n", + "1 | \n", + "0 | \n", + "United States | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 2684975 | \n", + "2023-09-22 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "MODERNA | \n", + "216C45 | \n", + "3 | \n", + "0 | \n", + "<NA> | \n", + "
| 2684979 | \n", + "2023-09-22 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "NaN | \n", + "COVID19 | \n", + "MODERNA | \n", + "037A22B | \n", + "1 | \n", + "1 | \n", + "United States | \n", + "
| 2684982 | \n", + "2023-09-22 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "MODERNA | \n", + "3004675 | \n", + "3 | \n", + "0 | \n", + "<NA> | \n", + "
| 2684989 | \n", + "2023-09-22 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "MODERNA | \n", + "<NA> | \n", + "UNK | \n", + "0 | \n", + "United States | \n", + "
| 2684990 | \n", + "2023-09-22 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "NaN | \n", + "COVID19 | \n", + "MODERNA | \n", + "NDC:80777-0102- | \n", + "1 | \n", + "0 | \n", + "United States | \n", + "
1596698 rows × 13 columns
\n", + "| \n", + " | Batch | \n", + "Adverse Reaction Reports | \n", + "Deaths | \n", + "Disabilities | \n", + "Life Threatening Illnesses | \n", + "Company | \n", + "Severe reports | \n", + "Lethality | \n", + "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "C202105026 | \n", + "3 | \n", + "3 | \n", + "0 | \n", + "0 | \n", + "UNKNOWN MANUFACTURER | \n", + "NaN | \n", + "NaN | \n", + "
| 1 | \n", + "J202100034 | \n", + "3 | \n", + "3 | \n", + "0 | \n", + "0 | \n", + "UNKNOWN MANUFACTURER | \n", + "NaN | \n", + "NaN | \n", + "
| 2 | \n", + "JJ-21C14-02 | \n", + "4 | \n", + "0 | \n", + "4 | \n", + "0 | \n", + "JANSSEN | \n", + "NaN | \n", + "NaN | \n", + "
| 3 | \n", + "J202103011 | \n", + "3 | \n", + "3 | \n", + "0 | \n", + "0 | \n", + "UNKNOWN MANUFACTURER | \n", + "NaN | \n", + "NaN | \n", + "
| 4 | \n", + "J20210311 | \n", + "3 | \n", + "3 | \n", + "0 | \n", + "0 | \n", + "UNKNOWN MANUFACTURER | \n", + "NaN | \n", + "NaN | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 9226 | \n", + "21C19-02 | \n", + "20 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "JANSSEN | \n", + "NaN | \n", + "NaN | \n", + "
| 9227 | \n", + "21C19-05 | \n", + "6 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "JANSSEN | \n", + "NaN | \n", + "NaN | \n", + "
| 9228 | \n", + "21C13-04 | \n", + "8 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "JANSSEN | \n", + "NaN | \n", + "NaN | \n", + "
| 9229 | \n", + "21C1102 | \n", + "3 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "JANSSEN | \n", + "NaN | \n", + "NaN | \n", + "
| 9230 | \n", + "21C14-05 | \n", + "10 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "JANSSEN | \n", + "NaN | \n", + "NaN | \n", + "
9231 rows × 8 columns
\n", + "| \n", + " | \n", + " | COUNTRY_COUNT_BY_VAX_LOT Clicked | \n", + "COUNTRY_COUNT_BY_VAX_LOT Before Deletion | \n", + "
|---|---|---|---|
| VAX_LOT | \n", + "COUNTRY | \n", + "\n", + " | \n", + " |
| J202100034 | \n", + "Austria | \n", + "1 | \n", + "0 | \n", + "
| Germany | \n", + "1 | \n", + "0 | \n", + "|
| United States | \n", + "1 | \n", + "0 | \n", + "|
| JJ-21C14-02 | \n", + "Belgium | \n", + "1 | \n", + "3 | \n", + "
| Poland | \n", + "1 | \n", + "0 | \n", + "|
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 21C13-04 | \n", + "Switzerland | \n", + "1 | \n", + "0 | \n", + "
| United States | \n", + "1 | \n", + "0 | \n", + "|
| 21C1102 | \n", + "France | \n", + "0 | \n", + "2 | \n", + "
| Germany | \n", + "3 | \n", + "0 | \n", + "|
| 21C14-05 | \n", + "France | \n", + "0 | \n", + "10 | \n", + "
29310 rows × 2 columns
\n", + "| \n", + " | \n", + " | COUNTRY_COUNT_BY_VAX_LOT Clicked | \n", + "COUNTRY_COUNT_BY_VAX_LOT Before Deletion | \n", + "
|---|---|---|---|
| VAX_LOT | \n", + "COUNTRY | \n", + "\n", + " | \n", + " |
| EX8679 | \n", + "Germany | \n", + "1099 | \n", + "1132 | \n", + "
| FC3095 | \n", + "Germany | \n", + "871 | \n", + "1099 | \n", + "
| FD7958 | \n", + "Germany | \n", + "846 | \n", + "1049 | \n", + "
| FD9234 | \n", + "Germany | \n", + "759 | \n", + "878 | \n", + "
| ET3045 | \n", + "Germany | \n", + "704 | \n", + "716 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 026C21A | \n", + "Germany | \n", + "0 | \n", + "1 | \n", + "
| 000 106A | \n", + "Germany | \n", + "0 | \n", + "3 | \n", + "
| 0000437 | \n", + "Germany | \n", + "0 | \n", + "3 | \n", + "
| JO7BX03 | \n", + "Germany | \n", + "0 | \n", + "2 | \n", + "
| 6202 | \n", + "Germany | \n", + "0 | \n", + "1 | \n", + "
2520 rows × 2 columns
\n", + "