# src/GoogleAnalytics/Helper.py
#
# Provenance: new file introduced by patch 4ae1c7f2c8c2897da355af1622136d536bd61dbb
# ("[PATCH] refactoring", frankknoll, Thu, 5 Oct 2023 10:54:56 +0200).
# The same patch also changes src/HowBadIsMyBatch.ipynb
# (diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb, index 58317f32ca7..3aff8ec2eb5 100644).
"""Helpers that export per-batch city click counts as Excel files."""

import re
import shutil
from pathlib import Path

# Characters that are not portable inside a single file name: path separators,
# characters reserved on Windows, and ASCII control characters.
_UNSAFE_FILE_NAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')


def persistCityCountsByClickedBatchcodeTables(dataDir, n, cityCountsByClickedBatchcodeTable):
    """Write one Excel file per batch for the ``n`` most common batches.

    ``dataDir`` is deleted and recreated first, so files from earlier runs do
    not survive. For every selected batch a file ``<count>_<batch>.xlsx`` is
    written that contains the rows of that batch.

    Args:
        dataDir: Output directory (``str`` or ``pathlib.Path``).
        n: Number of batches to export (applied as ``iloc[:n]``).
        cityCountsByClickedBatchcodeTable: DataFrame that can be grouped by
            ``'VAX_LOT'``, carries the batch code in index level 0 and has a
            ``CITY_COUNT_BY_VAX_LOT`` column.
    """
    # Accept plain strings as well as Path objects.
    dataDir = Path(dataDir)
    _prepare(dataDir)
    for row in _getMostCommonBatchesTable(n, cityCountsByClickedBatchcodeTable).itertuples():
        _persistCityCountsByClickedBatchcodeTable(
            dataDir = dataDir,
            batch = row.Index,
            count = row.CITY_COUNT_BY_VAX_LOT,
            cityCountsByClickedBatchcodeTable = _rowsOfBatch(cityCountsByClickedBatchcodeTable, row.Index))


def _prepare(dataDir):
    """Remove ``dataDir`` with everything in it and recreate it empty."""
    dataDir = Path(dataDir)
    # Best effort: a missing directory (first run) is not an error.
    shutil.rmtree(dataDir, ignore_errors = True)
    dataDir.mkdir(parents = True, exist_ok = True)


def _getMostCommonBatchesTable(n, cityCountsByClickedBatchcodeTable):
    """Return the first ``n`` batches ordered by descending summed count.

    The table is grouped by ``'VAX_LOT'``, summed, sorted by
    ``CITY_COUNT_BY_VAX_LOT`` in descending order, and the ``'(not set)'``
    entry is dropped before the first ``n`` rows are taken.
    """
    df_sorted = _dataframeWithoutIndexValue(
        dataframe = (cityCountsByClickedBatchcodeTable
                     .groupby('VAX_LOT')
                     .sum()
                     .sort_values(by = 'CITY_COUNT_BY_VAX_LOT', ascending = False)),
        indexValue = '(not set)')
    return df_sorted.iloc[:n]


def _dataframeWithoutIndexValue(dataframe, indexValue):
    """Return the rows of ``dataframe`` whose index differs from ``indexValue``."""
    return dataframe[dataframe.index != indexValue]


def _rowsOfBatch(cityCountsByClickedBatchcodeTable, batch):
    """Return the rows whose first index level equals ``batch``.

    All index levels are kept and the row order is unchanged. Selecting by a
    mask on level 0 works for any number of index levels.
    """
    isBatch = cityCountsByClickedBatchcodeTable.index.get_level_values(0) == batch
    return cityCountsByClickedBatchcodeTable[isBatch]


def _toSafeFileNamePart(value):
    """Return ``str(value)`` with characters unsafe in file names replaced by ``_``.

    Note: distinct values that differ only in replaced characters map to the
    same result.
    """
    return _UNSAFE_FILE_NAME_CHARS.sub('_', str(value))


def _persistCityCountsByClickedBatchcodeTable(dataDir, batch, count, cityCountsByClickedBatchcodeTable):
    """Write the table of one batch to ``<dataDir>/<count>_<batch>.xlsx``.

    The batch code is sanitized first so that a code containing e.g. ``/``
    cannot point outside ``dataDir`` or produce an invalid file name.
    """
    fileName = f'{count}_{_toSafeFileNamePart(batch)}.xlsx'
    (cityCountsByClickedBatchcodeTable
        .reset_index()
        .to_excel(str(Path(dataDir) / fileName)))
    # import generated xlsx files into https://www.google.com/mymaps

# The patch continues below with the diff of src/HowBadIsMyBatch.ipynb.
a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -24,6 +24,8 @@ "from VAERSFileDownloader import updateVAERSFiles\n", "from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger\n", "from IOUtils import IOUtils\n", + "from pathlib import Path\n", + "from GoogleAnalytics.Helper import persistCityCountsByClickedBatchcodeTables\n", "import os\n", "import pandas as pd\n", "\n", @@ -268,30 +270,6 @@ "cityCountsByClickedBatchcodeTable" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "6883ba3a", - "metadata": {}, - "outputs": [], - "source": [ - "def dataframeWithoutIndexValue(dataframe, indexValue):\n", - " return dataframe[dataframe.index != indexValue]\n", - "\n", - "def getMaxBatch(cityCountsByClickedBatchcodeTable):\n", - " df_sorted = dataframeWithoutIndexValue(\n", - " dataframe = (cityCountsByClickedBatchcodeTable\n", - " .groupby('VAX_LOT')\n", - " .sum()\n", - " .sort_values(by = 'CITY_COUNT_BY_VAX_LOT', ascending = False)),\n", - " indexValue = '(not set)')\n", - " print(df_sorted)\n", - " return df_sorted.iloc[0].name\n", - "\n", - "maxBatch = getMaxBatch(cityCountsByClickedBatchcodeTable)\n", - "maxBatch" - ] - }, { "cell_type": "code", "execution_count": null, @@ -299,21 +277,10 @@ "metadata": {}, "outputs": [], "source": [ - "cityCountsByClickedBatchcodeTable_maxBatch = cityCountsByClickedBatchcodeTable.loc[(maxBatch, slice(None), slice(None), slice(None)), :]\n", - "cityCountsByClickedBatchcodeTable_maxBatch" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8d7a8bbf", - "metadata": {}, - "outputs": [], - "source": [ - "(cityCountsByClickedBatchcodeTable_maxBatch\n", - " .reset_index()\n", - " .to_excel('tmp/cityCountsByClickedBatchcodeTable_maxBatch.xlsx'))\n", - "# import cityCountsByClickedBatchcodeTable_maxBatch.xlsx into https://www.google.com/mymaps" + "persistCityCountsByClickedBatchcodeTables(\n", + " dataDir = 
Path('tmp/cityCountsByClickedBatchcodeTables'),\n", + " n = 10,\n", + " cityCountsByClickedBatchcodeTable = cityCountsByClickedBatchcodeTable)" ] }, {