refactoring

2023-10-05 10:54:56 +02:00
parent 48727ddaa2
commit 4ae1c7f2c8
2 changed files with 38 additions and 39 deletions
--- a/src/GoogleAnalytics/Helper.py
+++ b/src/GoogleAnalytics/Helper.py
@@ -0,0 +1,32 @@
+import shutil
+
+def persistCityCountsByClickedBatchcodeTables(dataDir, n, cityCountsByClickedBatchcodeTable):
+    """Write one xlsx file per batch for the first n rows of the most-common-batches table.
+
+    dataDir is emptied and recreated via _prepare before anything is written.
+    For every row of the table returned by _getMostCommonBatchesTable, the rows of
+    cityCountsByClickedBatchcodeTable whose first index level equals that batch
+    are selected and handed to _persistCityCountsByClickedBatchcodeTable.
+
+    The .loc lookup uses a 4-tuple key, i.e. it addresses four index levels.
+    """
+    _prepare(dataDir)
+    mostCommonBatchesTable = _getMostCommonBatchesTable(n, cityCountsByClickedBatchcodeTable)
+    allValues = slice(None)
+    for row in mostCommonBatchesTable.itertuples():
+        batch = row.Index
+        count = row.CITY_COUNT_BY_VAX_LOT
+        # keep only the rows of this batch, whatever the remaining three index levels hold
+        rowsOfBatch = cityCountsByClickedBatchcodeTable.loc[(batch, allValues, allValues, allValues), :]
+        _persistCityCountsByClickedBatchcodeTable(
+            dataDir = dataDir,
+            batch = batch,
+            count = count,
+            cityCountsByClickedBatchcodeTable = rowsOfBatch)
+
+def _prepare(dataDir):
+    """Leave dataDir as an existing, empty directory.
+
+    Any existing tree at dataDir is removed (removal errors are ignored, so a
+    missing directory is fine), then the directory is created again together
+    with any missing parents. dataDir must offer a mkdir method accepting
+    parents/exist_ok, as pathlib.Path does.
+    """
+    shutil.rmtree(dataDir, ignore_errors=True)
+    dataDir.mkdir(parents=True, exist_ok=True)
+
+def _getMostCommonBatchesTable(n, cityCountsByClickedBatchcodeTable):
+    """Return the first n rows of the per-batch totals, largest total first.
+
+    The table is grouped by 'VAX_LOT' and summed, ordered by descending
+    'CITY_COUNT_BY_VAX_LOT', and the row labelled '(not set)' is dropped
+    before the first n rows are taken.
+    """
+    totalsByBatch = cityCountsByClickedBatchcodeTable.groupby('VAX_LOT').sum()
+    totalsByBatchDescending = totalsByBatch.sort_values(
+        by = 'CITY_COUNT_BY_VAX_LOT',
+        ascending = False)
+    totalsOfSetBatches = _dataframeWithoutIndexValue(
+        dataframe = totalsByBatchDescending,
+        indexValue = '(not set)')
+    return totalsOfSetBatches.iloc[:n]
+
+def _dataframeWithoutIndexValue(dataframe, indexValue):
+    """Return the rows of dataframe whose index label is not equal to indexValue."""
+    hasOtherIndexValue = dataframe.index != indexValue
+    return dataframe[hasOtherIndexValue]
+    
+def _persistCityCountsByClickedBatchcodeTable(dataDir, batch, count, cityCountsByClickedBatchcodeTable):
+    """Save the given table as '<count>_<batch>.xlsx' inside dataDir.
+
+    The index is turned into ordinary columns (reset_index) before the table
+    is written with to_excel.
+    """
+    tableWithIndexAsColumns = cityCountsByClickedBatchcodeTable.reset_index()
+    excelFile = f'{str(dataDir)}/{count}_{batch}.xlsx'
+    tableWithIndexAsColumns.to_excel(excelFile)
+    # import generated xlsx files into https://www.google.com/mymaps
--- a/src/HowBadIsMyBatch.ipynb
+++ b/src/HowBadIsMyBatch.ipynb
@@ -24,6 +24,8 @@
    "from VAERSFileDownloader import updateVAERSFiles\n",
    "from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger\n",
    "from IOUtils import IOUtils\n",
+    "from pathlib import Path\n",
+    "from GoogleAnalytics.Helper import persistCityCountsByClickedBatchcodeTables\n",
    "import os\n",
    "import pandas as pd\n",
    "\n",
@@ -268,30 +270,6 @@
    "cityCountsByClickedBatchcodeTable"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6883ba3a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def dataframeWithoutIndexValue(dataframe, indexValue):\n",
-    "    return dataframe[dataframe.index != indexValue]\n",
-    "\n",
-    "def getMaxBatch(cityCountsByClickedBatchcodeTable):\n",
-    "    df_sorted = dataframeWithoutIndexValue(\n",
-    "        dataframe = (cityCountsByClickedBatchcodeTable\n",
-    "                        .groupby('VAX_LOT')\n",
-    "                        .sum()\n",
-    "                        .sort_values(by = 'CITY_COUNT_BY_VAX_LOT', ascending = False)),\n",
-    "        indexValue = '(not set)')\n",
-    "    print(df_sorted)\n",
-    "    return df_sorted.iloc[0].name\n",
-    "\n",
-    "maxBatch = getMaxBatch(cityCountsByClickedBatchcodeTable)\n",
-    "maxBatch"
-   ]
-  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -299,21 +277,10 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "cityCountsByClickedBatchcodeTable_maxBatch = cityCountsByClickedBatchcodeTable.loc[(maxBatch, slice(None), slice(None), slice(None)), :]\n",
-    "cityCountsByClickedBatchcodeTable_maxBatch"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "8d7a8bbf",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "(cityCountsByClickedBatchcodeTable_maxBatch\n",
-    " .reset_index()\n",
-    " .to_excel('tmp/cityCountsByClickedBatchcodeTable_maxBatch.xlsx'))\n",
-    "# import cityCountsByClickedBatchcodeTable_maxBatch.xlsx into https://www.google.com/mymaps"
+    "persistCityCountsByClickedBatchcodeTables(\n",
+    "    dataDir = Path('tmp/cityCountsByClickedBatchcodeTables'),\n",
+    "    n = 10,\n",
+    "    cityCountsByClickedBatchcodeTable = cityCountsByClickedBatchcodeTable)"
   ]
  },
  {