refactoring

This commit is contained in:
frankknoll
2023-10-05 10:54:56 +02:00
parent 48727ddaa2
commit 4ae1c7f2c8
2 changed files with 38 additions and 39 deletions

View File

@@ -0,0 +1,32 @@
import shutil
def persistCityCountsByClickedBatchcodeTables(dataDir, n, cityCountsByClickedBatchcodeTable):
    """Write one xlsx file per batch for the n most common batches.

    dataDir is reset by _prepare before anything is written. The batches are
    taken from _getMostCommonBatchesTable, and each batch's rows are handed to
    _persistCityCountsByClickedBatchcodeTable together with its summed count.
    """
    _prepare(dataDir)
    anyValue = slice(None)  # selects every entry of one index level
    mostCommonBatches = _getMostCommonBatchesTable(n, cityCountsByClickedBatchcodeTable)
    for batchRow in mostCommonBatches.itertuples():
        batch = batchRow.Index
        count = batchRow.CITY_COUNT_BY_VAX_LOT
        # The selector has four entries with the batch first, i.e. it assumes a
        # four-level row index whose first level is the batch code.
        rowsOfBatch = cityCountsByClickedBatchcodeTable.loc[(batch, anyValue, anyValue, anyValue), :]
        _persistCityCountsByClickedBatchcodeTable(
            dataDir = dataDir,
            batch = batch,
            count = count,
            cityCountsByClickedBatchcodeTable = rowsOfBatch)
def _prepare(dataDir):
    """Reset dataDir to an empty, existing directory.

    Args:
        dataDir: Target directory as a str, os.PathLike or pathlib.Path.
    """
    from pathlib import Path  # local import keeps this function self-contained

    targetDir = Path(dataDir)
    # ignore_errors lets the call succeed when the directory does not exist yet.
    shutil.rmtree(targetDir, ignore_errors = True)
    targetDir.mkdir(parents = True, exist_ok = True)
def _getMostCommonBatchesTable(n, cityCountsByClickedBatchcodeTable):
    """Return the first n batches ordered by summed CITY_COUNT_BY_VAX_LOT, descending.

    Rows are grouped by 'VAX_LOT' and summed. The group labelled '(not set)'
    is removed before the first n rows are taken.
    """
    countsByBatch = cityCountsByClickedBatchcodeTable.groupby('VAX_LOT').sum()
    batchesByCountDescending = countsByBatch.sort_values(
        by = 'CITY_COUNT_BY_VAX_LOT',
        ascending = False)
    # Filter first so that '(not set)' never occupies one of the n slots.
    knownBatches = _dataframeWithoutIndexValue(
        dataframe = batchesByCountDescending,
        indexValue = '(not set)')
    return knownBatches.iloc[:n]
def _dataframeWithoutIndexValue(dataframe, indexValue):
    """Return the rows of dataframe whose index label is not equal to indexValue."""
    keepRow = dataframe.index != indexValue
    return dataframe.loc[keepRow]
def _persistCityCountsByClickedBatchcodeTable(dataDir, batch, count, cityCountsByClickedBatchcodeTable):
    """Save the given table as '<count>_<batch>.xlsx' inside dataDir."""
    # reset_index moves the index levels into regular columns before export.
    flatTable = cityCountsByClickedBatchcodeTable.reset_index()
    excelFile = f'{str(dataDir)}/{count}_{batch}.xlsx'
    flatTable.to_excel(excelFile)
    # import generated xlsx files into https://www.google.com/mymaps

View File

@@ -24,6 +24,8 @@
"from VAERSFileDownloader import updateVAERSFiles\n", "from VAERSFileDownloader import updateVAERSFiles\n",
"from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger\n", "from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger\n",
"from IOUtils import IOUtils\n", "from IOUtils import IOUtils\n",
"from pathlib import Path\n",
"from GoogleAnalytics.Helper import persistCityCountsByClickedBatchcodeTables\n",
"import os\n", "import os\n",
"import pandas as pd\n", "import pandas as pd\n",
"\n", "\n",
@@ -268,30 +270,6 @@
"cityCountsByClickedBatchcodeTable" "cityCountsByClickedBatchcodeTable"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "6883ba3a",
"metadata": {},
"outputs": [],
"source": [
"def dataframeWithoutIndexValue(dataframe, indexValue):\n",
" return dataframe[dataframe.index != indexValue]\n",
"\n",
"def getMaxBatch(cityCountsByClickedBatchcodeTable):\n",
" df_sorted = dataframeWithoutIndexValue(\n",
" dataframe = (cityCountsByClickedBatchcodeTable\n",
" .groupby('VAX_LOT')\n",
" .sum()\n",
" .sort_values(by = 'CITY_COUNT_BY_VAX_LOT', ascending = False)),\n",
" indexValue = '(not set)')\n",
" print(df_sorted)\n",
" return df_sorted.iloc[0].name\n",
"\n",
"maxBatch = getMaxBatch(cityCountsByClickedBatchcodeTable)\n",
"maxBatch"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -299,21 +277,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"cityCountsByClickedBatchcodeTable_maxBatch = cityCountsByClickedBatchcodeTable.loc[(maxBatch, slice(None), slice(None), slice(None)), :]\n", "persistCityCountsByClickedBatchcodeTables(\n",
"cityCountsByClickedBatchcodeTable_maxBatch" " dataDir = Path('tmp/cityCountsByClickedBatchcodeTables'),\n",
] " n = 10,\n",
}, " cityCountsByClickedBatchcodeTable = cityCountsByClickedBatchcodeTable)"
{
"cell_type": "code",
"execution_count": null,
"id": "8d7a8bbf",
"metadata": {},
"outputs": [],
"source": [
"(cityCountsByClickedBatchcodeTable_maxBatch\n",
" .reset_index()\n",
" .to_excel('tmp/cityCountsByClickedBatchcodeTable_maxBatch.xlsx'))\n",
"# import cityCountsByClickedBatchcodeTable_maxBatch.xlsx into https://www.google.com/mymaps"
] ]
}, },
{ {