refactoring
This commit is contained in:
32
src/GoogleAnalytics/Helper.py
Normal file
32
src/GoogleAnalytics/Helper.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
import shutil
|
||||||
|
|
||||||
|
def persistCityCountsByClickedBatchcodeTables(dataDir, n, cityCountsByClickedBatchcodeTable):
    """Write one xlsx file per most common batch into a freshly reset ``dataDir``.

    ``dataDir`` is first wiped and recreated by ``_prepare``.  Then, for each
    row of the table returned by ``_getMostCommonBatchesTable`` (at most ``n``
    rows), the matching slice of ``cityCountsByClickedBatchcodeTable`` is
    handed to ``_persistCityCountsByClickedBatchcodeTable`` together with the
    row's index value (the batch) and its ``CITY_COUNT_BY_VAX_LOT`` value.

    NOTE(review): the ``.loc`` lookup below uses a four-element key, so the
    table is presumably indexed by a four-level MultiIndex whose first level
    is the batch code -- confirm against the caller.
    """
    _prepare(dataDir)
    mostCommonBatches = _getMostCommonBatchesTable(n, cityCountsByClickedBatchcodeTable)
    anyValue = slice(None)
    for batchRow in mostCommonBatches.itertuples():
        batchcode = batchRow.Index
        # Keep every row of this batch; the remaining index levels are unrestricted.
        tableOfBatch = cityCountsByClickedBatchcodeTable.loc[(batchcode, anyValue, anyValue, anyValue), :]
        _persistCityCountsByClickedBatchcodeTable(
            dataDir = dataDir,
            batch = batchcode,
            count = batchRow.CITY_COUNT_BY_VAX_LOT,
            cityCountsByClickedBatchcodeTable = tableOfBatch)
|
||||||
|
|
||||||
|
def _prepare(dataDir):
|
||||||
|
shutil.rmtree(dataDir, ignore_errors = True)
|
||||||
|
dataDir.mkdir(parents = True, exist_ok = True)
|
||||||
|
|
||||||
|
def _getMostCommonBatchesTable(n, cityCountsByClickedBatchcodeTable):
    """Return the first ``n`` batches ranked by summed ``CITY_COUNT_BY_VAX_LOT``.

    The table is grouped by ``'VAX_LOT'`` and summed, the sums are sorted in
    descending order of ``'CITY_COUNT_BY_VAX_LOT'``, and the row whose index
    value is ``'(not set)'`` is dropped before the first ``n`` rows are taken.
    """
    totalsByBatch = cityCountsByClickedBatchcodeTable.groupby('VAX_LOT').sum()
    rankedTotals = totalsByBatch.sort_values(by = 'CITY_COUNT_BY_VAX_LOT', ascending = False)
    rankedKnownBatches = _dataframeWithoutIndexValue(
        dataframe = rankedTotals,
        indexValue = '(not set)')
    return rankedKnownBatches.iloc[:n]
|
||||||
|
|
||||||
|
def _dataframeWithoutIndexValue(dataframe, indexValue):
|
||||||
|
return dataframe[dataframe.index != indexValue]
|
||||||
|
|
||||||
|
def _persistCityCountsByClickedBatchcodeTable(dataDir, batch, count, cityCountsByClickedBatchcodeTable):
    """Write the table to ``<dataDir>/<count>_<batch>.xlsx``.

    The index of the table is turned into regular columns via
    ``reset_index`` before the Excel file is written.
    """
    excelFile = f'{str(dataDir)}/{count}_{batch}.xlsx'
    flatTable = cityCountsByClickedBatchcodeTable.reset_index()
    flatTable.to_excel(excelFile)
|
||||||
|
# import generated xlsx files into https://www.google.com/mymaps
|
||||||
@@ -24,6 +24,8 @@
|
|||||||
"from VAERSFileDownloader import updateVAERSFiles\n",
|
"from VAERSFileDownloader import updateVAERSFiles\n",
|
||||||
"from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger\n",
|
"from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger\n",
|
||||||
"from IOUtils import IOUtils\n",
|
"from IOUtils import IOUtils\n",
|
||||||
|
"from pathlib import Path\n",
|
||||||
|
"from GoogleAnalytics.Helper import persistCityCountsByClickedBatchcodeTables\n",
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -268,30 +270,6 @@
|
|||||||
"cityCountsByClickedBatchcodeTable"
|
"cityCountsByClickedBatchcodeTable"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "6883ba3a",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def dataframeWithoutIndexValue(dataframe, indexValue):\n",
|
|
||||||
" return dataframe[dataframe.index != indexValue]\n",
|
|
||||||
"\n",
|
|
||||||
"def getMaxBatch(cityCountsByClickedBatchcodeTable):\n",
|
|
||||||
" df_sorted = dataframeWithoutIndexValue(\n",
|
|
||||||
" dataframe = (cityCountsByClickedBatchcodeTable\n",
|
|
||||||
" .groupby('VAX_LOT')\n",
|
|
||||||
" .sum()\n",
|
|
||||||
" .sort_values(by = 'CITY_COUNT_BY_VAX_LOT', ascending = False)),\n",
|
|
||||||
" indexValue = '(not set)')\n",
|
|
||||||
" print(df_sorted)\n",
|
|
||||||
" return df_sorted.iloc[0].name\n",
|
|
||||||
"\n",
|
|
||||||
"maxBatch = getMaxBatch(cityCountsByClickedBatchcodeTable)\n",
|
|
||||||
"maxBatch"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -299,21 +277,10 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"cityCountsByClickedBatchcodeTable_maxBatch = cityCountsByClickedBatchcodeTable.loc[(maxBatch, slice(None), slice(None), slice(None)), :]\n",
|
"persistCityCountsByClickedBatchcodeTables(\n",
|
||||||
"cityCountsByClickedBatchcodeTable_maxBatch"
|
" dataDir = Path('tmp/cityCountsByClickedBatchcodeTables'),\n",
|
||||||
]
|
" n = 10,\n",
|
||||||
},
|
" cityCountsByClickedBatchcodeTable = cityCountsByClickedBatchcodeTable)"
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "8d7a8bbf",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"(cityCountsByClickedBatchcodeTable_maxBatch\n",
|
|
||||||
" .reset_index()\n",
|
|
||||||
" .to_excel('tmp/cityCountsByClickedBatchcodeTable_maxBatch.xlsx'))\n",
|
|
||||||
"# import cityCountsByClickedBatchcodeTable_maxBatch.xlsx into https://www.google.com/mymaps"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user