diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index fc18dc59f8f..58317f32ca7 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -22,7 +22,7 @@ "from CountryColumnsMerger import CountryColumnsMerger\n", "from datetime import datetime\n", "from VAERSFileDownloader import updateVAERSFiles\n", - "from GoogleAnalytics.RegionCountsByBatchcodeTablesMerger import RegionCountsByBatchcodeTablesMerger\n", + "from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger\n", "from IOUtils import IOUtils\n", "import os\n", "import pandas as pd\n", @@ -211,8 +211,8 @@ "outputs": [], "source": [ "# df = countryCountsByBatchcode.loc[(slice(None), 'Germany'), :][['COUNTRY_COUNT_BY_VAX_LOT Clicked']].sort_values(by = 'COUNTRY_COUNT_BY_VAX_LOT Clicked', ascending = False)\n", - "df = countryCountsByBatchcode.loc[(slice(None), 'Germany'), :].sort_values(by = 'COUNTRY_COUNT_BY_VAX_LOT Clicked', ascending = False)\n", - "df" + "dataframe = countryCountsByBatchcode.loc[(slice(None), 'Germany'), :].sort_values(by = 'COUNTRY_COUNT_BY_VAX_LOT Clicked', ascending = False)\n", + "dataframe" ] }, { @@ -257,66 +257,6 @@ "# Google Analytics" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "f2d4e5f9", - "metadata": {}, - "outputs": [], - "source": [ - "regionCountsByClickedBatchcodeTable = RegionCountsByBatchcodeTablesMerger.getRegionCountsByClickedBatchcode('data/GoogleAnalytics')\n", - "regionCountsByClickedBatchcodeTable" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "778564bb", - "metadata": {}, - "outputs": [], - "source": [ - "regionCountsByClickedBatchcodeTable4Germany = (regionCountsByClickedBatchcodeTable\n", - " .loc[(slice(None), 'Germany', slice(None)), :]\n", - " .sort_values(by = ['REGION', 'REGION_COUNT_BY_VAX_LOT'], ascending = False))\n", - "regionCountsByClickedBatchcodeTable4Germany" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c0efbfb", - "metadata": {}, - "outputs": [], - "source": [ - "regionCountsByClickedBatchcodeTable4Germany.to_excel('tmp/regionCountsByClickedBatchcodeTable4Germany.xlsx')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3c05fcfc", - "metadata": {}, - "outputs": [], - "source": [ - "# VAX_LOT: EX8679\n", - "(regionCountsByClickedBatchcodeTable4Germany\n", - " .groupby('VAX_LOT')\n", - " .sum()\n", - " .sort_values(by = 'REGION_COUNT_BY_VAX_LOT', ascending = False))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "370bf329", - "metadata": {}, - "outputs": [], - "source": [ - "(regionCountsByClickedBatchcodeTable4Germany\n", - " .loc[('EX8679', slice(None), slice(None)), :]\n", - " .sort_values(by = 'REGION_COUNT_BY_VAX_LOT', ascending = False))" - ] - }, { "cell_type": "code", "execution_count": null, @@ -324,12 +264,34 @@ "metadata": {}, "outputs": [], "source": [ - "from GoogleAnalytics.CityCountsByBatchcodeTablesMerger import CityCountsByBatchcodeTablesMerger\n", - "\n", "cityCountsByClickedBatchcodeTable = CityCountsByBatchcodeTablesMerger.getCityCountsByClickedBatchcode('data/GoogleAnalytics')\n", "cityCountsByClickedBatchcodeTable" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "6883ba3a", + "metadata": {}, + "outputs": [], + "source": [ + "def dataframeWithoutIndexValue(dataframe, indexValue):\n", + " return dataframe[dataframe.index != indexValue]\n", + "\n", + "def getMaxBatch(cityCountsByClickedBatchcodeTable):\n", + " df_sorted = dataframeWithoutIndexValue(\n", + " dataframe = (cityCountsByClickedBatchcodeTable\n", + " .groupby('VAX_LOT')\n", + " .sum()\n", + " .sort_values(by = 'CITY_COUNT_BY_VAX_LOT', ascending = False)),\n", + " indexValue = '(not set)')\n", + " print(df_sorted)\n", + " return df_sorted.iloc[0].name\n", + "\n", + "maxBatch = getMaxBatch(cityCountsByClickedBatchcodeTable)\n", + "maxBatch" + ] + }, { "cell_type": "code", "execution_count": null, @@ -337,8 +299,8 @@ "metadata": {}, "outputs": [], "source": [ - "cityCountsByClickedBatchcodeTable_EX8679_Germany = cityCountsByClickedBatchcodeTable.loc[('EX8679', 'Germany', slice(None), slice(None)), :]\n", - "cityCountsByClickedBatchcodeTable_EX8679_Germany" + "cityCountsByClickedBatchcodeTable_maxBatch = cityCountsByClickedBatchcodeTable.loc[(maxBatch, slice(None), slice(None), slice(None)), :]\n", + "cityCountsByClickedBatchcodeTable_maxBatch" ] }, { @@ -348,19 +310,10 @@ "metadata": {}, "outputs": [], "source": [ - "cityCountsByClickedBatchcodeTable_EX8679_Germany.to_excel('tmp/cityCountsByClickedBatchcodeTable_EX8679_Germany.xlsx')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "494943f3", - "metadata": {}, - "outputs": [], - "source": [ - "(cityCountsByClickedBatchcodeTable_EX8679_Germany\n", - " .sort_values(by = ['CITY_COUNT_BY_VAX_LOT'], ascending = False)\n", - " .to_excel('tmp/cityCountsByClickedBatchcodeTable_EX8679_Germany_sorted.xlsx'))" + "(cityCountsByClickedBatchcodeTable_maxBatch\n", + " .reset_index()\n", + " .to_excel('tmp/cityCountsByClickedBatchcodeTable_maxBatch.xlsx'))\n", + "# import cityCountsByClickedBatchcodeTable_maxBatch.xlsx into https://www.google.com/mymaps" ] }, {