diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index 0096c02afb8..8423bf44a55 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -149,6 +149,9 @@ " def filterByCovid19(self, dataFrame):\n", " return dataFrame[self._isCovid19(dataFrame)]\n", "\n", + " def filterByCountry(self, dataFrame, country, countryColumnName):\n", + " return dataFrame[dataFrame[countryColumnName] == country]\n", + "\n", " def filterBy(self, dataFrame, manufacturer = None, dose = None):\n", " return dataFrame[self._isManufacturer(dataFrame, manufacturer) & self._isDose(dataFrame, dose)]\n", "\n", @@ -278,6 +281,7 @@ " dataFrame['VAX_DOSE_SERIES'].rename('Dose')\n", " ]))\n", "\n", + " # FK-TODO: rename _createDoseTable()\n", " @staticmethod\n", " def _createDoseTable(dataFrame):\n", " doseTable = SummationTableFactory.createSummationTable(\n", @@ -311,12 +315,25 @@ " return internationalLotTable.sort_values(by = 'Severe reports (%)', ascending = False)\n", "\n", " @staticmethod\n", + " def createBatchCodeTableByCountry(dataFrame : pd.DataFrame, country):\n", + " dataFrame = DataFrameFilter().filterByCovid19(dataFrame)\n", + " batchCodeTable = InternationalLotTableFactory._createBatchCodeTableByCountry(dataFrame, country)\n", + " return batchCodeTable.sort_values(by = 'Severe reports (%)', ascending = False)\n", + "\n", + " @staticmethod\n", " def _createInternationalLotTable(dataFrame):\n", " countryColumnName = 'Country'\n", - " InternationalLotTableFactory._addCountryColumn(dataFrame, countryColumnName = countryColumnName)\n", + " dataFrame = InternationalLotTableFactory._addCountryColumn(dataFrame, countryColumnName = countryColumnName)\n", " return DoseTableFactory._createDoseTable(dataFrame.groupby(dataFrame[countryColumnName]))\n", "\n", " @staticmethod\n", + " def _createBatchCodeTableByCountry(dataFrame : pd.DataFrame, country):\n", + " countryColumnName = 'Country'\n", + " dataFrame = InternationalLotTableFactory._addCountryColumn(dataFrame, countryColumnName = countryColumnName)\n", + " dataFrame = DataFrameFilter().filterByCountry(dataFrame, country = country, countryColumnName = countryColumnName)\n", + " return DoseTableFactory._createDoseTable(dataFrame.groupby('VAX_LOT'))\n", + "\n", + " @staticmethod\n", " def _addCountryColumn(dataFrame, countryColumnName):\n", " dataFrame[countryColumnName] = dataFrame.apply(\n", " lambda row:\n", @@ -324,6 +341,7 @@ " splttype = row['SPLTTYPE'],\n", " default = 'Unknown Country'),\n", " axis = 'columns')\n", + " return dataFrame.astype({countryColumnName: \"string\"})\n", "\n", " @staticmethod\n", " def _getCountryNameOfSplttypeOrDefault(splttype, default):\n", @@ -810,7 +828,40 @@ " 'United Kingdom',\n", " 'Unknown Country'\n", " ],\n", - " name = 'Country')))\n" + " dtype = \"string\",\n", + " name = 'Country')))\n", + "\n", + " def test_createBatchCodeTableByCountry(self):\n", + " # Given\n", + " dataFrame = TestHelper.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE', 'HOSPITAL', 'ER_VISIT'],\n", + " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 'GBPFIZER INC2020486806', 0, 0],\n", + " [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n", + " [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n", + " [0, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0]],\n", + " index = [\n", + " \"1048786\",\n", + " \"1048786\",\n", + " \"4711\",\n", + " \"0815\"])\n", + " \n", + " # When\n", + " batchCodeTable = InternationalLotTableFactory.createBatchCodeTableByCountry(dataFrame, 'France')\n", + "\n", + " # Then\n", + " assert_frame_equal(\n", + " batchCodeTable,\n", + " TestHelper.createDataFrame(\n", + " columns = ['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Severe reports (%)'],\n", + " data = [ [2, 1, 2, 2, (1 + 2 + 2) / 2 * 100],\n", + " [1, 1, 0, 0, (1 + 0 + 0) / 1 * 100]],\n", + " index = pd.Index(\n", + " [\n", + " '030L20B',\n", + " '030L20A'\n", + " ],\n", + " name = 'VAX_LOT')),\n", + " check_dtype = False)\n" ] }, { @@ -1028,6 +1079,56 @@ "internationalLotTable.to_excel('results/International_Deadly_Lots.xlsx')\n", "internationalLotTable" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f691bf5", + "metadata": {}, + "outputs": [], + "source": [ + "def createBatchCodeTableByCountry(country):\n", + " vaersDescr = VaersDescrReader(dataDir = 'VAERS').readNonDomesticVaersDescr()\n", + " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescr(vaersDescr)\n", + " DataFrameNormalizer.normalize(dataFrame)\n", + " return InternationalLotTableFactory.createBatchCodeTableByCountry(dataFrame, country)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59f7e62c", + "metadata": {}, + "outputs": [], + "source": [ + "batchCodeTable = createBatchCodeTableByCountry('France')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a35ede95", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "report_path = 'results/international/'\n", + "if not os.path.exists(report_path):\n", + " os.makedirs(report_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff259a35", + "metadata": {}, + "outputs": [], + "source": [ + "batchCodeTable = batchCodeTable[batchCodeTable['Total reports'] > 50]\n", + "batchCodeTable.to_excel(report_path + 'france.xlsx')\n", + "batchCodeTable" + ] } ], "metadata": {