From 4cf718c16dfd3cd0b3d38b4684a92fe87e684da1 Mon Sep 17 00:00:00 2001 From: frankknoll Date: Mon, 7 Feb 2022 11:06:14 +0100 Subject: [PATCH] starting page "International Deadly Lots" --- HowBadIsMyBatch.ipynb | 147 +++++++++++++++++++++++++++++++++++++++++- help.txt | 4 +- 2 files changed, 146 insertions(+), 5 deletions(-) diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index fc07d8bd831..e5cae328bb5 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -41,10 +41,17 @@ " 'VAERSVAX': self._readVAERSVAX(folder + year + \"VAERSVAX.csv\")\n", " }\n", "\n", + " def readNonDomesticVaersDescr(self):\n", + " folder = self.dataDir + \"/NonDomesticVAERSData/\"\n", + " return {\n", + " 'VAERSDATA': self._readVAERSDATA(folder + \"NonDomesticVAERSDATA.csv\"),\n", + " 'VAERSVAX': self._readVAERSVAX(folder + \"NonDomesticVAERSVAX.csv\")\n", + " }\n", + "\n", " def _readVAERSDATA(self, file):\n", " return self._read_csv(\n", " file = file,\n", - " usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", + " usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'SPLTTYPE'],\n", " parse_dates = ['RECVDATE'],\n", " date_parser = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\"))\n", "\n", @@ -117,7 +124,14 @@ "\n", " @staticmethod\n", " def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n", - " dataFrame[column] = np.where(dataFrame[column] == 'Y', 1, 0)\n", + " dataFrame[column] = DataFrameNormalizer._where(\n", + " condition = dataFrame[column] == 'Y',\n", + " trueValue = 1,\n", + " falseValue = 0)\n", + "\n", + " @staticmethod\n", + " def _where(condition, trueValue, falseValue):\n", + " return np.where(condition, trueValue, falseValue) \n", " " ] }, @@ -190,6 +204,7 @@ " 'L_THREAT': 'sum',\n", " 'DISABLE': 'sum'\n", " },\n", + " # FK-TODO: rename \"ADRs\" and \"Total reports\" to \"Total Number of Adverse Reaction Reports\" in all places\n", " columnNameMappingsDict = {\n", " \"DIED_size\": \"ADRs\",\n", " \"DIED_sum\": \"DEATHS\",\n", @@ -258,7 +273,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "41d4fa30", "metadata": {}, "outputs": [], @@ -283,6 +298,7 @@ " dataFrame['VAX_DOSE_SERIES'].rename('Dose')\n", " ]))\n", "\n", + " # FK-TODO: DRY because it generates a subset of BatchCodeTableHelper.createSevereEffectsBatchCodeTable()\n", " @staticmethod\n", " def _getDoseTable(dataFrame):\n", " doseTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n", @@ -303,6 +319,38 @@ " return doseTable\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "09e6b511", + "metadata": {}, + "outputs": [], + "source": [ + "import pycountry\n", + "\n", + "class InternationalLotAnalysis:\n", + " \n", + " @staticmethod\n", + " def getInternationalLotTable(dataFrame):\n", + " dataFrame = DataFrameFilter().filterByCovid19(dataFrame)\n", + " dataFrame['Country'] = dataFrame.apply(InternationalLotAnalysis._fun2, axis = 'columns')\n", + " result = DoseAnalysis._getDoseTable(dataFrame.groupby(dataFrame['Country']))\n", + " return result.sort_values(by = 'Severe reports (%)', ascending = False)\n", + "\n", + " # FK-TODO: refactor\n", + " @staticmethod\n", + " def _fun2(row):\n", + " if isinstance(row['SPLTTYPE'], str):\n", + " country = pycountry.countries.get(alpha_2 = row['SPLTTYPE'][:2])\n", + " if country is None:\n", + " return 'NO-COUNTRY: ' + row['SPLTTYPE'][:2]\n", + " else:\n", + " return country.name\n", + " else:\n", + " # FK-TODO: add missing InternationalLotAnalysisTests for this else branch\n", + " return 'NO-COUNTRY: ' + str(row['SPLTTYPE'])\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -733,6 +781,52 @@ " return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "c784bfef", + "metadata": {}, + "outputs": [], + "source": [ + "from pandas.testing import assert_frame_equal\n", + "\n", + "class InternationalLotAnalysisTest(unittest.TestCase):\n", + "\n", + " def test_getInternationalLotTable(self):\n", + " # Given\n", + " dataFrame = self.createDataFrame(\n", + " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE'],\n", + " data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 'GBPFIZER INC2020486806'],\n", + " [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224'],\n", + " [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224']],\n", + " index = [\n", + " \"1048786\",\n", + " \"1048786\",\n", + " \"4711\"])\n", + " \n", + " # When\n", + " internationalLotTable = InternationalLotAnalysis.getInternationalLotTable(dataFrame)\n", + "\n", + " # Then\n", + " assert_frame_equal(\n", + " internationalLotTable,\n", + " self.createDataFrame(\n", + " columns = ['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Severe reports (%)'],\n", + " data = [ [2, 2, 1, 1, (2 + 1 + 1) / 2 * 100],\n", + " [1, 1, 0, 0, (1 + 0 + 0) / 1 * 100]],\n", + " index = pd.Index(\n", + " [\n", + " 'France',\n", + " 'United Kingdom'\n", + " \n", + " ],\n", + " name = 'Country')))\n", + "\n", + " # FK-TODO: createDataFrame() is defined in almost every test class: DRY \n", + " def createDataFrame(self, index, columns, data, dtypes = {}):\n", + " return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -901,6 +995,53 @@ "doseByMonthTable.to_excel('results/doseByMonthTable.xlsx')\n", "doseByMonthTable" ] + }, + { + "cell_type": "markdown", + "id": "075aa6c9", + "metadata": {}, + "source": [ + "### International Deadly Lots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f8880f4", + "metadata": {}, + "outputs": [], + "source": [ + "# https://www.howbadismybatch.com/international.html\n", + "\n", + "def getInternationalLotTable():\n", + " vaersDescr = VaersDescrReader(dataDir = 'VAERS').readNonDomesticVaersDescr()\n", + " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescr(vaersDescr)\n", + " DataFrameNormalizer.normalize(dataFrame)\n", + " return InternationalLotAnalysis.getInternationalLotTable(dataFrame)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54e03231", + "metadata": {}, + "outputs": [], + "source": [ + "internationalLotTable = getInternationalLotTable()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e80e958", + "metadata": {}, + "outputs": [], + "source": [ + "# FK-TODO: make filter on 'Total reports' a parameter in getInternationalLotTable() \n", + "internationalLotTable = internationalLotTable[internationalLotTable['Total reports'] > 50]\n", + "internationalLotTable.to_excel('results/International_Deadly_Lots.xlsx')\n", + "internationalLotTable" + ] } ], "metadata": { diff --git a/help.txt b/help.txt index e4258f023c7..95b610d5197 100644 --- a/help.txt +++ b/help.txt @@ -4,8 +4,8 @@ get VAERS data: - download data (e.g. 2022VAERSData.zip) from https://vaers.hhs.gov/data/datasets.html and save and unzip in VAERS folder FK-TODO: -- https://www.howbadismybatch.com/firstsecond.html nachprogrammieren -- VAX_LOT-Spalte normalisieren, d.h. mindestens toUpperCase() darauf anwenden +- https://www.howbadismybatch.com/international.html nachprogrammieren +- https://www.howbadismybatch.com/geography.html nachprogrammieren - handle VAX_DOSE_SERIES = 'UNK' or 'N/A' like '1'? - Format des jeweiligen Herstellers berücksichtigen und "verschmutzte" Einträge säubern, denn sie stellen alle dieselbe Charge dar: 039k20a