adding getDoseByMonthTable()

This commit is contained in:
frankknoll
2022-02-05 17:28:26 +01:00
parent b9bb504b87
commit a8c20c3656

View File

@@ -64,7 +64,9 @@
" def _readVAERSDATA(self, file):\n", " def _readVAERSDATA(self, file):\n",
" VAERSDATA = self._read_csv(\n", " VAERSDATA = self._read_csv(\n",
" file = file,\n", " file = file,\n",
" usecols = ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n", " usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
" parse_dates = ['RECVDATE'],\n",
" date_parser = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\"))\n",
" DataFrameConverter.convertColumnsOfDataFrameToNumerics(\n", " DataFrameConverter.convertColumnsOfDataFrameToNumerics(\n",
" VAERSDATA,\n", " VAERSDATA,\n",
" ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n", " ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n",
@@ -274,6 +276,33 @@
" })\n", " })\n",
" doseTable = doseTable[['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses']]\n", " doseTable = doseTable[['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses']]\n",
" doseTable['Severe reports (%)'] = (doseTable['Deaths'] + doseTable['Disabilities'] + doseTable['Life Threatening Illnesses']) / doseTable['Total reports'] * 100\n", " doseTable['Severe reports (%)'] = (doseTable['Deaths'] + doseTable['Disabilities'] + doseTable['Life Threatening Illnesses']) / doseTable['Total reports'] * 100\n",
" return doseTable\n",
"\n",
" @staticmethod\n",
" def getDoseByMonthTable(dataFrame):\n",
" # https://stackoverflow.com/questions/61879166/pandas-groupby-month-and-year-date-as-datetime64ns-and-summarized-by-count\n",
" grouped = dataFrame.groupby(\n",
" [\n",
" dataFrame['RECVDATE'].dt.year.rename('year'),\n",
" dataFrame['RECVDATE'].dt.month.rename('month'),\n",
" dataFrame['VAX_DOSE_SERIES']\n",
" ])\n",
" # FK-TODO: DRY with getDoseTable()\n",
" doseTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n",
" dataFrame = grouped,\n",
" aggFunctionsByColumn = {\n",
" 'DIED': ['sum', 'size'],\n",
" 'L_THREAT': 'sum',\n",
" 'DISABLE': 'sum'\n",
" },\n",
" columnNameMappingsDict = {\n",
" \"DIED_size\": \"Total reports\",\n",
" \"DIED_sum\": \"Deaths\",\n",
" \"L_THREAT_sum\": \"Life Threatening Illnesses\",\n",
" \"DISABLE_sum\": \"Disabilities\"\n",
" })\n",
" doseTable = doseTable[['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses']]\n",
" doseTable['Severe reports (%)'] = (doseTable['Deaths'] + doseTable['Disabilities'] + doseTable['Life Threatening Illnesses']) / doseTable['Total reports'] * 100\n",
" return doseTable\n" " return doseTable\n"
] ]
}, },
@@ -620,6 +649,42 @@
" },\n", " },\n",
" index = pd.Index(['1', '2'], dtype = \"string\", name = 'VAX_DOSE_SERIES')))\n", " index = pd.Index(['1', '2'], dtype = \"string\", name = 'VAX_DOSE_SERIES')))\n",
" \n", " \n",
" def test_getDoseByMonthTable(self):\n",
" # Given\n",
" parseDate = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\")\n",
" dataFrame = self.createDataFrame(\n",
" columns = ['RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" data = [ [parseDate('01/01/2021'), 1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2'],\n",
" [parseDate('01/01/2021'), 1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1'],\n",
" [parseDate('01/01/2021'), 1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1']],\n",
" index = [\n",
" \"1048786\",\n",
" \"1048786\",\n",
" \"4711\"],\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" \n",
" # When\n",
" doseByMonthTable = DoseAnalysis.getDoseByMonthTable(dataFrame)\n",
"\n",
" # Then\n",
" assert_frame_equal(\n",
" doseByMonthTable,\n",
" pd.DataFrame(\n",
" data = {\n",
" 'Total reports': [2, 1],\n",
" 'Deaths': [2, 1],\n",
" 'Disabilities': [1, 0],\n",
" 'Life Threatening Illnesses': [1, 0],\n",
" 'Severe reports (%)': [(2 + 1 + 1)/2 * 100, (1 + 0 + 0)/1 * 100]\n",
" },\n",
" index = pd.MultiIndex.from_tuples(\n",
" [\n",
" (2021, 1, '1'),\n",
" (2021, 1, '2'),\n",
" ],\n",
" names = ('year', 'month', 'VAX_DOSE_SERIES'))),\n",
" check_index_type = False)\n",
"\n",
" def createDataFrame(self, index, columns, data, dtypes = {}):\n", " def createDataFrame(self, index, columns, data, dtypes = {}):\n",
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n" " return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
] ]
@@ -706,7 +771,12 @@
"def getDoseTable():\n", "def getDoseTable():\n",
" vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n", " vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
" return DoseAnalysis.getDoseTable(dataFrame)" " return DoseAnalysis.getDoseTable(dataFrame)\n",
"\n",
"def getDoseByMonthTable():\n",
" vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
" return DoseAnalysis.getDoseByMonthTable(dataFrame)"
] ]
}, },
{ {
@@ -718,6 +788,18 @@
"source": [ "source": [
"getDoseTable()" "getDoseTable()"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b333e5fb",
"metadata": {},
"outputs": [],
"source": [
"doseByMonthTable = getDoseByMonthTable()\n",
"doseByMonthTable.to_excel('results/doseByMonthTable.xlsx')\n",
"doseByMonthTable"
]
} }
], ],
"metadata": { "metadata": {