adding getDoseByMonthTable()
This commit is contained in:
@@ -64,7 +64,9 @@
|
|||||||
" def _readVAERSDATA(self, file):\n",
|
" def _readVAERSDATA(self, file):\n",
|
||||||
" VAERSDATA = self._read_csv(\n",
|
" VAERSDATA = self._read_csv(\n",
|
||||||
" file = file,\n",
|
" file = file,\n",
|
||||||
" usecols = ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n",
|
" usecols = ['VAERS_ID', 'RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
|
||||||
|
" parse_dates = ['RECVDATE'],\n",
|
||||||
|
" date_parser = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\"))\n",
|
||||||
" DataFrameConverter.convertColumnsOfDataFrameToNumerics(\n",
|
" DataFrameConverter.convertColumnsOfDataFrameToNumerics(\n",
|
||||||
" VAERSDATA,\n",
|
" VAERSDATA,\n",
|
||||||
" ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n",
|
" ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n",
|
||||||
@@ -274,6 +276,33 @@
|
|||||||
" })\n",
|
" })\n",
|
||||||
" doseTable = doseTable[['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses']]\n",
|
" doseTable = doseTable[['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses']]\n",
|
||||||
" doseTable['Severe reports (%)'] = (doseTable['Deaths'] + doseTable['Disabilities'] + doseTable['Life Threatening Illnesses']) / doseTable['Total reports'] * 100\n",
|
" doseTable['Severe reports (%)'] = (doseTable['Deaths'] + doseTable['Disabilities'] + doseTable['Life Threatening Illnesses']) / doseTable['Total reports'] * 100\n",
|
||||||
|
" return doseTable\n",
|
||||||
|
"\n",
|
||||||
|
" @staticmethod\n",
|
||||||
|
" def getDoseByMonthTable(dataFrame):\n",
|
||||||
|
" # https://stackoverflow.com/questions/61879166/pandas-groupby-month-and-year-date-as-datetime64ns-and-summarized-by-count\n",
|
||||||
|
" grouped = dataFrame.groupby(\n",
|
||||||
|
" [\n",
|
||||||
|
" dataFrame['RECVDATE'].dt.year.rename('year'),\n",
|
||||||
|
" dataFrame['RECVDATE'].dt.month.rename('month'),\n",
|
||||||
|
" dataFrame['VAX_DOSE_SERIES']\n",
|
||||||
|
" ])\n",
|
||||||
|
" # FK-TODO: DRY with getDoseTable()\n",
|
||||||
|
" doseTable = AggregationHelper.aggregateAndFlattenColumnsAndRenameColumns(\n",
|
||||||
|
" dataFrame = grouped,\n",
|
||||||
|
" aggFunctionsByColumn = {\n",
|
||||||
|
" 'DIED': ['sum', 'size'],\n",
|
||||||
|
" 'L_THREAT': 'sum',\n",
|
||||||
|
" 'DISABLE': 'sum'\n",
|
||||||
|
" },\n",
|
||||||
|
" columnNameMappingsDict = {\n",
|
||||||
|
" \"DIED_size\": \"Total reports\",\n",
|
||||||
|
" \"DIED_sum\": \"Deaths\",\n",
|
||||||
|
" \"L_THREAT_sum\": \"Life Threatening Illnesses\",\n",
|
||||||
|
" \"DISABLE_sum\": \"Disabilities\"\n",
|
||||||
|
" })\n",
|
||||||
|
" doseTable = doseTable[['Total reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses']]\n",
|
||||||
|
" doseTable['Severe reports (%)'] = (doseTable['Deaths'] + doseTable['Disabilities'] + doseTable['Life Threatening Illnesses']) / doseTable['Total reports'] * 100\n",
|
||||||
" return doseTable\n"
|
" return doseTable\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -620,6 +649,42 @@
|
|||||||
" },\n",
|
" },\n",
|
||||||
" index = pd.Index(['1', '2'], dtype = \"string\", name = 'VAX_DOSE_SERIES')))\n",
|
" index = pd.Index(['1', '2'], dtype = \"string\", name = 'VAX_DOSE_SERIES')))\n",
|
||||||
" \n",
|
" \n",
|
||||||
|
" def test_getDoseByMonthTable(self):\n",
|
||||||
|
" # Given\n",
|
||||||
|
" parseDate = lambda dateStr: pd.to_datetime(dateStr, format = \"%m/%d/%Y\")\n",
|
||||||
|
" dataFrame = self.createDataFrame(\n",
|
||||||
|
" columns = ['RECVDATE', 'DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||||
|
" data = [ [parseDate('01/01/2021'), 1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2'],\n",
|
||||||
|
" [parseDate('01/01/2021'), 1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1'],\n",
|
||||||
|
" [parseDate('01/01/2021'), 1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1']],\n",
|
||||||
|
" index = [\n",
|
||||||
|
" \"1048786\",\n",
|
||||||
|
" \"1048786\",\n",
|
||||||
|
" \"4711\"],\n",
|
||||||
|
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
|
||||||
|
" \n",
|
||||||
|
" # When\n",
|
||||||
|
" doseByMonthTable = DoseAnalysis.getDoseByMonthTable(dataFrame)\n",
|
||||||
|
"\n",
|
||||||
|
" # Then\n",
|
||||||
|
" assert_frame_equal(\n",
|
||||||
|
" doseByMonthTable,\n",
|
||||||
|
" pd.DataFrame(\n",
|
||||||
|
" data = {\n",
|
||||||
|
" 'Total reports': [2, 1],\n",
|
||||||
|
" 'Deaths': [2, 1],\n",
|
||||||
|
" 'Disabilities': [1, 0],\n",
|
||||||
|
" 'Life Threatening Illnesses': [1, 0],\n",
|
||||||
|
" 'Severe reports (%)': [(2 + 1 + 1)/2 * 100, (1 + 0 + 0)/1 * 100]\n",
|
||||||
|
" },\n",
|
||||||
|
" index = pd.MultiIndex.from_tuples(\n",
|
||||||
|
" [\n",
|
||||||
|
" (2021, 1, '1'),\n",
|
||||||
|
" (2021, 1, '2'),\n",
|
||||||
|
" ],\n",
|
||||||
|
" names = ('year', 'month', 'VAX_DOSE_SERIES'))),\n",
|
||||||
|
" check_index_type = False)\n",
|
||||||
|
"\n",
|
||||||
" def createDataFrame(self, index, columns, data, dtypes = {}):\n",
|
" def createDataFrame(self, index, columns, data, dtypes = {}):\n",
|
||||||
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
|
" return pd.DataFrame(index = index, columns = columns, data = data).astype(dtypes)\n"
|
||||||
]
|
]
|
||||||
@@ -706,7 +771,12 @@
|
|||||||
"def getDoseTable():\n",
|
"def getDoseTable():\n",
|
||||||
" vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n",
|
" vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n",
|
||||||
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
|
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
|
||||||
" return DoseAnalysis.getDoseTable(dataFrame)"
|
" return DoseAnalysis.getDoseTable(dataFrame)\n",
|
||||||
|
"\n",
|
||||||
|
"def getDoseByMonthTable():\n",
|
||||||
|
" vaersDescrs = VaersDescrReader(dataDir = \"VAERS\").readAllVaersDescrs()\n",
|
||||||
|
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
|
||||||
|
" return DoseAnalysis.getDoseByMonthTable(dataFrame)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -718,6 +788,18 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"getDoseTable()"
|
"getDoseTable()"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "b333e5fb",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"doseByMonthTable = getDoseByMonthTable()\n",
|
||||||
|
"doseByMonthTable.to_excel('results/doseByMonthTable.xlsx')\n",
|
||||||
|
"doseByMonthTable"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
Reference in New Issue
Block a user