Add Countries column listing the countries in which each batch code was found

This commit is contained in:
frankknoll
2022-02-22 20:38:36 +01:00
parent 6087c2587e
commit 43403e7872
167 changed files with 195 additions and 192 deletions

View File

@@ -159,19 +159,14 @@
"\n",
" @staticmethod\n",
" def createSummationTable(dataFrame):\n",
" summationTable = dataFrame.agg({\n",
" 'DIED': ['sum', 'size'],\n",
" 'L_THREAT': 'sum',\n",
" 'DISABLE': 'sum',\n",
" 'SEVERE': 'sum'\n",
" })\n",
" SummationTableFactory._flattenColumns(summationTable)\n",
" summationTable = summationTable.rename(columns = {\n",
" \"DIED_size\": \"Adverse Reaction Reports\",\n",
" \"DIED_sum\": \"Deaths\",\n",
" \"L_THREAT_sum\": \"Life Threatening Illnesses\",\n",
" \"DISABLE_sum\": \"Disabilities\",\n",
" \"SEVERE_sum\": \"Severities\"\n",
" summationTable = dataFrame.agg(\n",
" **{\n",
" 'Deaths': pd.NamedAgg(column = 'DIED', aggfunc = 'sum'),\n",
" 'Adverse Reaction Reports': pd.NamedAgg(column = 'DIED', aggfunc = 'size'),\n",
" 'Life Threatening Illnesses': pd.NamedAgg(column = 'L_THREAT', aggfunc = 'sum'), \n",
" 'Disabilities': pd.NamedAgg(column = 'DISABLE', aggfunc = 'sum'),\n",
" 'Severities': pd.NamedAgg(column = 'SEVERE', aggfunc = 'sum'),\n",
" 'Countries': pd.NamedAgg(column = 'COUNTRY', aggfunc = SummationTableFactory.countries2str)\n",
" })\n",
" summationTable['Severe reports'] = summationTable['Severities'] / summationTable['Adverse Reaction Reports'] * 100\n",
" summationTable['Lethality'] = summationTable['Deaths'] / summationTable['Adverse Reaction Reports'] * 100\n",
@@ -182,12 +177,13 @@
" 'Disabilities',\n",
" 'Life Threatening Illnesses',\n",
" 'Severe reports',\n",
" 'Lethality'\n",
" 'Lethality',\n",
" 'Countries'\n",
" ]]\n",
"\n",
" @staticmethod\n",
" def _flattenColumns(dataFrame):\n",
" dataFrame.columns = [\"_\".join(a) for a in dataFrame.columns.to_flat_index()]\n"
" def countries2str(countries):\n",
" return ', '.join(sorted(set(countries)))"
]
},
{
@@ -302,6 +298,7 @@
" 'Disabilities',\n",
" 'Life Threatening Illnesses',\n",
" 'Company',\n",
" 'Countries',\n",
" 'Severe reports',\n",
" 'Lethality'\n",
" ]]\n",
@@ -531,9 +528,9 @@
" assert_frame_equal(\n",
" batchCodeTable,\n",
" TestHelper.createDataFrame(\n",
" columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Severe reports', 'Lethality'],\n",
" data = [ [2, 1, 2, 2, 'MODERNA', 2/2 * 100, 1/2 * 100],\n",
" [1, 0, 0, 0, 'MODERNA', 0/1 * 100, 0/1 * 100]],\n",
" columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Countries', 'Severe reports', 'Lethality'],\n",
" data = [ [2, 1, 2, 2, 'MODERNA', 'France', 2/2 * 100, 1/2 * 100],\n",
" [1, 0, 0, 0, 'MODERNA', 'France', 0/1 * 100, 0/1 * 100]],\n",
" index = pd.Index(\n",
" [\n",
" '030L20B',\n",
@@ -549,7 +546,7 @@
" data = [ [1, 0, 0, 'COVID19', 'PFIZER\\BIONTECH', '016M20A', '2', 'GBPFIZER INC2020486806', 0, 0, 'United Kingdom'],\n",
" [0, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France'],\n",
" [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France'],\n",
" [0, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France']],\n",
" [0, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'United Kingdom']],\n",
" index = [\n",
" \"1048786\",\n",
" \"1048786\",\n",
@@ -565,10 +562,10 @@
" assert_frame_equal(\n",
" batchCodeTable,\n",
" TestHelper.createDataFrame(\n",
" columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Severe reports', 'Lethality'],\n",
" data = [ [1, 1, 0, 0, 'PFIZER\\BIONTECH', 1/1 * 100, 1/1 * 100],\n",
" [2, 1, 2, 2, 'MODERNA', 2/2 * 100, 1/2 * 100],\n",
" [1, 0, 0, 0, 'MODERNA', 0/1 * 100, 0/1 * 100]],\n",
" columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Countries', 'Severe reports', 'Lethality'],\n",
" data = [ [1, 1, 0, 0, 'PFIZER\\BIONTECH', 'United Kingdom', 1/1 * 100, 1/1 * 100],\n",
" [2, 1, 2, 2, 'MODERNA', 'France, United Kingdom', 2/2 * 100, 1/2 * 100],\n",
" [1, 0, 0, 0, 'MODERNA', 'France', 0/1 * 100, 0/1 * 100]],\n",
" index = pd.Index(\n",
" [\n",
" '016M20A',\n",
@@ -601,7 +598,7 @@
" assert_frame_equal(\n",
" batchCodeTable,\n",
" TestHelper.createDataFrame(\n",
" columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Severe reports', 'Lethality'],\n",
" columns = ['Adverse Reaction Reports', 'Deaths', 'Disabilities', 'Life Threatening Illnesses', 'Company', 'Countries', 'Severe reports', 'Lethality'],\n",
" data = [ ],\n",
" index = pd.Index([], name = 'VAX_LOT')),\n",
" check_dtype = False)\n"

View File

@@ -29,5 +29,4 @@ u039k20a
039K20A 12-31-
039K20A & 031M2
039K20A and 032
039K20A, 011L20
- brauchen Spalte, die die Länder aufzählt, in denen der Batch Code gefunden wurde.
039K20A, 011L20