refactoring

This commit is contained in:
frankknoll
2022-01-29 20:56:17 +01:00
parent d860b2d630
commit 9316a6755a

View File

@@ -94,7 +94,7 @@
" 'DISABILITIES': filter(df, 'DISABLE').value_counts(),\n", " 'DISABILITIES': filter(df, 'DISABLE').value_counts(),\n",
" 'LIFE THREATENING ILLNESSES': filter(df, 'L_THREAT').value_counts()\n", " 'LIFE THREATENING ILLNESSES': filter(df, 'L_THREAT').value_counts()\n",
" }\n", " }\n",
" return pd.concat(batchCodeTableDict, axis = 1).replace(to_replace = np.nan, value = 0)\n" " return pd.concat(batchCodeTableDict, axis = 'columns').replace(to_replace = np.nan, value = 0)\n"
] ]
}, },
{ {
@@ -123,28 +123,38 @@
" vaersDescrs = [\n", " vaersDescrs = [\n",
" {\n", " {\n",
" 'VAERSDATA': self.createDataFrame(\n", " 'VAERSDATA': self.createDataFrame(\n",
" index = [\"0916600\", \"0916601\"],\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
" data = [ ['Y', np.NaN, np.NaN],\n", " data = [ ['Y', np.NaN, np.NaN],\n",
" [np.NaN, np.NaN, 'Y']]),\n", " [np.NaN, np.NaN, 'Y']],\n",
" index = [\n",
" \"0916600\",\n",
" \"0916601\"],\n",
"),\n",
" 'VAERSVAX': self.createDataFrame(\n", " 'VAERSVAX': self.createDataFrame(\n",
" index = [\"0916600\", \"0916601\"],\n",
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n",
" ['COVID19', 'MODERNA', '025L20A', '1']],\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n",
" index = [\n",
" \"0916600\",\n",
" \"0916601\"],\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" },\n", " },\n",
" {\n", " {\n",
" 'VAERSDATA': self.createDataFrame(\n", " 'VAERSDATA': self.createDataFrame(\n",
" index = [\"1996873\", \"1996874\"],\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
" data = [ [np.NaN, np.NaN, np.NaN],\n", " data = [ [np.NaN, np.NaN, np.NaN],\n",
" [np.NaN, np.NaN, 'Y']]),\n", " [np.NaN, np.NaN, 'Y']],\n",
" index = [\n",
" \"1996873\",\n",
" \"1996874\"],\n",
"),\n",
" 'VAERSVAX': self.createDataFrame(\n", " 'VAERSVAX': self.createDataFrame(\n",
" index = [\"1996873\", \"1996874\"],\n",
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n", " data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n",
" ['COVID19', 'MODERNA', '025L20A', '1']],\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n",
" index = [\n",
" \"1996873\",\n",
" \"1996874\"],\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" }\n", " }\n",
" ]\n", " ]\n",
@@ -154,11 +164,14 @@
" \n", " \n",
" # Then\n", " # Then\n",
" dataFrameExpected = self.createDataFrame(\n", " dataFrameExpected = self.createDataFrame(\n",
" index = [\"0916600\", \"0916601\", \"1996874\"],\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '037K20A', '1'],\n", " data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '037K20A', '1'],\n",
" [np.NaN, np.NaN, 'Y', 'COVID19', 'MODERNA', '025L20A', '1'],\n", " [np.NaN, np.NaN, 'Y', 'COVID19', 'MODERNA', '025L20A', '1'],\n",
" [np.NaN, np.NaN, 'Y', 'COVID19', 'MODERNA', '025L20A', '1']],\n", " [np.NaN, np.NaN, 'Y', 'COVID19', 'MODERNA', '025L20A', '1']],\n",
" index = [\n",
" \"0916600\",\n",
" \"0916601\",\n",
" \"1996874\"],\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n", "\n",
@@ -167,14 +180,18 @@
" vaersDescrs = [\n", " vaersDescrs = [\n",
" {\n", " {\n",
" 'VAERSDATA': self.createDataFrame(\n", " 'VAERSDATA': self.createDataFrame(\n",
" index = [\"1048786\"],\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
" data = [ ['Y', np.NaN, np.NaN]]),\n", " data = [ ['Y', np.NaN, np.NaN]],\n",
" index = [\n",
" \"1048786\"],\n",
"),\n",
" 'VAERSVAX': self.createDataFrame(\n", " 'VAERSVAX': self.createDataFrame(\n",
" index = [\"1048786\", \"1048786\"],\n",
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n",
" ['COVID19', 'MODERNA', '030L20A', '1']],\n", " ['COVID19', 'MODERNA', '030L20A', '1']],\n",
" index = [\n",
" \"1048786\",\n",
" \"1048786\"],\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" }\n", " }\n",
" ]\n", " ]\n",
@@ -184,9 +201,10 @@
" \n", " \n",
" # Then\n", " # Then\n",
" dataFrameExpected = self.createDataFrame(\n", " dataFrameExpected = self.createDataFrame(\n",
" index = [\"1048786\"],\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '030L20A', '1']],\n", " data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '030L20A', '1']],\n",
" index = [\n",
" \"1048786\"],\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n", "\n",
@@ -195,14 +213,17 @@
" vaersDescrs = [\n", " vaersDescrs = [\n",
" {\n", " {\n",
" 'VAERSDATA': self.createDataFrame(\n", " 'VAERSDATA': self.createDataFrame(\n",
" index = [\"1048786\"],\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
" data = [ ['Y', np.NaN, np.NaN]]),\n", " data = [ ['Y', np.NaN, np.NaN]],\n",
" index = [\n",
" \"1048786\"]),\n",
" 'VAERSVAX': self.createDataFrame(\n", " 'VAERSVAX': self.createDataFrame(\n",
" index = [\"1048786\", \"1048786\"],\n",
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n",
" ['COVID19', 'MODERNA', '030L20A', '1']],\n", " ['COVID19', 'MODERNA', '030L20A', '1']],\n",
" index = [\n",
" \"1048786\",\n",
" \"1048786\"],\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" }\n", " }\n",
" ]\n", " ]\n",
@@ -212,9 +233,10 @@
" \n", " \n",
" # Then\n", " # Then\n",
" dataFrameExpected = self.createDataFrame(\n", " dataFrameExpected = self.createDataFrame(\n",
" index = [\"1048786\"],\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '016M20A', '2']],\n", " data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '016M20A', '2']],\n",
" index = [\n",
" \"1048786\"],\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n", "\n",
@@ -238,28 +260,36 @@
" [\n", " [\n",
" {\n", " {\n",
" 'VAERSDATA': self.createDataFrame(\n", " 'VAERSDATA': self.createDataFrame(\n",
" index = [\"0916600\", \"0916601\"],\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
" data = [ ['Y', np.NaN, np.NaN],\n", " data = [ ['Y', np.NaN, np.NaN],\n",
" [np.NaN, np.NaN, 'Y']]),\n", " [np.NaN, np.NaN, 'Y']],\n",
" index = [\n",
" \"0916600\",\n",
" \"0916601\"]),\n",
" 'VAERSVAX': self.createDataFrame(\n", " 'VAERSVAX': self.createDataFrame(\n",
" index = [\"0916600\", \"0916601\"],\n",
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n",
" ['COVID19', 'MODERNA', '025L20A', '1']],\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n",
" index = [\n",
" \"0916600\",\n",
" \"0916601\"],\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" },\n", " },\n",
" {\n", " {\n",
" 'VAERSDATA': self.createDataFrame(\n", " 'VAERSDATA': self.createDataFrame(\n",
" index = [\"1996873\", \"1996874\"],\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
" data = [ [np.NaN, np.NaN, np.NaN],\n", " data = [ [np.NaN, np.NaN, np.NaN],\n",
" [np.NaN, np.NaN, 'Y']]),\n", " [np.NaN, np.NaN, 'Y']],\n",
" index = [\n",
" \"1996873\",\n",
" \"1996874\"]),\n",
" 'VAERSVAX': self.createDataFrame(\n", " 'VAERSVAX': self.createDataFrame(\n",
" index = [\"1996873\", \"1996874\"],\n",
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n", " data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n",
" ['COVID19', 'MODERNA', '025L20A', '1']],\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n",
" index = [\n",
" \"1996873\",\n",
" \"1996874\"],\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" }\n", " }\n",
" ],\n", " ],\n",