From 9316a6755a0d7d9a65baaaebdb8ea7e31fb179ac Mon Sep 17 00:00:00 2001 From: frankknoll Date: Sat, 29 Jan 2022 20:56:17 +0100 Subject: [PATCH] refactoring --- HowBadIsMyBatch.ipynb | 74 ++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/HowBadIsMyBatch.ipynb b/HowBadIsMyBatch.ipynb index 79553e9ac2d..a5d5b9bc911 100644 --- a/HowBadIsMyBatch.ipynb +++ b/HowBadIsMyBatch.ipynb @@ -94,7 +94,7 @@ " 'DISABILITIES': filter(df, 'DISABLE').value_counts(),\n", " 'LIFE THREATENING ILLNESSES': filter(df, 'L_THREAT').value_counts()\n", " }\n", - " return pd.concat(batchCodeTableDict, axis = 1).replace(to_replace = np.nan, value = 0)\n" + " return pd.concat(batchCodeTableDict, axis = 'columns').replace(to_replace = np.nan, value = 0)\n" ] }, { @@ -123,28 +123,38 @@ " vaersDescrs = [\n", " {\n", " 'VAERSDATA': self.createDataFrame(\n", - " index = [\"0916600\", \"0916601\"],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " data = [ ['Y', np.NaN, np.NaN],\n", - " [np.NaN, np.NaN, 'Y']]),\n", + " [np.NaN, np.NaN, 'Y']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"],\n", + "),\n", " 'VAERSVAX': self.createDataFrame(\n", - " index = [\"0916600\", \"0916601\"],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " },\n", " {\n", " 'VAERSDATA': self.createDataFrame(\n", - " index = [\"1996873\", \"1996874\"],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " data = [ [np.NaN, np.NaN, np.NaN],\n", - " [np.NaN, np.NaN, 'Y']]),\n", + " [np.NaN, np.NaN, 'Y']],\n", + " index = [\n", + " \"1996873\",\n", + " \"1996874\"],\n", + "),\n", " 'VAERSVAX': self.createDataFrame(\n", - " index = [\"1996873\", \"1996874\"],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n", + " index = [\n", + " \"1996873\",\n", + " \"1996874\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " }\n", " ]\n", @@ -154,11 +164,14 @@ " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", - " index = [\"0916600\", \"0916601\", \"1996874\"],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '037K20A', '1'],\n", " [np.NaN, np.NaN, 'Y', 'COVID19', 'MODERNA', '025L20A', '1'],\n", " [np.NaN, np.NaN, 'Y', 'COVID19', 'MODERNA', '025L20A', '1']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\",\n", + " \"1996874\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", "\n", @@ -167,14 +180,18 @@ " vaersDescrs = [\n", " {\n", " 'VAERSDATA': self.createDataFrame(\n", - " index = [\"1048786\"],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", - " data = [ ['Y', np.NaN, np.NaN]]),\n", + " data = [ ['Y', np.NaN, np.NaN]],\n", + " index = [\n", + " \"1048786\"],\n", + "),\n", " 'VAERSVAX': self.createDataFrame(\n", - " index = [\"1048786\", \"1048786\"],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", " ['COVID19', 'MODERNA', '030L20A', '1']],\n", + " index = [\n", + " \"1048786\",\n", + " \"1048786\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " }\n", " ]\n", @@ -184,9 +201,10 @@ " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", - " index = [\"1048786\"],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '030L20A', '1']],\n", + " index = [\n", + " \"1048786\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", "\n", @@ -195,14 +213,17 @@ " vaersDescrs = [\n", " {\n", " 'VAERSDATA': self.createDataFrame(\n", - " index = [\"1048786\"],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", - " data = [ ['Y', np.NaN, np.NaN]]),\n", + " data = [ ['Y', np.NaN, np.NaN]],\n", + " index = [\n", + " \"1048786\"]),\n", " 'VAERSVAX': self.createDataFrame(\n", - " index = [\"1048786\", \"1048786\"],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", " ['COVID19', 'MODERNA', '030L20A', '1']],\n", + " index = [\n", + " \"1048786\",\n", + " \"1048786\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " }\n", " ]\n", @@ -212,9 +233,10 @@ " \n", " # Then\n", " dataFrameExpected = self.createDataFrame(\n", - " index = [\"1048786\"],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '016M20A', '2']],\n", + " index = [\n", + " \"1048786\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", "\n", @@ -238,28 +260,36 @@ " [\n", " {\n", " 'VAERSDATA': self.createDataFrame(\n", - " index = [\"0916600\", \"0916601\"],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " data = [ ['Y', np.NaN, np.NaN],\n", - " [np.NaN, np.NaN, 'Y']]),\n", + " [np.NaN, np.NaN, 'Y']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"]),\n", " 'VAERSVAX': self.createDataFrame(\n", - " index = [\"0916600\", \"0916601\"],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n", + " index = [\n", + " \"0916600\",\n", + " \"0916601\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " },\n", " {\n", " 'VAERSDATA': self.createDataFrame(\n", - " index = [\"1996873\", \"1996874\"],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " data = [ [np.NaN, np.NaN, np.NaN],\n", - " [np.NaN, np.NaN, 'Y']]),\n", + " [np.NaN, np.NaN, 'Y']],\n", + " index = [\n", + " \"1996873\",\n", + " \"1996874\"]),\n", " 'VAERSVAX': self.createDataFrame(\n", - " index = [\"1996873\", \"1996874\"],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n", + " index = [\n", + " \"1996873\",\n", + " \"1996874\"],\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " }\n", " ],\n",