refactoring

This commit is contained in:
frankknoll
2022-02-01 10:48:48 +01:00
parent a97068a0ae
commit 978bf3d4f8

View File

@@ -226,50 +226,51 @@
"source": [ "source": [
"from pandas.testing import assert_frame_equal\n", "from pandas.testing import assert_frame_equal\n",
"\n", "\n",
"class CreateAndFilterDataFrameTest(unittest.TestCase):\n", "class DataFrameFilterTest(unittest.TestCase):\n",
"\n", "\n",
" def test_createAndFilterDataFrameFromDescrs(self):\n", " def test_filterBy(self):\n",
" # Given\n", " # Given\n",
" vaersDescrs = [\n", " dataFrameFilter = DataFrameFilter(\n",
" {\n", " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n",
" 'VAERSDATA': self.createDataFrame(\n", " [\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " {\n",
" data = [ ['Y', np.NaN, np.NaN],\n", " 'VAERSDATA': self.createDataFrame(\n",
" [np.NaN, np.NaN, 'Y']],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
" index = [\n", " data = [ ['Y', np.NaN, np.NaN],\n",
" \"0916600\",\n", " [np.NaN, np.NaN, 'Y']],\n",
" \"0916601\"]),\n", " index = [\n",
" 'VAERSVAX': self.createDataFrame(\n", " \"0916600\",\n",
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " \"0916601\"]),\n",
" data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", " 'VAERSVAX': self.createDataFrame(\n",
" ['COVID19', 'MODERNA', '025L20A', '1']],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" index = [\n", " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n",
" \"0916600\",\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n",
" \"0916601\"],\n", " index = [\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " \"0916600\",\n",
" },\n", " \"0916601\"],\n",
" {\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" 'VAERSDATA': self.createDataFrame(\n", " },\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " {\n",
" data = [ [np.NaN, np.NaN, np.NaN],\n", " 'VAERSDATA': self.createDataFrame(\n",
" [np.NaN, np.NaN, 'Y']],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
" index = [\n", " data = [ [np.NaN, np.NaN, np.NaN],\n",
" \"1996873\",\n", " [np.NaN, np.NaN, 'Y']],\n",
" \"1996874\"]),\n", " index = [\n",
" 'VAERSVAX': self.createDataFrame(\n", " \"1996873\",\n",
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " \"1996874\"]),\n",
" data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n", " 'VAERSVAX': self.createDataFrame(\n",
" ['COVID19', 'MODERNA', '025L20A', '1']],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" index = [\n", " data = [ ['HPV9', 'MERCK & CO. INC.', 'R017624', 'UNK'],\n",
" \"1996873\",\n", " ['COVID19', 'MODERNA', '025L20A', '1']],\n",
" \"1996874\"],\n", " index = [\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " \"1996873\",\n",
" }\n", " \"1996874\"],\n",
" ]\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " }\n",
" ]))\n",
" \n", " \n",
" # When\n", " # When\n",
" dataFrame = DataFrameFilter(dataFrame).filterBy(manufacturer = \"MODERNA\", dose = '1')\n", " dataFrame = dataFrameFilter.filterBy(manufacturer = \"MODERNA\", dose = '1')\n",
" \n", " \n",
" # Then\n", " # Then\n",
" dataFrameExpected = self.createDataFrame(\n", " dataFrameExpected = self.createDataFrame(\n",
@@ -284,31 +285,32 @@
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n", " assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
"\n", "\n",
" def test_createDataFrameFromForSevereEffects(self):\n", " def test_filterForSevereEffects(self):\n",
" # Given\n", " # Given\n",
" vaersDescrs = [\n", " dataFrameFilter = DataFrameFilter(\n",
" {\n", " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n",
" 'VAERSDATA': self.createDataFrame(\n", " [\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n", " {\n",
" data = [ ['Y', 'Y', np.NaN, 'Y', 'Y'],\n", " 'VAERSDATA': self.createDataFrame(\n",
" [np.NaN, np.NaN, 'Y', np.NaN, 'Y']],\n", " columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
" index = [\n", " data = [ ['Y', 'Y', np.NaN, 'Y', 'Y'],\n",
" \"0916600\",\n", " [np.NaN, np.NaN, 'Y', np.NaN, 'Y']],\n",
" \"0916601\"]),\n", " index = [\n",
" 'VAERSVAX': self.createDataFrame(\n", " \"0916600\",\n",
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " \"0916601\"]),\n",
" data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n", " 'VAERSVAX': self.createDataFrame(\n",
" ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" index = [\n", " data = [ ['COVID19', 'MODERNA', '037K20A', '1'],\n",
" \"0916600\",\n", " ['COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n",
" \"0916601\"],\n", " index = [\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " \"0916600\",\n",
" }\n", " \"0916601\"],\n",
" ]\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " }\n",
" \n", " ]))\n",
"\n",
" # When\n", " # When\n",
" dataFrame = DataFrameFilter(dataFrame).filterForSevereEffects(dose = '1')\n", " dataFrame = dataFrameFilter.filterForSevereEffects(dose = '1')\n",
" \n", " \n",
" # Then\n", " # Then\n",
" dataFrameExpected = self.createDataFrame(\n", " dataFrameExpected = self.createDataFrame(\n",
@@ -323,27 +325,28 @@
"\n", "\n",
" def test_createAndFilterDataFrameFromDescrsWithFirstDose(self):\n", " def test_createAndFilterDataFrameFromDescrsWithFirstDose(self):\n",
" # Given\n", " # Given\n",
" vaersDescrs = [\n", " dataFrameFilter = DataFrameFilter(\n",
" {\n", " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n",
" 'VAERSDATA': self.createDataFrame(\n", " [\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " {\n",
" data = [ ['Y', np.NaN, np.NaN]],\n", " 'VAERSDATA': self.createDataFrame(\n",
" index = [\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
" \"1048786\"]),\n", " data = [ ['Y', np.NaN, np.NaN]],\n",
" 'VAERSVAX': self.createDataFrame(\n", " index = [\n",
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " \"1048786\"]),\n",
" data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", " 'VAERSVAX': self.createDataFrame(\n",
" ['COVID19', 'MODERNA', '030L20A', '1']],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" index = [\n", " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n",
" \"1048786\",\n", " ['COVID19', 'MODERNA', '030L20A', '1']],\n",
" \"1048786\"],\n", " index = [\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " \"1048786\",\n",
" }\n", " \"1048786\"],\n",
" ]\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " }\n",
" ]))\n",
" \n", " \n",
" # When\n", " # When\n",
" dataFrame = DataFrameFilter(dataFrame).filterBy(manufacturer = \"MODERNA\", dose = '1')\n", " dataFrame = dataFrameFilter.filterBy(manufacturer = \"MODERNA\", dose = '1')\n",
" \n", " \n",
" # Then\n", " # Then\n",
" dataFrameExpected = self.createDataFrame(\n", " dataFrameExpected = self.createDataFrame(\n",
@@ -356,27 +359,28 @@
"\n", "\n",
" def test_createAndFilterDataFrameFromDescrsWithSecondDose(self):\n", " def test_createAndFilterDataFrameFromDescrsWithSecondDose(self):\n",
" # Given\n", " # Given\n",
" vaersDescrs = [\n", " dataFrameFilter = DataFrameFilter(\n",
" {\n", " VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n",
" 'VAERSDATA': self.createDataFrame(\n", " [\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n", " {\n",
" data = [ ['Y', np.NaN, np.NaN]],\n", " 'VAERSDATA': self.createDataFrame(\n",
" index = [\n", " columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
" \"1048786\"]),\n", " data = [ ['Y', np.NaN, np.NaN]],\n",
" 'VAERSVAX': self.createDataFrame(\n", " index = [\n",
" columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n", " \"1048786\"]),\n",
" data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n", " 'VAERSVAX': self.createDataFrame(\n",
" ['COVID19', 'MODERNA', '030L20A', '1']],\n", " columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
" index = [\n", " data = [ ['COVID19', 'MODERNA', '016M20A', '2'],\n",
" \"1048786\",\n", " ['COVID19', 'MODERNA', '030L20A', '1']],\n",
" \"1048786\"],\n", " index = [\n",
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n", " \"1048786\",\n",
" }\n", " \"1048786\"],\n",
" ]\n", " dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n", " }\n",
" \n", " ]))\n",
"\n",
" # When\n", " # When\n",
" dataFrame = DataFrameFilter(dataFrame).filterBy(manufacturer = \"MODERNA\", dose = '2')\n", " dataFrame = dataFrameFilter.filterBy(manufacturer = \"MODERNA\", dose = '2')\n",
" \n", " \n",
" # Then\n", " # Then\n",
" dataFrameExpected = self.createDataFrame(\n", " dataFrameExpected = self.createDataFrame(\n",