refactoring
This commit is contained in:
@@ -37,25 +37,41 @@
|
||||
" def readVaersDescr(self, year):\n",
|
||||
" folder = self.dataDir + \"/\" + year + \"VAERSData/\"\n",
|
||||
" return {\n",
|
||||
" 'VAERSDATA':\n",
|
||||
" self._read_csv(\n",
|
||||
" folder + year + \"VAERSDATA.csv\",\n",
|
||||
" ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT']),\n",
|
||||
" 'VAERSVAX':\n",
|
||||
" self._read_csv(\n",
|
||||
" folder + year + \"VAERSVAX.csv\",\n",
|
||||
" ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n",
|
||||
" dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n",
|
||||
" 'VAERSDATA': self._readVAERSDATA(folder, year),\n",
|
||||
" 'VAERSVAX': self._readVAERSVAX(folder, year)\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" def _read_csv(self, file, usecols, dtype = {}):\n",
|
||||
" def _readVAERSDATA(self, folder, year):\n",
|
||||
" VAERSDATA = self._read_csv(\n",
|
||||
" file = folder + year + \"VAERSDATA.csv\",\n",
|
||||
" usecols = ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n",
|
||||
" VaersDescrReader._convertColumnsOfDataFrameToNumerics(\n",
|
||||
" VAERSDATA,\n",
|
||||
" ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n",
|
||||
" return VAERSDATA\n",
|
||||
"\n",
|
||||
" def _readVAERSVAX(self, folder, year):\n",
|
||||
" return self._read_csv(\n",
|
||||
" file = folder + year + \"VAERSVAX.csv\",\n",
|
||||
" usecols = ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n",
|
||||
" dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n",
|
||||
"\n",
|
||||
" def _read_csv(self, file, **kwargs):\n",
|
||||
" return pd.read_csv(\n",
|
||||
" file,\n",
|
||||
" index_col = 'VAERS_ID',\n",
|
||||
" encoding = 'latin1',\n",
|
||||
" low_memory = False,\n",
|
||||
" usecols = usecols,\n",
|
||||
" dtype = dtype)\n"
|
||||
" **kwargs)\n",
|
||||
"\n",
|
||||
" @staticmethod\n",
|
||||
" def _convertColumnsOfDataFrameToNumerics(dataFrame, columns):\n",
|
||||
" for column in columns:\n",
|
||||
" VaersDescrReader._convertColumnOfDataFrameToNumeric(dataFrame, column)\n",
|
||||
"\n",
|
||||
" @staticmethod\n",
|
||||
" def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n",
|
||||
" dataFrame[column] = np.where(dataFrame[column] == 'Y', 1, 0)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -128,7 +144,6 @@
|
||||
" \n",
|
||||
" def __init__(self, dataFrame : pd.DataFrame):\n",
|
||||
" self.dataFrame = dataFrame \n",
|
||||
" self._convertColumnsOfDataFrameToNumerics(['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n",
|
||||
"\n",
|
||||
" def createBatchCodeTable(self):\n",
|
||||
" batchCodeTable = self.dataFrame.groupby('VAX_LOT').agg(\n",
|
||||
@@ -189,13 +204,6 @@
|
||||
" manufacturerByBatchCodeTable = manufacturerByBatchCodeTable.drop_duplicates(subset = ['VAX_LOT'])\n",
|
||||
" return manufacturerByBatchCodeTable.set_index('VAX_LOT')\n",
|
||||
"\n",
|
||||
" def _convertColumnsOfDataFrameToNumerics(self, columns):\n",
|
||||
" for column in columns:\n",
|
||||
" self._convertColumnOfDataFrameToNumeric(column)\n",
|
||||
"\n",
|
||||
" def _convertColumnOfDataFrameToNumeric(self, column):\n",
|
||||
" self.dataFrame[column] = np.where(self.dataFrame[column] == 'Y', 1, 0)\n",
|
||||
"\n",
|
||||
" def _flattenColumns(self, batchCodeTable):\n",
|
||||
" batchCodeTable.columns = [\"_\".join(a) for a in batchCodeTable.columns.to_flat_index()]\n",
|
||||
"\n",
|
||||
@@ -225,9 +233,6 @@
|
||||
" \n",
|
||||
" @staticmethod\n",
|
||||
" def getDoseTable(dataFrame):\n",
|
||||
" # FK-TODO: _convertColumnsOfDataFrameToNumerics() sollte schon während des Einlesens aus den CSV-Dateien durchgeführt werden\n",
|
||||
" # FK-TODO: bitte alle DataFrames als unmutable behandeln und nicht inplace ändern.\n",
|
||||
" DoseAnalysis._convertColumnsOfDataFrameToNumerics(dataFrame, ['DIED', 'L_THREAT', 'DISABLE'])\n",
|
||||
" doseTable = dataFrame.groupby('VAX_DOSE_SERIES').agg(\n",
|
||||
" {\n",
|
||||
" 'DIED': ['sum', 'size'],\n",
|
||||
@@ -246,21 +251,6 @@
|
||||
" doseTable['Severe reports (%)'] = (doseTable['Deaths'] + doseTable['Disabilities'] + doseTable['Life Threatening Illnesses']) / doseTable['Total reports'] * 100\n",
|
||||
" return doseTable\n",
|
||||
"\n",
|
||||
" @staticmethod\n",
|
||||
" def _count(dataFrame, column):\n",
|
||||
" return len(dataFrame[dataFrame[column] == 'Y'])\n",
|
||||
"\n",
|
||||
" # FK-TODO: DRY with BatchCodeTableHelper\n",
|
||||
" @staticmethod\n",
|
||||
" def _convertColumnsOfDataFrameToNumerics(dataFrame, columns):\n",
|
||||
" for column in columns:\n",
|
||||
" DoseAnalysis._convertColumnOfDataFrameToNumeric(dataFrame, column)\n",
|
||||
"\n",
|
||||
" # FK-TODO: DRY with BatchCodeTableHelper\n",
|
||||
" @staticmethod\n",
|
||||
" def _convertColumnOfDataFrameToNumeric(dataFrame, column):\n",
|
||||
" dataFrame[column] = np.where(dataFrame[column] == 'Y', 1, 0)\n",
|
||||
"\n",
|
||||
" # FK-TODO: DRY with BatchCodeTableHelper\n",
|
||||
" @staticmethod\n",
|
||||
" def _flattenColumns(batchCodeTable):\n",
|
||||
@@ -296,8 +286,8 @@
|
||||
" {\n",
|
||||
" 'VAERSDATA': self.createDataFrame(\n",
|
||||
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
|
||||
" data = [ ['Y', np.NaN, np.NaN],\n",
|
||||
" [np.NaN, np.NaN, 'Y']],\n",
|
||||
" data = [ [1, 0, 0],\n",
|
||||
" [0, 0, 1]],\n",
|
||||
" index = [\n",
|
||||
" \"0916600\",\n",
|
||||
" \"0916601\"]),\n",
|
||||
@@ -313,8 +303,8 @@
|
||||
" {\n",
|
||||
" 'VAERSDATA': self.createDataFrame(\n",
|
||||
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
|
||||
" data = [ [np.NaN, np.NaN, np.NaN],\n",
|
||||
" [np.NaN, np.NaN, 'Y']],\n",
|
||||
" data = [ [0, 0, 0],\n",
|
||||
" [0, 0, 1]],\n",
|
||||
" index = [\n",
|
||||
" \"1996873\",\n",
|
||||
" \"1996874\"]),\n",
|
||||
@@ -335,9 +325,9 @@
|
||||
" # Then\n",
|
||||
" dataFrameExpected = self.createDataFrame(\n",
|
||||
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||
" data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '037K20A', '1'],\n",
|
||||
" [np.NaN, np.NaN, 'Y', 'COVID19', 'MODERNA', '025L20A', '1'],\n",
|
||||
" [np.NaN, np.NaN, 'Y', 'COVID19', 'MODERNA', '025L20A', '1']],\n",
|
||||
" data = [ [1, 0, 0, 'COVID19', 'MODERNA', '037K20A', '1'],\n",
|
||||
" [0, 0, 1, 'COVID19', 'MODERNA', '025L20A', '1'],\n",
|
||||
" [0, 0, 1, 'COVID19', 'MODERNA', '025L20A', '1']],\n",
|
||||
" index = [\n",
|
||||
" \"0916600\",\n",
|
||||
" \"0916601\",\n",
|
||||
@@ -353,8 +343,8 @@
|
||||
" {\n",
|
||||
" 'VAERSDATA': self.createDataFrame(\n",
|
||||
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
|
||||
" data = [ ['Y', 'Y', np.NaN, 'Y', 'Y'],\n",
|
||||
" [np.NaN, np.NaN, 'Y', np.NaN, 'Y']],\n",
|
||||
" data = [ [1, 1, 0, 1, 1],\n",
|
||||
" [0, 0, 1, 0, 1]],\n",
|
||||
" index = [\n",
|
||||
" \"0916600\",\n",
|
||||
" \"0916601\"]),\n",
|
||||
@@ -375,8 +365,8 @@
|
||||
" # Then\n",
|
||||
" dataFrameExpected = self.createDataFrame(\n",
|
||||
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||
" data = [ ['Y', 'Y', np.NaN, 'Y', 'Y', 'COVID19', 'MODERNA', '037K20A', '1'],\n",
|
||||
" [np.NaN, np.NaN, 'Y', np.NaN, 'Y', 'COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n",
|
||||
" data = [ [1, 1, 0, 1, 1, 'COVID19', 'MODERNA', '037K20A', '1'],\n",
|
||||
" [0, 0, 1, 0, 1, 'COVID19', 'PFIZER\\BIONTECH', '025L20A', '1']],\n",
|
||||
" index = [\n",
|
||||
" \"0916600\",\n",
|
||||
" \"0916601\"],\n",
|
||||
@@ -391,7 +381,7 @@
|
||||
" {\n",
|
||||
" 'VAERSDATA': self.createDataFrame(\n",
|
||||
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
|
||||
" data = [ ['Y', np.NaN, np.NaN]],\n",
|
||||
" data = [ [1, 0, 0]],\n",
|
||||
" index = [\n",
|
||||
" \"1048786\"]),\n",
|
||||
" 'VAERSVAX': self.createDataFrame(\n",
|
||||
@@ -411,7 +401,7 @@
|
||||
" # Then\n",
|
||||
" dataFrameExpected = self.createDataFrame(\n",
|
||||
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||
" data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '030L20A', '1']],\n",
|
||||
" data = [ [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1']],\n",
|
||||
" index = [\n",
|
||||
" \"1048786\"],\n",
|
||||
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
|
||||
@@ -425,7 +415,7 @@
|
||||
" {\n",
|
||||
" 'VAERSDATA': self.createDataFrame(\n",
|
||||
" columns = ['DIED', 'L_THREAT', 'DISABLE'],\n",
|
||||
" data = [ ['Y', np.NaN, np.NaN]],\n",
|
||||
" data = [ [1, 0, 0]],\n",
|
||||
" index = [\n",
|
||||
" \"1048786\"]),\n",
|
||||
" 'VAERSVAX': self.createDataFrame(\n",
|
||||
@@ -445,7 +435,7 @@
|
||||
" # Then\n",
|
||||
" dataFrameExpected = self.createDataFrame(\n",
|
||||
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||
" data = [ ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '016M20A', '2']],\n",
|
||||
" data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2']],\n",
|
||||
" index = [\n",
|
||||
" \"1048786\"],\n",
|
||||
" dtypes = {'VAX_DOSE_SERIES': \"string\"})\n",
|
||||
@@ -473,8 +463,8 @@
|
||||
" {\n",
|
||||
" 'VAERSDATA': self.createDataFrame(\n",
|
||||
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
|
||||
" data = [ ['Y', 'Y', np.NaN, 'Y', 'Y'],\n",
|
||||
" [np.NaN, np.NaN, 'Y', np.NaN, 'Y']],\n",
|
||||
" data = [ [1, 1, 0, 1, 1],\n",
|
||||
" [0, 0, 1, 0, 1]],\n",
|
||||
" index = [\n",
|
||||
" \"0916600\",\n",
|
||||
" \"0916601\"]),\n",
|
||||
@@ -512,8 +502,8 @@
|
||||
" {\n",
|
||||
" 'VAERSDATA': self.createDataFrame(\n",
|
||||
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
|
||||
" data = [ ['Y', np.NaN, np.NaN, np.NaN, np.NaN],\n",
|
||||
" [np.NaN, np.NaN, 'Y', np.NaN, np.NaN]],\n",
|
||||
" data = [ [1, 0, 0, 0, 0],\n",
|
||||
" [0, 0, 1, 0, 0]],\n",
|
||||
" index = [\n",
|
||||
" \"0916600\",\n",
|
||||
" \"0916601\"]),\n",
|
||||
@@ -529,8 +519,8 @@
|
||||
" {\n",
|
||||
" 'VAERSDATA': self.createDataFrame(\n",
|
||||
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'],\n",
|
||||
" data = [ [np.NaN, np.NaN, np.NaN, np.NaN, np.NaN],\n",
|
||||
" [np.NaN, np.NaN, 'Y', np.NaN, np.NaN]],\n",
|
||||
" data = [ [0, 0, 0, 0, 0],\n",
|
||||
" [0, 0, 1, 0, 0]],\n",
|
||||
" index = [\n",
|
||||
" \"1996873\",\n",
|
||||
" \"1996874\"]),\n",
|
||||
@@ -585,9 +575,9 @@
|
||||
" # Given\n",
|
||||
" dataFrame = self.createDataFrame(\n",
|
||||
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],\n",
|
||||
" data = [ ['Y', np.NaN, np.NaN,\t 'COVID19', 'MODERNA', '016M20A', '2'],\n",
|
||||
" ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '030L20A', '1'],\n",
|
||||
" ['Y', 'Y', 'Y', 'COVID19', 'MODERNA', '030L20B', '1']],\n",
|
||||
" data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2'],\n",
|
||||
" [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1'],\n",
|
||||
" [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1']],\n",
|
||||
" index = [\n",
|
||||
" \"1048786\",\n",
|
||||
" \"1048786\",\n",
|
||||
|
||||
Reference in New Issue
Block a user