refactoring

This commit is contained in:
frankknoll
2022-02-01 08:33:24 +01:00
parent 360cbb3b25
commit b025bf2aae

View File

@@ -21,34 +21,41 @@
"metadata": {},
"outputs": [],
"source": [
"def read_csv(file, usecols, dtype = {}):\n",
" return pd.read_csv(\n",
" file,\n",
" index_col = 'VAERS_ID',\n",
" encoding = 'latin1',\n",
" low_memory = False,\n",
" usecols = usecols,\n",
" dtype = dtype)\n",
"import pandas as pd\n",
"\n",
"def readVaersDescr(dataDir, year):\n",
"    \"\"\"Load the VAERSDATA and VAERSVAX tables of a single year.\n",
"\n",
"    Files are looked up under dataDir + \"/\" + year + \"VAERSData/\".\n",
"\n",
"    Returns:\n",
"        A dict with the keys 'VAERSDATA' and 'VAERSVAX', each holding the\n",
"        DataFrame that read_csv returned for the corresponding file.\n",
"    \"\"\"\n",
"    yearDir = dataDir + \"/\" + year + \"VAERSData/\"\n",
"    vaersData = read_csv(\n",
"        yearDir + year + \"VAERSDATA.csv\",\n",
"        ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT'])\n",
"    # VAX_DOSE_SERIES is forced to the pandas string dtype instead of being inferred.\n",
"    vaersVax = read_csv(\n",
"        yearDir + year + \"VAERSVAX.csv\",\n",
"        ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n",
"        dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n",
"    return {'VAERSDATA': vaersData, 'VAERSVAX': vaersVax}\n",
"class VaersDescrReader:\n",
" \n",
" def __init__(self, dataDir):\n",
" self.dataDir = dataDir \n",
"\n",
"def readVaersDescrs(dataDir, years):\n",
"    \"\"\"Return one readVaersDescr(dataDir, year) result per entry of years, in order.\"\"\"\n",
"    descrs = []\n",
"    for year in years:\n",
"        descrs.append(readVaersDescr(dataDir, year))\n",
"    return descrs\n",
" def readAllVaersDescrs(self):\n",
" return self.readVaersDescrs([\"2021\", \"2022\"])\n",
" \n",
" def readVaersDescrs(self, years):\n",
" return [self.readVaersDescr(year) for year in years]\n",
"\n",
"def readAllVaersDescrs(dataDir):\n",
"    \"\"\"Read the descriptors of the years 2021 and 2022, in that order.\"\"\"\n",
"    allYears = [\"2021\", \"2022\"]\n",
"    return readVaersDescrs(dataDir, allYears)"
" def readVaersDescr(self, year):\n",
" folder = self.dataDir + \"/\" + year + \"VAERSData/\"\n",
" return {\n",
" 'VAERSDATA':\n",
" self._read_csv(\n",
" folder + year + \"VAERSDATA.csv\",\n",
" ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE', 'HOSPITAL', 'ER_VISIT']),\n",
" 'VAERSVAX':\n",
" self._read_csv(\n",
" folder + year + \"VAERSVAX.csv\",\n",
" ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n",
" dtype = {\"VAX_DOSE_SERIES\": \"string\"})\n",
" }\n",
"\n",
" def _read_csv(self, file, usecols, dtype = {}):\n",
" return pd.read_csv(\n",
" file,\n",
" index_col = 'VAERS_ID',\n",
" encoding = 'latin1',\n",
" low_memory = False,\n",
" usecols = usecols,\n",
" dtype = dtype)\n"
]
},
{
@@ -383,7 +390,7 @@
" self._test_createBatchCodeTable(\n",
" filterDataFrame(\n",
" createDataFrameFromDescrs(\n",
" readAllVaersDescrs(\"test/VAERS\")),\n",
" VaersDescrReader(\"test/VAERS\").readAllVaersDescrs()),\n",
" manufacturer = \"MODERNA\",\n",
" dose = '1'))\n",
"\n",
@@ -484,7 +491,7 @@
" batchCodeTable = createBatchCodeTable(\n",
" filterDataFrame(\n",
" createDataFrameFromDescrs(\n",
" readAllVaersDescrs(\"VAERS\")),\n",
" VaersDescrReader(\"VAERS\").readAllVaersDescrs()),\n",
" manufacturer = manufacturer,\n",
" dose = '1'))\n",
" display(manufacturer, batchCodeTable)\n",
@@ -514,7 +521,7 @@
" severeEffectsBatchCodeTable = createSevereEffectsBatchCodeTable(\n",
" filterDataFrameForSevereEffects(\n",
" createDataFrameFromDescrs(\n",
" readAllVaersDescrs(\"VAERS\")),\n",
" VaersDescrReader(\"VAERS\").readAllVaersDescrs()),\n",
" dose = '1'))\n",
" display('severeEffectsBatchCodeTable', severeEffectsBatchCodeTable)\n",
" severeEffectsBatchCodeTable.to_excel(excelFile)"