refactoring

This commit is contained in:
frankknoll
2022-02-20 18:55:02 +01:00
parent 9d44215c37
commit be1f4e613e
23 changed files with 68 additions and 62 deletions

View File

@@ -33,14 +33,14 @@
"\n",
" def readVaersDescrForYear(self, year):\n",
" return {\n",
" 'VAERSDATA': self._readVAERSDATA(self.dataDir + \"/\" + year + \"VAERSDATA.csv\"),\n",
" 'VAERSVAX': self._readVAERSVAX(self.dataDir + \"/\" + year + \"VAERSVAX.csv\")\n",
" 'VAERSDATA': self._readVAERSDATA('{dataDir}/{year}VAERSDATA.csv'.format(dataDir = self.dataDir, year = year)),\n",
" 'VAERSVAX': self._readVAERSVAX('{dataDir}/{year}VAERSVAX.csv'.format(dataDir = self.dataDir, year = year))\n",
" }\n",
"\n",
" def readNonDomesticVaersDescr(self):\n",
" return {\n",
" 'VAERSDATA': self._readVAERSDATA(self.dataDir + \"/\" + \"NonDomesticVAERSDATA.csv\"),\n",
" 'VAERSVAX': self._readVAERSVAX(self.dataDir + \"/\" + \"NonDomesticVAERSVAX.csv\")\n",
" 'VAERSDATA': self._readVAERSDATA(self.dataDir + \"/NonDomesticVAERSDATA.csv\"),\n",
" 'VAERSVAX': self._readVAERSVAX(self.dataDir + \"/NonDomesticVAERSVAX.csv\")\n",
" }\n",
"\n",
" def _readVAERSDATA(self, file):\n",
@@ -331,9 +331,9 @@
"class CountryColumnAdder:\n",
" \n",
" @staticmethod\n",
" def addCountryColumn(dataFrame, countryColumnName):\n",
" dataFrame[countryColumnName] = CountryColumnAdder.getCountryColumn(dataFrame)\n",
" return dataFrame.astype({countryColumnName: \"string\"})\n",
" def addCountryColumn(dataFrame):\n",
" dataFrame['Country'] = CountryColumnAdder.getCountryColumn(dataFrame)\n",
" return dataFrame.astype({'Country': \"string\"})\n",
"\n",
" @staticmethod\n",
" def getCountryColumn(dataFrame):\n",
@@ -422,9 +422,7 @@
" return self.batchCodeTableByCountryFactory.createBatchCodeTableByCountry(country)\n",
"\n",
" def _createInternationalLotTable(self):\n",
" countryColumnName = 'Country'\n",
" dataFrame = CountryColumnAdder.addCountryColumn(self.dataFrame, countryColumnName = countryColumnName)\n",
" return SummationTableFactory.createSummationTableHavingSevereReportsColumn(dataFrame.groupby(dataFrame[countryColumnName]))\n"
" return SummationTableFactory.createSummationTableHavingSevereReportsColumn(self.dataFrame.groupby(self.dataFrame['Country']))\n"
]
},
{
@@ -461,13 +459,11 @@
" return self._getCountry(self.countryBatchCodeTable, country)\n",
"\n",
" def _getCountryBatchCodeTable(self):\n",
" countryColumnName = 'Country'\n",
" dataFrame = CountryColumnAdder.addCountryColumn(self.dataFrame, countryColumnName = countryColumnName)\n",
" return SummationTableFactory.createSummationTableHavingSevereReportsColumn(\n",
" dataFrame.groupby(\n",
" self.dataFrame.groupby(\n",
" [\n",
" dataFrame[countryColumnName],\n",
" dataFrame['VAX_LOT']\n",
" self.dataFrame['Country'],\n",
" self.dataFrame['VAX_LOT']\n",
" ]))\n",
"\n",
" def _getCountry(self, countryBatchCodeTable, country):\n",
@@ -1009,7 +1005,7 @@
"\n",
" def test_createBatchCodeTableFromFiles(self):\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(\n",
" VaersDescrReader(dataDir = \"test/VAERS\").readVaersDescrsForYears([\"2021\", \"2022\"]))\n",
" VaersDescrReader(dataDir = \"test/VAERS\").readVaersDescrsForYears([2021, 2022]))\n",
" DataFrameNormalizer.normalize(dataFrame)\n",
" self._test_createBatchCodeTable(dataFrame)\n",
"\n",
@@ -1128,12 +1124,12 @@
" def test_createInternationalLotTable(self):\n",
" # Given\n",
" dataFrame = TestHelper.createDataFrame(\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE', 'HOSPITAL', 'ER_VISIT'],\n",
" data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 'GBPFIZER INC2020486806', 0, 0],\n",
" [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n",
" [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n",
" [0, 0, 0, 'COVID19', 'MODERNA', '030L20B', '1', 'dummy'],\n",
" [0, 0, 0, 'COVID19', 'MODERNA', '030L20B', '1', 123]],\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE', 'HOSPITAL', 'ER_VISIT', 'Country'],\n",
" data = [ [1, 0, 0, 'COVID19', 'MODERNA', '016M20A', '2', 'GBPFIZER INC2020486806', 0, 0, 'United Kingdom'],\n",
" [1, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France'],\n",
" [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France'],\n",
" [0, 0, 0, 'COVID19', 'MODERNA', '030L20B', '1', 'dummy', 0, 0, 'Unknown Country'],\n",
" [0, 0, 0, 'COVID19', 'MODERNA', '030L20B', '1', 123, 0, 0, 'Unknown Country']],\n",
" index = [\n",
" \"1048786\",\n",
" \"1048786\",\n",
@@ -1160,17 +1156,16 @@
" 'United Kingdom',\n",
" 'Unknown Country'\n",
" ],\n",
" dtype = \"string\",\n",
" name = 'Country')))\n",
"\n",
" def test_createBatchCodeTableByCountry(self):\n",
" # Given\n",
" dataFrame = TestHelper.createDataFrame(\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE', 'HOSPITAL', 'ER_VISIT'],\n",
" data = [ [1, 0, 0, 'COVID19', 'PFIZER\\BIONTECH', '016M20A', '2', 'GBPFIZER INC2020486806', 0, 0],\n",
" [0, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n",
" [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n",
" [0, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0]],\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE', 'HOSPITAL', 'ER_VISIT', 'Country'],\n",
" data = [ [1, 0, 0, 'COVID19', 'PFIZER\\BIONTECH', '016M20A', '2', 'GBPFIZER INC2020486806', 0, 0, 'United Kingdom'],\n",
" [0, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France'],\n",
" [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France'],\n",
" [0, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France']],\n",
" index = [\n",
" \"1048786\",\n",
" \"1048786\",\n",
@@ -1200,11 +1195,11 @@
" def test_createBatchCodeTableByNonExistingCountry(self):\n",
" # Given\n",
" dataFrame = TestHelper.createDataFrame(\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE', 'HOSPITAL', 'ER_VISIT'],\n",
" data = [ [1, 0, 0, 'COVID19', 'PFIZER\\BIONTECH', '016M20A', '2', 'GBPFIZER INC2020486806', 0, 0],\n",
" [0, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n",
" [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0],\n",
" [0, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0]],\n",
" columns = ['DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES', 'SPLTTYPE', 'HOSPITAL', 'ER_VISIT', 'Country'],\n",
" data = [ [1, 0, 0, 'COVID19', 'PFIZER\\BIONTECH', '016M20A', '2', 'GBPFIZER INC2020486806', 0, 0, 'United Kingdom'],\n",
" [0, 0, 0, 'COVID19', 'MODERNA', '030L20A', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France'],\n",
" [1, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France'],\n",
" [0, 1, 1, 'COVID19', 'MODERNA', '030L20B', '1', 'FRMODERNATX, INC.MOD20224', 0, 0, 'France']],\n",
" index = [\n",
" \"1048786\",\n",
" \"1048786\",\n",
@@ -1243,14 +1238,25 @@
"metadata": {},
"outputs": [],
"source": [
"def getVaersForYears(years):\n",
" return _getVaers(_getVaersDescrReader().readVaersDescrsForYears(years))\n",
"\n",
"def getVaersForYear(year):\n",
" return getVaersForYears([year])\n",
"\n",
"# FK-TODO: DRY with getNonDomesticVaers()\n",
"def getVaersForYears(years):\n",
" vaersDescrs = _getVaersDescrReader().readVaersDescrsForYears(years)\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
" dataFrame['Country'] = 'United States'\n",
" DataFrameNormalizer.normalize(dataFrame)\n",
" dataFrame = SevereColumnAdder.addSevereColumn(dataFrame)\n",
" return dataFrame\n",
"\n",
"def getNonDomesticVaers():\n",
" return _getVaers([_getVaersDescrReader().readNonDomesticVaersDescr()])\n",
" vaersDescrs = [_getVaersDescrReader().readNonDomesticVaersDescr()]\n",
" dataFrame = VaersDescr2DataFrameConverter.createDataFrameFromDescrs(vaersDescrs)\n",
" dataFrame = CountryColumnAdder.addCountryColumn(dataFrame)\n",
" DataFrameNormalizer.normalize(dataFrame)\n",
" dataFrame = SevereColumnAdder.addSevereColumn(dataFrame)\n",
" return dataFrame\n",
"\n",
"def _getVaersDescrReader():\n",
" return VaersDescrReader(dataDir = \"VAERS\")\n",
@@ -1270,7 +1276,7 @@
"metadata": {},
"outputs": [],
"source": [
"vaers = getVaersForYears([\"2020\", \"2021\", \"2022\"])\n",
"vaers = getVaersForYears([2020, 2021, 2022])\n",
"vaers"
]
},
@@ -1486,7 +1492,7 @@
"metadata": {},
"outputs": [],
"source": [
"countries = sorted(CountryColumnAdder.getCountryColumn(nonDomesticVaers).unique())"
"countries = sorted(internationalVaers['Country'].unique())"
]
},
{
@@ -1507,7 +1513,7 @@
"outputs": [],
"source": [
"createAndSaveAndDisplayBatchCodeTablesByCountry(\n",
" nonDomesticVaers,\n",
" internationalVaers,\n",
" countries,\n",
" minADRsForLethality = 100)"
]