adding CreateDataFrameTest
This commit is contained in:
@@ -44,7 +44,7 @@
|
||||
" folder = dataDir + \"/\" + year + \"VAERSData/\"\n",
|
||||
" return {\n",
|
||||
" 'VAERSDATA': read_csv(folder + year + \"VAERSDATA.csv\", ['VAERS_ID', 'DIED', 'L_THREAT', 'DISABLE']),\n",
|
||||
" 'VAERSVAX': read_csv(folder + year + \"VAERSVAX.csv\", ['VAERS_ID', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'])\n",
|
||||
" 'VAERSVAX': read_csv(folder + year + \"VAERSVAX.csv\", ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'])\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" return _createDataFrame(\n",
|
||||
@@ -85,12 +85,71 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e14465d7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pandas.testing import assert_frame_equal\n",
|
||||
"\n",
|
||||
"class CreateDataFrameTest(unittest.TestCase):\n",
|
||||
"\n",
|
||||
" def test_createDataFrame(self):\n",
|
||||
" # Given\n",
|
||||
" vaersDescrs = [\n",
|
||||
" {\n",
|
||||
" 'VAERSDATA': self.createDataFrame(\n",
|
||||
" [ 'DIED', 'L_THREAT', 'DISABLE'],\n",
|
||||
" {\n",
|
||||
" '0916600': ['Y', np.NaN, np.NaN],\n",
|
||||
" '0916601': [np.NaN, np.NaN, 'Y']\n",
|
||||
" }),\n",
|
||||
" 'VAERSVAX': self.createDataFrame(\n",
|
||||
" [ 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n",
|
||||
" {\n",
|
||||
" '0916600': ['COVID19', 'MODERNA', '037K20A'],\n",
|
||||
" '0916601': ['COVID19', 'MODERNA', '025L20A']\n",
|
||||
" })\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" 'VAERSDATA': self.createDataFrame(\n",
|
||||
" [ 'DIED', 'L_THREAT', 'DISABLE'],\n",
|
||||
" {\n",
|
||||
" '1996873': [np.NaN, np.NaN, np.NaN],\n",
|
||||
" '1996874': [np.NaN, np.NaN, 'Y']\n",
|
||||
" }),\n",
|
||||
" 'VAERSVAX': self.createDataFrame(\n",
|
||||
" [ 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n",
|
||||
" {\n",
|
||||
" '1996873': ['HPV9', 'MERCK & CO. INC.', 'R017624'],\n",
|
||||
" '1996874': ['COVID19', 'MODERNA', '025L20A']\n",
|
||||
" })\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" \n",
|
||||
" # When\n",
|
||||
" dataFrame = _createDataFrame(vaersDescrs, \"MODERNA\")\n",
|
||||
" \n",
|
||||
" # Then\n",
|
||||
" dataFrameExpected = self.createDataFrame(\n",
|
||||
" [ 'DIED', 'L_THREAT', 'DISABLE', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],\n",
|
||||
" {\n",
|
||||
" '0916600': ['Y', np.NaN, np.NaN, 'COVID19', 'MODERNA', '037K20A'],\n",
|
||||
" '0916601': [np.NaN, np.NaN, 'Y', 'COVID19', 'MODERNA', '025L20A'],\n",
|
||||
" '1996874': [np.NaN, np.NaN, 'Y', 'COVID19', 'MODERNA', '025L20A']\n",
|
||||
" })\n",
|
||||
" assert_frame_equal(dataFrame, dataFrameExpected, check_dtype = False)\n",
|
||||
"\n",
|
||||
" def createDataFrame(self, columns, data):\n",
|
||||
" return pd.DataFrame.from_dict(data, columns = columns, orient = 'index')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e14465d7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pandas.testing import assert_frame_equal\n",
|
||||
"\n",
|
||||
"class BatchCodeTableTest(unittest.TestCase):\n",
|
||||
"\n",
|
||||
|
||||
7
help.txt
7
help.txt
@@ -6,7 +6,7 @@ FK-TODO:
|
||||
Repeat for second dose and third dose separately. The cumulative effect will then appear.
|
||||
It should be analysed separately anyway, because adverse reactions increase with each dose."
|
||||
# 1. filter the vax table first for just C19 vaccines
|
||||
# 2. and for just n-tn (n \in {1, 2, 3}) dose => VAERSDATA --> VAERSVAX ist 1:1-Beziehung statt 1:n und kann einfacher in eine einzige Tabelle gemergt werden
|
||||
# 2. and for just the n-th dose (VAERSVAX.VAX_DOSE_SERIES == n \in {1, 2, 3}) => the VAERSDATA --> VAERSVAX relationship is then 1:1 instead of 1:n and can be merged more easily into a single table
|
||||
# 3. filter for manufacturer
|
||||
- Prüfe, ob die VAERS_ID wirklich eindeutig ist. Antwort: VAERS_ID ist in der VAERSVAX-Tabelle nicht eindeutig, da es mehrere Impfungen pro Person geben kann.
|
||||
- VAX_LOT-Spalte normalisieren, d.h. mindestens toUpperCase() darauf anwenden
|
||||
@@ -15,7 +15,7 @@ FK-TODO:
|
||||
MOD039K20A
|
||||
#039K20A
|
||||
039K20A-MODERNA
|
||||
039K20A-2A
|
||||
039K20A-2A (vielleicht nicht)
|
||||
039K20A or 039L
|
||||
Moderna/039K20A
|
||||
MODERNA 039K20A
|
||||
@@ -34,3 +34,6 @@ u039k20a
|
||||
039K20A & 031M2
|
||||
039K20A and 032
|
||||
039K20A, 011L20
|
||||
|
||||
|
||||
df[df.index.duplicated(False)].to_excel('results/pfizer_duplicates.xlsx')
|
||||
Reference in New Issue
Block a user