refining SymptomsByBatchcodesTableFactoryTest
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
class SymptomsByBatchcodesTableFactory:
|
class SymptomsByBatchcodesTableFactory:
|
||||||
|
|
||||||
@@ -14,13 +14,13 @@ class SymptomsByBatchcodesTableFactory:
|
|||||||
def _get_VAERSVAX_WITH_VAX_LOTS(VAERSVAX):
    """Return VAERSVAX extended by the 'VAX_LOT1'/'VAX_LOT2' columns.

    The lot columns come from
    SymptomsByBatchcodesTableFactory._getVaxLotsTable(VAERSVAX) and are
    appended column-wise.

    The index is turned into an ordinary column *before* duplicates are
    dropped so that 'VAERS_ID' can take part in the duplicate check.
    De-duplicating on the lot columns alone would collapse rows that belong
    to different reports which merely share the same lots.

    NOTE(review): assumes VAERSVAX is indexed by 'VAERS_ID' (as in the unit
    tests); otherwise 'VAERS_ID' is not available after reset_index().
    """
    vaxLotsTable = SymptomsByBatchcodesTableFactory._getVaxLotsTable(VAERSVAX)
    withVaxLots = pd.concat([VAERSVAX, vaxLotsTable], axis='columns')
    return (
        withVaxLots
        .reset_index()
        .drop_duplicates(subset=['VAERS_ID', 'VAX_LOT1', 'VAX_LOT2']))
|
||||||
|
|
||||||
@staticmethod
def _getVaxLotsTable(VAERSVAX):
    """Build a table with the columns 'VAX_LOT1' and 'VAX_LOT2' per VAERS_ID.

    All 'VAX_LOT' values of one 'VAERS_ID' group are collected into a list.
    Each list is padded with the string form of NaN (str(np.nan)) up to a
    length of two before the frame is built, so that reports with fewer
    than two lots still fit the two declared columns.

    The returned frame is indexed like the grouped table, i.e. one row per
    'VAERS_ID' group.

    NOTE(review): fill() never truncates, so a report with more than two
    lots produces a row longer than the two declared columns - confirm that
    the input cannot contain such reports or decide how they are handled.
    """
    VAX_LOT_LIST_Table = VAERSVAX.groupby("VAERS_ID").agg(
        VAX_LOT_LIST = pd.NamedAgg(column = 'VAX_LOT', aggfunc = list))
    paddedVaxLots = [
        fill(VAX_LOTS, 2, str(np.nan))
        for VAX_LOTS in VAX_LOT_LIST_Table['VAX_LOT_LIST'].tolist()
    ]
    return pd.DataFrame(
        paddedVaxLots,
        columns = ['VAX_LOT1', 'VAX_LOT2'],
        index = VAX_LOT_LIST_Table.index)
|
||||||
|
|
||||||
@@ -33,4 +33,7 @@ class SymptomsByBatchcodesTableFactory:
|
|||||||
VAERSSYMPTOMS['SYMPTOM3'],
|
VAERSSYMPTOMS['SYMPTOM3'],
|
||||||
VAERSSYMPTOMS['SYMPTOM4'],
|
VAERSSYMPTOMS['SYMPTOM4'],
|
||||||
VAERSSYMPTOMS['SYMPTOM5']
|
VAERSSYMPTOMS['SYMPTOM5']
|
||||||
]).dropna().drop_duplicates().to_frame(name = "SYMPTOMS").reset_index()
|
]).dropna().to_frame(name = "SYMPTOMS").reset_index()
|
||||||
|
|
||||||
|
def fill(lst, desiredLen, fillValue):
    """Return the items of ``lst`` as a list, right-padded with ``fillValue``.

    Args:
        lst: the items to pad; any iterable is accepted (list, tuple, ...).
        desiredLen: minimum length of the returned list.
        fillValue: value appended until ``desiredLen`` is reached.

    Returns:
        A new list. Input that already has ``desiredLen`` or more items is
        returned unchanged in content (it is never truncated). The argument
        itself is not modified.
    """
    # Copy first: works for non-list iterables and leaves the argument alone.
    items = list(lst)
    return items + [fillValue] * max(desiredLen - len(items), 0)
|
||||||
@@ -57,3 +57,114 @@ class SymptomsByBatchcodesTableFactoryTest(unittest.TestCase):
|
|||||||
index = pd.MultiIndex.from_tuples(
|
index = pd.MultiIndex.from_tuples(
|
||||||
names = ['VAX_LOT1', 'VAX_LOT2'],
|
names = ['VAX_LOT1', 'VAX_LOT2'],
|
||||||
tuples = [['1808982', 'EW0175']] * 13)))
|
tuples = [['1808982', 'EW0175']] * 13)))
|
||||||
|
|
||||||
|
def test_createSymptomsByBatchcodesTable_two_patients_same_symptoms(self):
    """Two reports with the same single lot and the same symptom.

    Both reports must show up in the result, each under the lot pair
    ('EW0175', 'nan').
    """
    # Given
    vaxColumns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES']
    vaxRows = [
        ['COVID19', 'JANSSEN', 'EW0175', '1'],
        ['COVID19', 'JANSSEN', 'EW0175', '1'],
    ]
    VAERSVAX = TestHelper.createDataFrame(
        columns = vaxColumns,
        data = vaxRows,
        index = pd.Index(name = 'VAERS_ID', data = [2547730, 2547731]),
        dtypes = {
            'VAX_DOSE_SERIES': 'string',
            'VAX_LOT': 'string'})

    symptomColumns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5']
    symptomRows = [
        ['Blood pressure orthostatic abnormal', np.nan, np.nan, np.nan, np.nan],
        ['Blood pressure orthostatic abnormal', np.nan, np.nan, np.nan, np.nan],
    ]
    VAERSSYMPTOMS = TestHelper.createDataFrame(
        columns = symptomColumns,
        data = symptomRows,
        index = pd.Index(name = 'VAERS_ID', data = [2547730, 2547731]))

    # When
    symptomsByBatchcodesTable = SymptomsByBatchcodesTableFactory.createSymptomsByBatchcodesTable(VAERSVAX, VAERSSYMPTOMS)

    # Then
    # The missing second lot is represented by the string form of NaN.
    expectedIndex = pd.MultiIndex.from_tuples(
        names = ['VAX_LOT1', 'VAX_LOT2'],
        tuples = [['EW0175', str(np.nan)]] * 2)
    expectedTable = TestHelper.createDataFrame(
        columns = ['SYMPTOMS'],
        data = [
            ['Blood pressure orthostatic abnormal'],
            ['Blood pressure orthostatic abnormal'],
        ],
        index = expectedIndex)
    assert_frame_equal(
        symptomsByBatchcodesTable,
        expectedTable,
        check_dtype = False)
|
||||||
|
|
||||||
|
def test_createSymptomsByBatchcodesTable_two_patients_distinct_symptoms(self):
    """Two reports, each with two lots and its own set of symptoms.

    Report 2547730 carries the lots ('1808982', 'EW0175') and 13 symptoms,
    report 2547744 carries the lots ('EW0175', 'EW0167') and 10 symptoms.
    The expected rows are grouped per lot pair.
    """
    # Given
    # Raw strings keep the backslash of the manufacturer name literal;
    # '\B' in a normal string literal is an invalid escape sequence.
    VAERSVAX = TestHelper.createDataFrame(
        columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
        data = [
            ['COVID19', 'JANSSEN', '1808982', 'UNK'],
            ['COVID19', r'PFIZER\BIONTECH', 'EW0175', '1'],
            ['COVID19', r'PFIZER\BIONTECH', 'EW0175', '1'],
            ['COVID19', r'PFIZER\BIONTECH', 'EW0167', '2'],
        ],
        index = pd.Index(
            name = 'VAERS_ID',
            data = [
                2547730,
                2547730,
                2547744,
                2547744]),
        dtypes = {
            'VAX_DOSE_SERIES': 'string',
            'VAX_LOT': 'string'})
    VAERSSYMPTOMS = TestHelper.createDataFrame(
        columns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5'],
        data = [
            ['Blood pressure orthostatic abnormal', 'COVID-19', 'Coma', 'Computerised tomogram', 'Exposure to SARS-CoV-2'],
            ['Head injury', 'Headache', 'Laboratory test', 'Magnetic resonance imaging', 'SARS-CoV-2 antibody test negative'],
            ['SARS-CoV-2 test positive', 'Unresponsive to stimuli', 'X-ray', np.nan, np.nan],
            ['Computerised tomogram head abnormal', 'Ear pain', 'Headache', 'Idiopathic intracranial hypertension', 'Intracranial pressure increased'],
            ['Lumbar puncture', 'Magnetic resonance imaging head', 'Pain', 'Swelling', 'Vision blurred'],
        ],
        index = pd.Index(
            name = 'VAERS_ID',
            data = [
                2547730,
                2547730,
                2547730,
                2547744,
                2547744]))

    # When
    symptomsByBatchcodesTable = SymptomsByBatchcodesTableFactory.createSymptomsByBatchcodesTable(VAERSVAX, VAERSSYMPTOMS)

    # Then
    assert_frame_equal(
        symptomsByBatchcodesTable,
        TestHelper.createDataFrame(
            columns = ['SYMPTOMS'],
            data = [
                # 13 symptoms of report 2547730
                ['Blood pressure orthostatic abnormal'],
                ['Head injury'],
                ['SARS-CoV-2 test positive'],
                ['COVID-19'],
                ['Headache'],
                ['Unresponsive to stimuli'],
                ['Coma'],
                ['Laboratory test'],
                ['X-ray'],
                ['Computerised tomogram'],
                ['Magnetic resonance imaging'],
                ['Exposure to SARS-CoV-2'],
                ['SARS-CoV-2 antibody test negative'],

                # 10 symptoms of report 2547744
                ['Computerised tomogram head abnormal'],
                ['Lumbar puncture'],
                ['Ear pain'],
                ['Magnetic resonance imaging head'],
                ['Headache'],
                ['Pain'],
                ['Idiopathic intracranial hypertension'],
                ['Swelling'],
                ['Intracranial pressure increased'],
                ['Vision blurred'],
            ],
            index = pd.MultiIndex.from_tuples(
                names = ['VAX_LOT1', 'VAX_LOT2'],
                tuples = [['1808982', 'EW0175']] * 13 + [['EW0175', 'EW0167']] * 10)))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user