adding convertVAX_LOTColumnToUpperCase() in VaersDescrReader
This commit is contained in:
@@ -182,14 +182,14 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fc2c5591",
|
||||
"id": "6e6efc19",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from HistogramDescriptionPersister import HistogramDescriptionPersister\n",
|
||||
"\n",
|
||||
"histogramDescriptionPersister = HistogramDescriptionPersister('../docs/data/histograms')\n",
|
||||
"histogramDescriptionPersister.saveHistogramDescriptionsForBatchcodes(['eh9899'], dictByBatchcodeTable)"
|
||||
"histogramDescriptionPersister.saveHistogramDescriptionsForBatchcodes(batchcodes[:100], dictByBatchcodeTable)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -202,33 +202,6 @@
|
||||
"batchcodes[:10]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ba9f665a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from TableByBatchcodeFilter import TableByBatchcodeFilter\n",
|
||||
"filteredTable = TableByBatchcodeFilter.filterTableByBatchcode('<NA>', dictByBatchcodeTable)\n",
|
||||
"filteredTable"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c7027164",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from DictByBatchcodeTable2DictConverter import DictByBatchcodeTable2DictConverter\n",
|
||||
"import json\n",
|
||||
"# FK-TODO: call filterTableByBatchcode() and convertDictByBatchcodeTable2Json() for all batch codes and save each result in a file batchcode.json.\n",
|
||||
"dict = DictByBatchcodeTable2DictConverter.convertDictByBatchcodeTable2Dict(filteredTable, 'FE6208')\n",
|
||||
"dict\n",
|
||||
"# print(json.dumps(json.loads(jsonActual), indent=2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
||||
@@ -1,12 +1,10 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from DataFrameNormalizer import DataFrameNormalizer
|
||||
|
||||
class SymptomByBatchcodeTableFactory:
|
||||
|
||||
@staticmethod
|
||||
def createSymptomByBatchcodeTable(VAERSVAX, VAERSSYMPTOMS):
|
||||
DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX)
|
||||
index_columns = SymptomByBatchcodeTableFactory._getIndexColumns(VAERSVAX)
|
||||
symptomColumn = 'SYMPTOM'
|
||||
return (pd
|
||||
|
||||
@@ -167,45 +167,3 @@ class SymptomByBatchcodeTableFactoryTest(unittest.TestCase):
|
||||
index = pd.MultiIndex.from_tuples(
|
||||
names = ['VAX_LOT1', 'VAX_LOT2'],
|
||||
tuples = [['1808982', 'EW0175']] * 13 + [['EW0167', 'EW0175']] * 10)))
|
||||
|
||||
def test_createSymptomByBatchcodeTable_after_convertVAX_LOTColumnToUpperCase(self):
|
||||
# Given
|
||||
VAX_LOT = 'EW0175'
|
||||
VAX_LOT_lower = VAX_LOT.lower()
|
||||
VAERSVAX = TestHelper.createDataFrame(
|
||||
columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
|
||||
data = [ ['COVID19', 'JANSSEN', VAX_LOT, '1'],
|
||||
['COVID19', 'JANSSEN', VAX_LOT_lower, '1']],
|
||||
index = pd.Index(
|
||||
name = 'VAERS_ID',
|
||||
data=[
|
||||
2547730,
|
||||
2547731]),
|
||||
dtypes = {
|
||||
'VAX_DOSE_SERIES': 'string',
|
||||
'VAX_LOT': 'string'})
|
||||
VAERSSYMPTOMS = TestHelper.createDataFrame(
|
||||
columns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5'],
|
||||
data = [ ['Blood pressure orthostatic abnormal', np.nan, np.nan, np.nan, np.nan],
|
||||
['Blood pressure orthostatic abnormal', np.nan, np.nan, np.nan, np.nan]],
|
||||
index = pd.Index(
|
||||
name = 'VAERS_ID',
|
||||
data=[
|
||||
2547730,
|
||||
2547731]))
|
||||
|
||||
# When
|
||||
symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(VAERSVAX, VAERSSYMPTOMS)
|
||||
|
||||
# Then
|
||||
assert_frame_equal(
|
||||
symptomByBatchcodeTable,
|
||||
TestHelper.createDataFrame(
|
||||
columns = ['SYMPTOM'],
|
||||
data = [ ['Blood pressure orthostatic abnormal'],
|
||||
['Blood pressure orthostatic abnormal']],
|
||||
index = pd.Index(
|
||||
name = 'VAX_LOT1',
|
||||
data = [VAX_LOT,
|
||||
VAX_LOT])),
|
||||
check_dtype = False)
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import pandas as pd
|
||||
from DataFrameNormalizer import DataFrameNormalizer
|
||||
|
||||
|
||||
class VaersDescrReader:
|
||||
|
||||
@@ -30,7 +32,7 @@ class VaersDescrReader:
|
||||
date_parser = lambda dateStr: pd.to_datetime(dateStr, format = "%m/%d/%Y"))
|
||||
|
||||
def _readVAERSVAX(self, file):
|
||||
return self._read_csv(
|
||||
VAERSVAX = self._read_csv(
|
||||
file = file,
|
||||
usecols = ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],
|
||||
dtype =
|
||||
@@ -38,6 +40,9 @@ class VaersDescrReader:
|
||||
"VAX_DOSE_SERIES": "string",
|
||||
"VAX_LOT": "string"
|
||||
})
|
||||
DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX)
|
||||
return VAERSVAX
|
||||
|
||||
|
||||
def _readVAERSSYMPTOMS(self, file):
|
||||
return self._read_csv(
|
||||
|
||||
Reference in New Issue
Block a user