adding convertVAX_LOTColumnToUpperCase() in VaersDescrReader

This commit is contained in:
frankknoll
2023-01-27 10:54:48 +01:00
parent 702d48aaf7
commit 68fcb4da40
4 changed files with 8 additions and 74 deletions

View File

@@ -182,14 +182,14 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "fc2c5591", "id": "6e6efc19",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from HistogramDescriptionPersister import HistogramDescriptionPersister\n", "from HistogramDescriptionPersister import HistogramDescriptionPersister\n",
"\n", "\n",
"histogramDescriptionPersister = HistogramDescriptionPersister('../docs/data/histograms')\n", "histogramDescriptionPersister = HistogramDescriptionPersister('../docs/data/histograms')\n",
"histogramDescriptionPersister.saveHistogramDescriptionsForBatchcodes(['eh9899'], dictByBatchcodeTable)" "histogramDescriptionPersister.saveHistogramDescriptionsForBatchcodes(batchcodes[:100], dictByBatchcodeTable)"
] ]
}, },
{ {
@@ -202,33 +202,6 @@
"batchcodes[:10]" "batchcodes[:10]"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "ba9f665a",
"metadata": {},
"outputs": [],
"source": [
"from TableByBatchcodeFilter import TableByBatchcodeFilter\n",
"filteredTable = TableByBatchcodeFilter.filterTableByBatchcode('<NA>', dictByBatchcodeTable)\n",
"filteredTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7027164",
"metadata": {},
"outputs": [],
"source": [
"from DictByBatchcodeTable2DictConverter import DictByBatchcodeTable2DictConverter\n",
"import json\n",
"# FK-TODO: filterTableByBatchcode() und convertDictByBatchcodeTable2Json() für alle Batchcodes aufrufen und jedes Ergebnis in einer Datei batchcode.json speichern.\n",
"dict = DictByBatchcodeTable2DictConverter.convertDictByBatchcodeTable2Dict(filteredTable, 'FE6208')\n",
"dict\n",
"# print(json.dumps(json.loads(jsonActual), indent=2))"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,

View File

@@ -1,12 +1,10 @@
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from DataFrameNormalizer import DataFrameNormalizer
class SymptomByBatchcodeTableFactory: class SymptomByBatchcodeTableFactory:
@staticmethod @staticmethod
def createSymptomByBatchcodeTable(VAERSVAX, VAERSSYMPTOMS): def createSymptomByBatchcodeTable(VAERSVAX, VAERSSYMPTOMS):
DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX)
index_columns = SymptomByBatchcodeTableFactory._getIndexColumns(VAERSVAX) index_columns = SymptomByBatchcodeTableFactory._getIndexColumns(VAERSVAX)
symptomColumn = 'SYMPTOM' symptomColumn = 'SYMPTOM'
return (pd return (pd

View File

@@ -167,45 +167,3 @@ class SymptomByBatchcodeTableFactoryTest(unittest.TestCase):
index = pd.MultiIndex.from_tuples( index = pd.MultiIndex.from_tuples(
names = ['VAX_LOT1', 'VAX_LOT2'], names = ['VAX_LOT1', 'VAX_LOT2'],
tuples = [['1808982', 'EW0175']] * 13 + [['EW0167', 'EW0175']] * 10))) tuples = [['1808982', 'EW0175']] * 13 + [['EW0167', 'EW0175']] * 10)))
def test_createSymptomByBatchcodeTable_after_convertVAX_LOTColumnToUpperCase(self):
# Purpose: two VAERS reports carry the same batch code, once as 'EW0175' and
# once lower-cased; the resulting symptom table is expected to list both
# symptom rows under the upper-case batch code in the 'VAX_LOT1' index.
# Given
VAX_LOT = 'EW0175'
VAX_LOT_lower = VAX_LOT.lower()
# Two vaccination rows that differ only in the letter case of VAX_LOT.
VAERSVAX = TestHelper.createDataFrame(
columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'],
data = [ ['COVID19', 'JANSSEN', VAX_LOT, '1'],
['COVID19', 'JANSSEN', VAX_LOT_lower, '1']],
index = pd.Index(
name = 'VAERS_ID',
data=[
2547730,
2547731]),
dtypes = {
'VAX_DOSE_SERIES': 'string',
'VAX_LOT': 'string'})
# One symptom per report, keyed by the same VAERS_IDs as VAERSVAX above;
# SYMPTOM2..SYMPTOM5 are left empty (NaN).
VAERSSYMPTOMS = TestHelper.createDataFrame(
columns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5'],
data = [ ['Blood pressure orthostatic abnormal', np.nan, np.nan, np.nan, np.nan],
['Blood pressure orthostatic abnormal', np.nan, np.nan, np.nan, np.nan]],
index = pd.Index(
name = 'VAERS_ID',
data=[
2547730,
2547731]))
# When
symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(VAERSVAX, VAERSSYMPTOMS)
# Then
# Both rows must be indexed by the upper-case VAX_LOT, i.e. the lower-case
# variant has been normalized away.
# NOTE(review): assert_frame_equal is presumably pandas.testing's; with
# check_dtype = False column dtypes would not be compared - confirm.
assert_frame_equal(
symptomByBatchcodeTable,
TestHelper.createDataFrame(
columns = ['SYMPTOM'],
data = [ ['Blood pressure orthostatic abnormal'],
['Blood pressure orthostatic abnormal']],
index = pd.Index(
name = 'VAX_LOT1',
data = [VAX_LOT,
VAX_LOT])),
check_dtype = False)

View File

@@ -1,4 +1,6 @@
import pandas as pd import pandas as pd
from DataFrameNormalizer import DataFrameNormalizer
class VaersDescrReader: class VaersDescrReader:
@@ -30,7 +32,7 @@ class VaersDescrReader:
date_parser = lambda dateStr: pd.to_datetime(dateStr, format = "%m/%d/%Y")) date_parser = lambda dateStr: pd.to_datetime(dateStr, format = "%m/%d/%Y"))
def _readVAERSVAX(self, file): def _readVAERSVAX(self, file):
return self._read_csv( VAERSVAX = self._read_csv(
file = file, file = file,
usecols = ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'], usecols = ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'],
dtype = dtype =
@@ -38,6 +40,9 @@ class VaersDescrReader:
"VAX_DOSE_SERIES": "string", "VAX_DOSE_SERIES": "string",
"VAX_LOT": "string" "VAX_LOT": "string"
}) })
DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX)
return VAERSVAX
def _readVAERSSYMPTOMS(self, file): def _readVAERSSYMPTOMS(self, file):
return self._read_csv( return self._read_csv(