diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index 6896d0e0756..c644158bc6d 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -182,14 +182,14 @@ { "cell_type": "code", "execution_count": null, - "id": "fc2c5591", + "id": "6e6efc19", "metadata": {}, "outputs": [], "source": [ "from HistogramDescriptionPersister import HistogramDescriptionPersister\n", "\n", "histogramDescriptionPersister = HistogramDescriptionPersister('../docs/data/histograms')\n", - "histogramDescriptionPersister.saveHistogramDescriptionsForBatchcodes(['eh9899'], dictByBatchcodeTable)" + "histogramDescriptionPersister.saveHistogramDescriptionsForBatchcodes(batchcodes[:100], dictByBatchcodeTable)" ] }, { @@ -202,33 +202,6 @@ "batchcodes[:10]" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "ba9f665a", - "metadata": {}, - "outputs": [], - "source": [ - "from TableByBatchcodeFilter import TableByBatchcodeFilter\n", - "filteredTable = TableByBatchcodeFilter.filterTableByBatchcode('', dictByBatchcodeTable)\n", - "filteredTable" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7027164", - "metadata": {}, - "outputs": [], - "source": [ - "from DictByBatchcodeTable2DictConverter import DictByBatchcodeTable2DictConverter\n", - "import json\n", - "# FK-TODO: filterTableByBatchcode() und convertDictByBatchcodeTable2Json() für alle Batchcodes aufrufen und jedes Ergebnis in einer Datei batchcode.json speichern.\n", - "dict = DictByBatchcodeTable2DictConverter.convertDictByBatchcodeTable2Dict(filteredTable, 'FE6208')\n", - "dict\n", - "# print(json.dumps(json.loads(jsonActual), indent=2))" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/src/SymptomByBatchcodeTableFactory.py b/src/SymptomByBatchcodeTableFactory.py index 62998f4e2fa..ba81712e3c0 100644 --- a/src/SymptomByBatchcodeTableFactory.py +++ b/src/SymptomByBatchcodeTableFactory.py @@ -1,12 +1,10 @@ import pandas as pd import numpy as np -from DataFrameNormalizer import DataFrameNormalizer class SymptomByBatchcodeTableFactory: @staticmethod def createSymptomByBatchcodeTable(VAERSVAX, VAERSSYMPTOMS): - DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX) index_columns = SymptomByBatchcodeTableFactory._getIndexColumns(VAERSVAX) symptomColumn = 'SYMPTOM' return (pd diff --git a/src/SymptomByBatchcodeTableFactoryTest.py b/src/SymptomByBatchcodeTableFactoryTest.py index c05b250e7dd..f37db763ffb 100644 --- a/src/SymptomByBatchcodeTableFactoryTest.py +++ b/src/SymptomByBatchcodeTableFactoryTest.py @@ -167,45 +167,3 @@ class SymptomByBatchcodeTableFactoryTest(unittest.TestCase): index = pd.MultiIndex.from_tuples( names = ['VAX_LOT1', 'VAX_LOT2'], tuples = [['1808982', 'EW0175']] * 13 + [['EW0167', 'EW0175']] * 10))) - - def test_createSymptomByBatchcodeTable_after_convertVAX_LOTColumnToUpperCase(self): - # Given - VAX_LOT = 'EW0175' - VAX_LOT_lower = VAX_LOT.lower() - VAERSVAX = TestHelper.createDataFrame( - columns = ['VAX_TYPE', 'VAX_MANU', 'VAX_LOT', 'VAX_DOSE_SERIES'], - data = [ ['COVID19', 'JANSSEN', VAX_LOT, '1'], - ['COVID19', 'JANSSEN', VAX_LOT_lower, '1']], - index = pd.Index( - name = 'VAERS_ID', - data=[ - 2547730, - 2547731]), - dtypes = { - 'VAX_DOSE_SERIES': 'string', - 'VAX_LOT': 'string'}) - VAERSSYMPTOMS = TestHelper.createDataFrame( - columns = ['SYMPTOM1', 'SYMPTOM2', 'SYMPTOM3', 'SYMPTOM4', 'SYMPTOM5'], - data = [ ['Blood pressure orthostatic abnormal', np.nan, np.nan, np.nan, np.nan], - ['Blood pressure orthostatic abnormal', np.nan, np.nan, np.nan, np.nan]], - index = pd.Index( - name = 'VAERS_ID', - data=[ - 2547730, - 2547731])) - - # When - symptomByBatchcodeTable = SymptomByBatchcodeTableFactory.createSymptomByBatchcodeTable(VAERSVAX, VAERSSYMPTOMS) - - # Then - assert_frame_equal( - symptomByBatchcodeTable, - TestHelper.createDataFrame( - columns = ['SYMPTOM'], - data = [ ['Blood pressure orthostatic abnormal'], - ['Blood pressure orthostatic abnormal']], - index = pd.Index( - name = 'VAX_LOT1', - data = [VAX_LOT, - VAX_LOT])), - check_dtype = False) diff --git a/src/VaersDescrReader.py b/src/VaersDescrReader.py index fb19dcb1e08..99e541f1743 100644 --- a/src/VaersDescrReader.py +++ b/src/VaersDescrReader.py @@ -1,4 +1,6 @@ import pandas as pd +from DataFrameNormalizer import DataFrameNormalizer + class VaersDescrReader: @@ -30,7 +32,7 @@ class VaersDescrReader: date_parser = lambda dateStr: pd.to_datetime(dateStr, format = "%m/%d/%Y")) def _readVAERSVAX(self, file): - return self._read_csv( + VAERSVAX = self._read_csv( file = file, usecols = ['VAERS_ID', 'VAX_DOSE_SERIES', 'VAX_TYPE', 'VAX_MANU', 'VAX_LOT'], dtype = @@ -38,6 +40,9 @@ class VaersDescrReader: "VAX_DOSE_SERIES": "string", "VAX_LOT": "string" }) + DataFrameNormalizer.convertVAX_LOTColumnToUpperCase(VAERSVAX) + return VAERSVAX + def _readVAERSSYMPTOMS(self, file): return self._read_csv(