From 0ec8d406c9ea81bc3e28bb297b3b10375b352d3d Mon Sep 17 00:00:00 2001 From: frankknoll Date: Fri, 24 Mar 2023 23:36:33 +0100 Subject: [PATCH] refining some tests --- src/BatchcodeCompletion.py | 3 ++ src/BatchcodeCompletionTest.py | 24 +++++++++++++ src/HowBadIsMyBatch.ipynb | 66 ++++++++++++++++++++++++++++++++++ src/SmartRegexpFactory.py | 2 +- src/SmartRegexpFactoryTest.py | 2 +- 5 files changed, 95 insertions(+), 2 deletions(-) diff --git a/src/BatchcodeCompletion.py b/src/BatchcodeCompletion.py index 23371ac9775..c008c0ad6dc 100644 --- a/src/BatchcodeCompletion.py +++ b/src/BatchcodeCompletion.py @@ -1,4 +1,5 @@ from SmartRegexpFactory import SmartRegexpFactory +import pandas as pd class BatchcodeCompletion: @@ -6,6 +7,8 @@ class BatchcodeCompletion: self.ADR_by_Batchcode = ADR_by_Batchcode.sort_values(by = 'Adverse Reaction Reports', ascending = False) def completeBatchcode(self, partialBatchcode): + if pd.isna(partialBatchcode): + return None return self._getBatchcodeHavingMostADRs(self._filterBy(partialBatchcode)) def _filterBy(self, partialBatchcode): diff --git a/src/BatchcodeCompletionTest.py b/src/BatchcodeCompletionTest.py index a81b4d9be03..35d8464727b 100644 --- a/src/BatchcodeCompletionTest.py +++ b/src/BatchcodeCompletionTest.py @@ -2,6 +2,7 @@ import unittest from TestHelper import TestHelper from BatchcodeCompletion import BatchcodeCompletion import pandas as pd +import numpy as np class BatchcodeCompletionTest(unittest.TestCase): @@ -49,6 +50,29 @@ class BatchcodeCompletionTest(unittest.TestCase): # Then self.assertIsNone(completedBatchcode) + def test_completeBatchcode_NaN(self): + # Given + # FK-TODO: use empty ADR_by_Batchcode + ADR_by_Batchcode = TestHelper.createDataFrame( + columns = ['Adverse Reaction Reports'], + data = [ [1], + [200], + [149]], + index = pd.Index( + [ + 'LOT000057A', + '030L20B', + '000057A' + ], + name = 'VAX_LOT')) + batchcodeCompletion = BatchcodeCompletion(ADR_by_Batchcode) + + # When + completedBatchcode = batchcodeCompletion.completeBatchcode(np.NaN) + + # Then + self.assertIsNone(completedBatchcode) + def test_completeBatchcode_empty_ADR_by_Batchcode(self): # Given ADR_by_Batchcode = TestHelper.createDataFrame( diff --git a/src/HowBadIsMyBatch.ipynb b/src/HowBadIsMyBatch.ipynb index d3d29ca3be4..5ab6ef85ca0 100644 --- a/src/HowBadIsMyBatch.ipynb +++ b/src/HowBadIsMyBatch.ipynb @@ -23,6 +23,32 @@ "from HistogramFactoryAndPersister import createAndSaveGlobalHistograms, createAndSaveHistogramsForCountries" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "0cb5849c", + "metadata": {}, + "outputs": [], + "source": [ + "country_By_Clicked_Batchcode = pd.read_csv('data/Country By Clicked Batchcode.csv', header=[0, 1], index_col=0, skiprows=6)\n", + "country_By_Clicked_Batchcode.drop(index='Clicked Batchcode', inplace=True)\n", + "country_By_Clicked_Batchcode.index.rename('Clicked Batchcode', inplace=True)\n", + "country_By_Clicked_Batchcode" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5066a92", + "metadata": {}, + "outputs": [], + "source": [ + "country_By_Batchcode_Search_Term = pd.read_csv('data/Country By Batchcode Search Term.csv', header=[0, 1], index_col=0, skiprows=6)\n", + "country_By_Batchcode_Search_Term.drop(index='Batchcode Search Term', inplace=True)\n", + "country_By_Batchcode_Search_Term.index.rename('Batchcode Search Term', inplace=True)\n", + "country_By_Batchcode_Search_Term" + ] + }, { "cell_type": "code", "execution_count": null, @@ -141,6 +167,46 @@ "internationalVaersCovid19" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a1023e8", + "metadata": {}, + "outputs": [], + "source": [ + "from BatchCodeTableFactory import BatchCodeTableFactory\n", + "\n", + "batchCodeTableFactory = BatchCodeTableFactory(internationalVaersCovid19)\n", + "batchCodeTable = batchCodeTableFactory.createGlobalBatchCodeTable()\n", + "batchCodeTable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e7dae70", + "metadata": {}, + "outputs": [], + "source": [ + "from CompletedBatchcodeColumnAdder import CompletedBatchcodeColumnAdder\n", + "from BatchcodeCompletion import BatchcodeCompletion\n", + "\n", + "batchcodeCompletion = BatchcodeCompletion(batchCodeTable)\n", + "completedBatchcodeColumnAdder = CompletedBatchcodeColumnAdder(batchcodeCompletion.completeBatchcode)\n", + "country_By_Batchcode_Search_Term = completedBatchcodeColumnAdder.addCompletedBatchcodeColumn(country_By_Batchcode_Search_Term)\n", + "country_By_Batchcode_Search_Term" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89c65459", + "metadata": {}, + "outputs": [], + "source": [ + "country_By_Batchcode_Search_Term.to_excel('tmp/Country_By_Batchcode_Search_Term.xlsx')" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/src/SmartRegexpFactory.py b/src/SmartRegexpFactory.py index 48d5d3d08c8..bae1bae2e6e 100644 --- a/src/SmartRegexpFactory.py +++ b/src/SmartRegexpFactory.py @@ -10,7 +10,7 @@ class SmartRegexpFactory: flags=re.IGNORECASE) def getWords(self, searchTerm): - return re.split(r'\s+', searchTerm) + return [re.escape(word) for word in re.split(r'\s+', searchTerm)] def assertContainsWords(self, words): return ''.join([self.assertContainsWord(word) for word in words]) diff --git a/src/SmartRegexpFactoryTest.py b/src/SmartRegexpFactoryTest.py index 79c81d95426..5a8c56295ef 100644 --- a/src/SmartRegexpFactoryTest.py +++ b/src/SmartRegexpFactoryTest.py @@ -17,4 +17,4 @@ class SmartRegexpFactoryTest(unittest.TestCase): self.assertTrue(smartSearch(searchTerm = 'one two three', str = 'TESTone twoTEST TESTthreeTEST')) self.assertTrue(smartSearch(searchTerm = 'eins zwei drei', str = 'drei, EINS oder zwei?')) self.assertFalse(smartSearch(searchTerm = 'eins zwei drei', str = 'drei, ONE oder zwei?')) - + self.assertFalse(smartSearch(searchTerm = '(not set', str = 'drei, ONE oder zwei?'))