refining some tests

2023-03-24 23:36:33 +01:00
parent 78a94e113b
commit 0ec8d406c9
5 changed files with 95 additions and 2 deletions
--- a/src/BatchcodeCompletion.py
+++ b/src/BatchcodeCompletion.py
@@ -1,4 +1,5 @@
 from SmartRegexpFactory import SmartRegexpFactory
 import pandas as pd
 class BatchcodeCompletion:
@@ -6,6 +7,8 @@ class BatchcodeCompletion:
        self.ADR_by_Batchcode = ADR_by_Batchcode.sort_values(by = 'Adverse Reaction Reports', ascending = False)
    def completeBatchcode(self, partialBatchcode):
        """Return the completion for partialBatchcode, or None if it is missing.

        A value for which pd.isna() is true yields None. Any other value is
        narrowed with self._filterBy and the result is passed on to
        self._getBatchcodeHavingMostADRs, whose return value is returned.
        """
        isMissing = pd.isna(partialBatchcode)
        if not isMissing:
            candidates = self._filterBy(partialBatchcode)
            return self._getBatchcodeHavingMostADRs(candidates)
        return None
    def _filterBy(self, partialBatchcode):
--- a/src/BatchcodeCompletionTest.py
+++ b/src/BatchcodeCompletionTest.py
@@ -2,6 +2,7 @@ import unittest
 from TestHelper import TestHelper
 from BatchcodeCompletion import BatchcodeCompletion
 import pandas as pd
 import numpy as np
 class BatchcodeCompletionTest(unittest.TestCase):
@@ -49,6 +50,29 @@ class BatchcodeCompletionTest(unittest.TestCase):
        # Then
        self.assertIsNone(completedBatchcode)
    def test_completeBatchcode_NaN(self):
        """completeBatchcode must return None when handed NaN instead of a batch code."""
        # Given
        # FK-TODO: use empty ADR_by_Batchcode
        ADR_by_Batchcode = TestHelper.createDataFrame(
                columns = ['Adverse Reaction Reports'],
                data = [  [1],
                          [200],
                          [149]],
                index = pd.Index(
                    [
                        'LOT000057A',
                        '030L20B',
                        '000057A'
                    ],
                    name = 'VAX_LOT'))
        batchcodeCompletion = BatchcodeCompletion(ADR_by_Batchcode)

        # When
        # Use np.nan: the np.NaN alias was removed in NumPy 2.0 and raises
        # AttributeError there, while np.nan works on every NumPy version.
        completedBatchcode = batchcodeCompletion.completeBatchcode(np.nan)

        # Then
        self.assertIsNone(completedBatchcode)
    def test_completeBatchcode_empty_ADR_by_Batchcode(self):
        # Given
        ADR_by_Batchcode = TestHelper.createDataFrame(
--- a/src/HowBadIsMyBatch.ipynb
+++ b/src/HowBadIsMyBatch.ipynb
@@ -23,6 +23,32 @@
    "from HistogramFactoryAndPersister import createAndSaveGlobalHistograms, createAndSaveHistogramsForCountries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0cb5849c",
   "metadata": {},
   "outputs": [],
   "source": [
    "country_By_Clicked_Batchcode = pd.read_csv('data/Country By Clicked Batchcode.csv', header=[0, 1], index_col=0, skiprows=6)\n",
    "country_By_Clicked_Batchcode.drop(index='Clicked Batchcode', inplace=True)\n",
    "country_By_Clicked_Batchcode.index.rename('Clicked Batchcode', inplace=True)\n",
    "country_By_Clicked_Batchcode"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5066a92",
   "metadata": {},
   "outputs": [],
   "source": [
    "country_By_Batchcode_Search_Term = pd.read_csv('data/Country By Batchcode Search Term.csv', header=[0, 1], index_col=0, skiprows=6)\n",
    "country_By_Batchcode_Search_Term.drop(index='Batchcode Search Term', inplace=True)\n",
    "country_By_Batchcode_Search_Term.index.rename('Batchcode Search Term', inplace=True)\n",
    "country_By_Batchcode_Search_Term"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -141,6 +167,46 @@
    "internationalVaersCovid19"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7a1023e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from BatchCodeTableFactory import BatchCodeTableFactory\n",
    "\n",
    "batchCodeTableFactory = BatchCodeTableFactory(internationalVaersCovid19)\n",
    "batchCodeTable = batchCodeTableFactory.createGlobalBatchCodeTable()\n",
    "batchCodeTable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e7dae70",
   "metadata": {},
   "outputs": [],
   "source": [
    "from CompletedBatchcodeColumnAdder import CompletedBatchcodeColumnAdder\n",
    "from BatchcodeCompletion import BatchcodeCompletion\n",
    "\n",
    "batchcodeCompletion = BatchcodeCompletion(batchCodeTable)\n",
    "completedBatchcodeColumnAdder = CompletedBatchcodeColumnAdder(batchcodeCompletion.completeBatchcode)\n",
    "country_By_Batchcode_Search_Term = completedBatchcodeColumnAdder.addCompletedBatchcodeColumn(country_By_Batchcode_Search_Term)\n",
    "country_By_Batchcode_Search_Term"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "89c65459",
   "metadata": {},
   "outputs": [],
   "source": [
    "country_By_Batchcode_Search_Term.to_excel('tmp/Country_By_Batchcode_Search_Term.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
--- a/src/SmartRegexpFactory.py
+++ b/src/SmartRegexpFactory.py
@@ -10,7 +10,7 @@ class SmartRegexpFactory:
            flags=re.IGNORECASE)
    def getWords(self, searchTerm):
-        return re.split(r'\s+', searchTerm)
+        return [re.escape(word) for word in re.split(r'\s+', searchTerm)]
    def assertContainsWords(self, words):
        """Return the concatenation of self.assertContainsWord(word) for each word, in order."""
        fragments = map(self.assertContainsWord, words)
        return ''.join(fragments)
--- a/src/SmartRegexpFactoryTest.py
+++ b/src/SmartRegexpFactoryTest.py
@@ -17,4 +17,4 @@ class SmartRegexpFactoryTest(unittest.TestCase):
        self.assertTrue(smartSearch(searchTerm = 'one two three', str = 'TESTone twoTEST TESTthreeTEST'))
        self.assertTrue(smartSearch(searchTerm = 'eins zwei drei', str = 'drei, EINS oder zwei?'))
        self.assertFalse(smartSearch(searchTerm = 'eins zwei drei', str = 'drei, ONE oder zwei?'))
-        
+        self.assertFalse(smartSearch(searchTerm = '(not set', str = 'drei, ONE oder zwei?'))