refining some tests

This commit is contained in:
frankknoll
2023-03-24 23:36:33 +01:00
parent 78a94e113b
commit 0ec8d406c9
5 changed files with 95 additions and 2 deletions

View File

@@ -1,4 +1,5 @@
from SmartRegexpFactory import SmartRegexpFactory from SmartRegexpFactory import SmartRegexpFactory
import pandas as pd
class BatchcodeCompletion: class BatchcodeCompletion:
@@ -6,6 +7,8 @@ class BatchcodeCompletion:
self.ADR_by_Batchcode = ADR_by_Batchcode.sort_values(by = 'Adverse Reaction Reports', ascending = False) self.ADR_by_Batchcode = ADR_by_Batchcode.sort_values(by = 'Adverse Reaction Reports', ascending = False)
def completeBatchcode(self, partialBatchcode):
    """Complete a partial batch code, or return None if it is missing.

    A missing value (anything ``pd.isna`` reports as NA, e.g. ``None`` or
    ``float('nan')``) short-circuits to ``None`` so it never reaches the
    filtering step.  Otherwise the result is whatever
    ``_getBatchcodeHavingMostADRs`` returns for the rows selected by
    ``_filterBy(partialBatchcode)``.
    """
    # Guard first: NA search terms must not be handed to _filterBy.
    if pd.isna(partialBatchcode):
        return None
    return self._getBatchcodeHavingMostADRs(self._filterBy(partialBatchcode))
def _filterBy(self, partialBatchcode): def _filterBy(self, partialBatchcode):

View File

@@ -2,6 +2,7 @@ import unittest
from TestHelper import TestHelper from TestHelper import TestHelper
from BatchcodeCompletion import BatchcodeCompletion from BatchcodeCompletion import BatchcodeCompletion
import pandas as pd import pandas as pd
import numpy as np
class BatchcodeCompletionTest(unittest.TestCase): class BatchcodeCompletionTest(unittest.TestCase):
@@ -49,6 +50,29 @@ class BatchcodeCompletionTest(unittest.TestCase):
# Then # Then
self.assertIsNone(completedBatchcode) self.assertIsNone(completedBatchcode)
def test_completeBatchcode_NaN(self):
    # A NaN search term must complete to None regardless of table content.
    # Given
    # FK-TODO: use empty ADR_by_Batchcode
    ADR_by_Batchcode = TestHelper.createDataFrame(
        columns = ['Adverse Reaction Reports'],
        data = [ [1],
                 [200],
                 [149]],
        index = pd.Index(
            [
                'LOT000057A',
                '030L20B',
                '000057A'
            ],
            name = 'VAX_LOT'))
    batchcodeCompletion = BatchcodeCompletion(ADR_by_Batchcode)

    # When
    # np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
    completedBatchcode = batchcodeCompletion.completeBatchcode(np.nan)

    # Then
    self.assertIsNone(completedBatchcode)
def test_completeBatchcode_empty_ADR_by_Batchcode(self): def test_completeBatchcode_empty_ADR_by_Batchcode(self):
# Given # Given
ADR_by_Batchcode = TestHelper.createDataFrame( ADR_by_Batchcode = TestHelper.createDataFrame(

View File

@@ -23,6 +23,32 @@
"from HistogramFactoryAndPersister import createAndSaveGlobalHistograms, createAndSaveHistogramsForCountries" "from HistogramFactoryAndPersister import createAndSaveGlobalHistograms, createAndSaveHistogramsForCountries"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "0cb5849c",
"metadata": {},
"outputs": [],
"source": [
"country_By_Clicked_Batchcode = pd.read_csv('data/Country By Clicked Batchcode.csv', header=[0, 1], index_col=0, skiprows=6)\n",
"country_By_Clicked_Batchcode.drop(index='Clicked Batchcode', inplace=True)\n",
"country_By_Clicked_Batchcode.index.rename('Clicked Batchcode', inplace=True)\n",
"country_By_Clicked_Batchcode"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5066a92",
"metadata": {},
"outputs": [],
"source": [
"country_By_Batchcode_Search_Term = pd.read_csv('data/Country By Batchcode Search Term.csv', header=[0, 1], index_col=0, skiprows=6)\n",
"country_By_Batchcode_Search_Term.drop(index='Batchcode Search Term', inplace=True)\n",
"country_By_Batchcode_Search_Term.index.rename('Batchcode Search Term', inplace=True)\n",
"country_By_Batchcode_Search_Term"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -141,6 +167,46 @@
"internationalVaersCovid19" "internationalVaersCovid19"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "7a1023e8",
"metadata": {},
"outputs": [],
"source": [
"from BatchCodeTableFactory import BatchCodeTableFactory\n",
"\n",
"batchCodeTableFactory = BatchCodeTableFactory(internationalVaersCovid19)\n",
"batchCodeTable = batchCodeTableFactory.createGlobalBatchCodeTable()\n",
"batchCodeTable"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0e7dae70",
"metadata": {},
"outputs": [],
"source": [
"from CompletedBatchcodeColumnAdder import CompletedBatchcodeColumnAdder\n",
"from BatchcodeCompletion import BatchcodeCompletion\n",
"\n",
"batchcodeCompletion = BatchcodeCompletion(batchCodeTable)\n",
"completedBatchcodeColumnAdder = CompletedBatchcodeColumnAdder(batchcodeCompletion.completeBatchcode)\n",
"country_By_Batchcode_Search_Term = completedBatchcodeColumnAdder.addCompletedBatchcodeColumn(country_By_Batchcode_Search_Term)\n",
"country_By_Batchcode_Search_Term"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "89c65459",
"metadata": {},
"outputs": [],
"source": [
"country_By_Batchcode_Search_Term.to_excel('tmp/Country_By_Batchcode_Search_Term.xlsx')"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,

View File

@@ -10,7 +10,7 @@ class SmartRegexpFactory:
flags=re.IGNORECASE) flags=re.IGNORECASE)
def getWords(self, searchTerm):
    """Split searchTerm on whitespace runs and regex-escape each word.

    Escaping makes regex metacharacters in the search term (for example
    the '(' in '(not set') literal text when the words are later placed
    into a pattern.
    """
    return [re.escape(word) for word in re.split(r'\s+', searchTerm)]
def assertContainsWords(self, words):
    """Concatenate the result of assertContainsWord for every word.

    Returns the empty string when words is empty.
    """
    return ''.join(self.assertContainsWord(word) for word in words)

View File

@@ -17,4 +17,4 @@ class SmartRegexpFactoryTest(unittest.TestCase):
self.assertTrue(smartSearch(searchTerm = 'one two three', str = 'TESTone twoTEST TESTthreeTEST')) self.assertTrue(smartSearch(searchTerm = 'one two three', str = 'TESTone twoTEST TESTthreeTEST'))
self.assertTrue(smartSearch(searchTerm = 'eins zwei drei', str = 'drei, EINS oder zwei?')) self.assertTrue(smartSearch(searchTerm = 'eins zwei drei', str = 'drei, EINS oder zwei?'))
self.assertFalse(smartSearch(searchTerm = 'eins zwei drei', str = 'drei, ONE oder zwei?')) self.assertFalse(smartSearch(searchTerm = 'eins zwei drei', str = 'drei, ONE oder zwei?'))
self.assertFalse(smartSearch(searchTerm = '(not set', str = 'drei, ONE oder zwei?'))