continuing
This commit is contained in:
19
src/BatchcodeCompletion.py
Normal file
19
src/BatchcodeCompletion.py
Normal file
@@ -0,0 +1,19 @@
|
||||
from SmartRegexpFactory import SmartRegexpFactory
|
||||
import pandas as pd
|
||||
|
||||
class BatchcodeCompletion:
    """Completes a partial batch code to a known batch code.

    The known batch codes are the index labels of the table passed to the
    constructor. Among all labels matching the partial batch code, the one
    having the highest 'Adverse Reaction Reports' value is chosen.
    """

    def __init__(self, ADR_by_Batchcode):
        # Keep the table ordered by descending report count, so that the first
        # row of any filtered view is the match having the most reports.
        self.ADR_by_Batchcode = ADR_by_Batchcode.sort_values(
            by = 'Adverse Reaction Reports',
            ascending = False)

    def completeBatchcode(self, partialBatchcode):
        """Return the best matching batch code.

        Returns None if partialBatchcode is missing (as judged by pd.isna)
        or if no known batch code matches it.
        """
        if pd.isna(partialBatchcode):
            return None

        matchingBatchcodes = self._filterBy(partialBatchcode)
        return self._getBatchcodeHavingMostADRs(matchingBatchcodes)

    def _filterBy(self, partialBatchcode):
        # Rows whose index label matches the regexp built from the partial
        # batch code; labels that are not strings count as non-matching.
        smartRegexp = SmartRegexpFactory().createSmartRegexp(partialBatchcode)
        isMatching = self.ADR_by_Batchcode.index.str.contains(smartRegexp, na = False, regex = True)
        return self.ADR_by_Batchcode[isMatching]

    def _getBatchcodeHavingMostADRs(self, ADR_by_Batchcode):
        # Relies on the rows already being ordered by descending report count.
        if ADR_by_Batchcode.empty:
            return None
        return ADR_by_Batchcode.index[0]
|
||||
83
src/BatchcodeCompletionTest.py
Normal file
83
src/BatchcodeCompletionTest.py
Normal file
@@ -0,0 +1,83 @@
|
||||
import unittest
|
||||
from TestHelper import TestHelper
|
||||
from BatchcodeCompletion import BatchcodeCompletion
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
class BatchcodeCompletionTest(unittest.TestCase):
    """Unit tests for BatchcodeCompletion.completeBatchcode()."""

    def test_completeBatchcode(self):
        # Given
        batchcodeCompletion = BatchcodeCompletion(self._createADRByBatchcode())

        # When
        completedBatchcode = batchcodeCompletion.completeBatchcode('000057')

        # Then
        self.assertEqual(completedBatchcode, '000057A')

    def test_completeBatchcode_no_completion(self):
        # Given
        batchcodeCompletion = BatchcodeCompletion(self._createADRByBatchcode())

        # When
        completedBatchcode = batchcodeCompletion.completeBatchcode('non existing batch code')

        # Then
        self.assertIsNone(completedBatchcode)

    def test_completeBatchcode_NaN(self):
        # Given
        batchcodeCompletion = BatchcodeCompletion(self._createEmptyADRByBatchcode())

        # When
        # np.nan instead of np.NaN: the upper-case alias was removed in NumPy 2.0.
        completedBatchcode = batchcodeCompletion.completeBatchcode(np.nan)

        # Then
        self.assertIsNone(completedBatchcode)

    def test_completeBatchcode_empty_ADR_by_Batchcode(self):
        # Given
        batchcodeCompletion = BatchcodeCompletion(self._createEmptyADRByBatchcode())

        # When
        completedBatchcode = batchcodeCompletion.completeBatchcode('non existing batch code')

        # Then
        self.assertIsNone(completedBatchcode)

    @staticmethod
    def _createADRByBatchcode():
        # Table with three batch codes, two of which contain '000057'.
        return TestHelper.createDataFrame(
            columns = ['Adverse Reaction Reports'],
            data = [  [1],
                      [200],
                      [149]],
            index = pd.Index(
                [
                    'LOT000057A',
                    '030L20B',
                    '000057A'
                ],
                name = 'VAX_LOT'))

    @staticmethod
    def _createEmptyADRByBatchcode():
        # Table without any batch code.
        return TestHelper.createDataFrame(
            columns = ['Adverse Reaction Reports'],
            data = [],
            index = pd.Index(
                [],
                name = 'VAX_LOT'))
|
||||
8
src/CountriesColumnAdder.py
Normal file
8
src/CountriesColumnAdder.py
Normal file
@@ -0,0 +1,8 @@
|
||||
class CountriesColumnAdder:
    """Adds a column holding, per row, the labels of all columns having a value greater than 0."""

    def addCountriesColumn(self, countriesByBatchcodeTable, columnName):
        """Add the column named columnName to countriesByBatchcodeTable.

        The table is modified in place and is also returned. Each cell of the
        new column is the set of column labels whose value in that row is
        greater than 0.
        """
        countriesOfRows = countriesByBatchcodeTable.apply(
            self._getCountriesHavingEvents,
            axis = 'columns')
        countriesByBatchcodeTable[columnName] = countriesOfRows
        return countriesByBatchcodeTable

    def _getCountriesHavingEvents(self, eventCountByCountry):
        # eventCountByCountry is one row of the table, labelled by column name.
        hasEvents = eventCountByCountry > 0
        return set(eventCountByCountry[hasEvents].index)
|
||||
31
src/CountriesColumnAdderTest.py
Normal file
31
src/CountriesColumnAdderTest.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import unittest
|
||||
from TestHelper import TestHelper
|
||||
from pandas.testing import assert_frame_equal
|
||||
import pandas as pd
|
||||
from CountriesColumnAdder import CountriesColumnAdder
|
||||
|
||||
class CountriesColumnAdderTest(unittest.TestCase):
    """Unit tests for CountriesColumnAdder.addCountriesColumn()."""

    def test_addCountriesColumn(self):
        # Given
        columnName = 'Countries guessed'
        countriesByBatchcodeTable = TestHelper.createDataFrame(
            columns = ['United States', 'Germany', 'Italy'],
            data = [  [20, 0, 3]],
            index = pd.Index(
                name = 'Batchcode',
                data = ['FE6208']))

        # When
        countriesByBatchcodeTableWithCountriesColumn = CountriesColumnAdder().addCountriesColumn(
            countriesByBatchcodeTable,
            columnName)

        # Then
        # Germany has no events and must therefore be missing from the set.
        expectedTable = TestHelper.createDataFrame(
            columns = ['United States', 'Germany', 'Italy', columnName],
            data = [  [20, 0, 3, {'United States', 'Italy'}]],
            index = pd.Index(
                name = 'Batchcode',
                data = ['FE6208']))
        assert_frame_equal(countriesByBatchcodeTableWithCountriesColumn, expectedTable)
|
||||
|
||||
15
src/CountriesMerger.py
Normal file
15
src/CountriesMerger.py
Normal file
@@ -0,0 +1,15 @@
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class CountriesMerger:
    """Merges the country collections of two Series that are indexed by batch code."""

    @staticmethod
    def mergeSrcIntoDst(src: pd.Series, dst: pd.Series) -> pd.Series:
        """Merge the countries of src into the countries of dst.

        The result has the index and the name of dst (left join on the
        index): index labels occurring only in src are dropped. Each value of
        the result is the sorted list of the distinct countries found for
        that label in dst and src; missing values on either side are ignored.

        src and dst may be named or unnamed, and may share the same name.
        """
        def merge(countriesOfBatchcode):
            # One row = the dst cell and the src cell of a batch code;
            # a cell is NaN when that side has no entry for the batch code.
            return sorted(set().union(*countriesOfBatchcode.dropna()))

        # pd.merge() raises ValueError for a Series without a name, so both
        # Series are converted to frames having explicit, distinct column names.
        mergedSeries = (pd
            .merge(
                dst.to_frame(name='dst'),
                src.to_frame(name='src'),
                how='left',
                left_index=True,
                right_index=True)
            .apply(merge, axis='columns'))
        mergedSeries.name = dst.name
        return mergedSeries
|
||||
25
src/CountriesMergerTest.py
Normal file
25
src/CountriesMergerTest.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import unittest
|
||||
import pandas as pd
|
||||
from CountriesMerger import CountriesMerger
|
||||
from pandas.testing import assert_series_equal
|
||||
|
||||
|
||||
class CountriesMergerTest(unittest.TestCase):
    """Unit tests for CountriesMerger.mergeSrcIntoDst()."""

    def test_mergeSrcIntoDst(self):
        # Given
        srcCountries = pd.Series({'NO72A': ['CountryB-1'], 'FS192': ['CountryB-2']}, name='srcCountries')
        dstCountries = pd.Series({'NO72A': ['CountryA-1'], 'EW096': ['CountryA-2']}, name='dstCountries')

        # When
        mergedCountries = CountriesMerger.mergeSrcIntoDst(src = srcCountries, dst = dstCountries)

        # Then
        # 'FS192' occurs in src only and is therefore not expected in the result.
        expectedCountries = pd.Series(
            {
                'NO72A': ['CountryA-1', 'CountryB-1'],
                'EW096': ['CountryA-2']
            },
            name=dstCountries.name)
        assert_series_equal(mergedCountries, expectedCountries)
|
||||
19
src/SmartRegexpFactory.py
Normal file
19
src/SmartRegexpFactory.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import re
|
||||
|
||||
|
||||
# adapted from function _fnFilterCreateSearch() defined in https://github.com/DataTables/DataTablesSrc/blob/master/js/core/core.filter.js
|
||||
class SmartRegexpFactory:
    """Creates case-insensitive regexps matching strings that contain all words of a search term, in any order."""

    def createSmartRegexp(self, searchTerm):
        """Return the compiled regexp for searchTerm (words separated by whitespace)."""
        escapedWords = self.getWords(searchTerm)
        lookaheads = self.assertContainsWords(escapedWords)
        return re.compile(rf'^{lookaheads}.*$', flags=re.IGNORECASE)

    def getWords(self, searchTerm):
        """Split searchTerm at runs of whitespace and regexp-escape each part."""
        return list(map(re.escape, re.split(r'\s+', searchTerm)))

    def assertContainsWords(self, words):
        """Return the concatenated lookahead assertions, one per word."""
        return ''.join(map(self.assertContainsWord, words))

    def assertContainsWord(self, word):
        """Return a lookahead assertion requiring word to occur somewhere ahead."""
        return '(?=.*?{})'.format(word)
|
||||
20
src/SmartRegexpFactoryTest.py
Normal file
20
src/SmartRegexpFactoryTest.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import unittest
|
||||
from SmartRegexpFactory import SmartRegexpFactory
|
||||
|
||||
|
||||
class SmartRegexpFactoryTest(unittest.TestCase):
    """Unit tests for SmartRegexpFactory.createSmartRegexp()."""

    def test_smartSearch(self):
        # The second parameter is named 'text' so that it does not shadow the builtin 'str'.
        def smartSearch(searchTerm, text):
            smartRegexp = SmartRegexpFactory().createSmartRegexp(searchTerm)
            return bool(smartRegexp.match(text))

        self.assertTrue(smartSearch(searchTerm = 'one two three', text = 'one two three'))
        self.assertTrue(smartSearch(searchTerm = 'one two three', text = 'ONE two ThReE'))
        self.assertFalse(smartSearch(searchTerm = 'one two three', text = 'one two'))
        self.assertFalse(smartSearch(searchTerm = 'one two three', text = 'three two'))
        self.assertTrue(smartSearch(searchTerm = 'one two three', text = 'three two one'))
        self.assertTrue(smartSearch(searchTerm = 'one two three', text = 'TESTone twoTEST TESTthreeTEST'))
        self.assertTrue(smartSearch(searchTerm = 'eins zwei drei', text = 'drei, EINS oder zwei?'))
        self.assertFalse(smartSearch(searchTerm = 'eins zwei drei', text = 'drei, ONE oder zwei?'))
        # A search term containing a regexp metacharacter must not break the regexp.
        self.assertFalse(smartSearch(searchTerm = '(not set', text = 'drei, ONE oder zwei?'))
|
||||
5510
src/data/Country By Batchcode Search Term.csv
Normal file
5510
src/data/Country By Batchcode Search Term.csv
Normal file
File diff suppressed because it is too large
Load Diff
2958
src/data/Country By Clicked Batchcode.csv
Normal file
2958
src/data/Country By Clicked Batchcode.csv
Normal file
File diff suppressed because it is too large
Load Diff
91
src/data/Selected Country.csv
Normal file
91
src/data/Selected Country.csv
Normal file
@@ -0,0 +1,91 @@
|
||||
# ----------------------------------------
|
||||
# HowBadIsMyBatch
|
||||
# Batchcode By Country-Selected Country
|
||||
# 20230201-20230401
|
||||
# ----------------------------------------
|
||||
|
||||
Country,United States,Germany,Canada,United Kingdom,Italy,Japan,Romania,Australia,Austria,Poland,Hungary,France,Netherlands,Switzerland,Brazil,Spain,Sweden,Singapore,Argentina,Portugal,Totals
|
||||
Item ID,Active users,Active users,Active users,Active users,Active users,Active users,Active users,Active users,Active users,Active users,Active users,Active users,Active users,Active users,Active users,Active users,Active users,Active users,Active users,Active users,Active users
|
||||
,1734,542,264,237,156,89,114,75,54,51,51,38,38,25,14,28,26,4,19,17,3817,Grand total
|
||||
Global,1730,538,261,237,156,88,114,75,54,51,50,38,38,25,14,28,26,4,19,17,3804
|
||||
United States,327,4,3,2,4,3,0,1,2,0,0,0,0,0,0,1,0,1,1,0,352
|
||||
Germany,0,137,0,1,6,1,0,0,6,2,1,1,0,0,0,2,0,1,0,1,163
|
||||
Canada,2,1,59,0,1,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,67
|
||||
Australia,0,1,1,0,1,1,0,23,1,1,0,0,0,0,0,0,0,0,0,0,31
|
||||
Japan,0,1,0,0,2,18,0,0,0,0,0,0,0,0,1,0,0,0,0,0,23
|
||||
Switzerland,0,3,1,1,1,0,0,0,0,0,0,0,0,11,0,0,1,1,0,0,19
|
||||
Unknown Country,3,1,0,9,2,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,19
|
||||
Israel,0,1,1,1,3,1,0,0,2,0,0,0,0,0,0,0,0,1,0,0,15
|
||||
New Zealand,1,0,1,1,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,12
|
||||
Philippines,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,10
|
||||
Brazil,0,4,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,1,8
|
||||
Argentina,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,4,0,7
|
||||
China,0,2,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7
|
||||
India,0,1,1,0,1,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,7
|
||||
Norway,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,7
|
||||
Afghanistan,1,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,6
|
||||
Mexico,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,6
|
||||
"Iran, Islamic Republic of",0,2,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,5
|
||||
Malaysia,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,5
|
||||
Peru,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,5
|
||||
Albania,0,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,4
|
||||
Bahamas,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,4
|
||||
Costa Rica,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,4
|
||||
Hong Kong,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,4
|
||||
Nepal,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
|
||||
Thailand,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,4
|
||||
Ukraine,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4
|
||||
Uruguay,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,4
|
||||
"Venezuela, Bolivarian Republic of",2,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4
|
||||
Viet Nam,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
|
||||
Andorra,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,3
|
||||
Chile,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3
|
||||
Colombia,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3
|
||||
French Polynesia,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,3
|
||||
Indonesia,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,3
|
||||
"Korea, Republic of",0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,3
|
||||
South Africa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3
|
||||
Algeria,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,2
|
||||
Bahrain,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
|
||||
Bosnia and Herzegovina,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2
|
||||
Cayman Islands,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
|
||||
Côte d'Ivoire,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2
|
||||
Dominican Republic,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,2
|
||||
Egypt,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
|
||||
Guatemala,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2
|
||||
"Korea, Democratic People's Republic of",0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
|
||||
Mauritius,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2
|
||||
"Moldova, Republic of",1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2
|
||||
Nigeria,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,2
|
||||
Pakistan,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2
|
||||
Russian Federation,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,2
|
||||
Saudi Arabia,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2
|
||||
Serbia,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2
|
||||
Tunisia,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2
|
||||
Turkey,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2
|
||||
United Arab Emirates,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2
|
||||
Azerbaijan,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Bangladesh,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Bermuda,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Brunei Darussalam,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Ecuador,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
El Salvador,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
|
||||
Georgia,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Ghana,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
|
||||
Iraq,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Jamaica,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Jordan,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Kuwait,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
|
||||
Lesotho,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Liberia,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Macao,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
|
||||
Mongolia,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Morocco,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Nicaragua,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
"Palestine, State of",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Paraguay,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Réunion,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1
|
||||
Singapore,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1
|
||||
Sri Lanka,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1
|
||||
"Tanzania, United Republic of",0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
|
||||
|
||||
|
Reference in New Issue
Block a user