diff --git a/src/captcha/CaptchaReader.py b/src/captcha/CaptchaReader.py
index df2b0c5f6e2..460083f7f87 100644
--- a/src/captcha/CaptchaReader.py
+++ b/src/captcha/CaptchaReader.py
@@ -7,6 +7,7 @@ from captcha.DatasetFactory import DatasetFactory
 import numpy as np
 from tensorflow import keras
 
+
 class CaptchaReader:
 
     def __init__(self, modelFilepath, captchaShape):
@@ -14,19 +15,17 @@ class CaptchaReader:
         self.captchaShape = captchaShape
 
     def getTextInCaptchaImage(self, captchaImageFile):
-        # FK-TODO: refactor
-        modelDAO = ModelDAO(inColab = False)
-        model = modelDAO.loadModel(self.modelFilepath)
-        prediction_model = ModelFactory.createPredictionModel(model)
-        charNumConverter = CharNumConverter(CaptchaGenerator.characters)
-        datasetFactory = DatasetFactory(self.captchaShape,charNumConverter.char_to_num, batch_size = 64)
-        batchImages = self._asSingleSampleBatch(datasetFactory._encode_single_sample(captchaImageFile, 'dummy')['image'])
-        preds = prediction_model.predict(batchImages)
-        predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)
-        pred_texts = predictionsDecoder.decode_batch_predictions(preds)
-        return pred_texts[0]
+        return self._getTextsInCaptchaImage(self._getCaptchaImage(captchaImageFile))[0]
+
+    def _getCaptchaImage(self, captchaImageFile):
+        return self._asSingleSampleBatch(DatasetFactory.encodeImage(captchaImageFile, self.captchaShape))
 
     def _asSingleSampleBatch(self, img):
-        array = keras.utils.img_to_array(img)
-        array = np.expand_dims(array, axis=0)
-        return array
+        return np.expand_dims(keras.utils.img_to_array(img), axis=0)
+
+    def _getTextsInCaptchaImage(self, captchaImage):
+        preds = self._createPredictionModel().predict(captchaImage)
+        return PredictionsDecoder(CaptchaGenerator.captchaLength, CharNumConverter(CaptchaGenerator.characters).num_to_char).decode_batch_predictions(preds)
+
+    def _createPredictionModel(self):
+        return ModelFactory.createPredictionModel(ModelDAO(inColab=False).loadModel(self.modelFilepath))
diff --git a/src/captcha/DatasetFactory.py b/src/captcha/DatasetFactory.py
index aac88cee4e9..a45aa3ddaf9 100644
--- a/src/captcha/DatasetFactory.py
+++ b/src/captcha/DatasetFactory.py
@@ -10,15 +10,18 @@ class DatasetFactory:
 
     def createDataset(self, x, y):
         dataset = tf.data.Dataset.from_tensor_slices((x, y))
-        dataset = dataset.map(self._encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
+        dataset = dataset.map(self._encodeImageAndLabel, num_parallel_calls=tf.data.AUTOTUNE)
         dataset = dataset.batch(self.batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
         return dataset
 
-    def _encode_single_sample(self, img_path, label):
-        img = tf.io.read_file(img_path)
+    def _encodeImageAndLabel(self, imageFilename, label):
+        return {
+            "image": DatasetFactory.encodeImage(imageFilename, self.captchaShape),
+            "label": self.char_to_num(tf.strings.unicode_split(label, input_encoding="UTF-8"))}
+
+    @staticmethod
+    def encodeImage(imageFilename, captchaShape):
+        img = tf.io.read_file(imageFilename)
         img = tf.io.decode_jpeg(img, channels=3)
-        img = tf.image.resize(img, [self.captchaShape.height, self.captchaShape.width])
-        # Map the characters in label to numbers
-        label = self.char_to_num(tf.strings.unicode_split(label, input_encoding="UTF-8"))
-        # Return a dict as our model is expecting two inputs
-        return {"image": img, "label": label}
+        img = tf.image.resize(img, [captchaShape.height, captchaShape.width])
+        return img