# HowBadIsMyBatch/src/captcha/ModelFactory.py

from captcha.CTCLayer import CTCLayer
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


class ModelFactory:
    """Builds captcha recognition models on top of a Keras application backbone.

    Every ``create*`` method returns a compiled training model that takes two
    inputs (the captcha image and its label sequence) and whose output comes
    from a ``CTCLayer``. ``createPredictionModel`` derives an image-only model
    from such a training model.
    """

    # Names of the layers that delimit the prediction sub-graph inside a
    # training model; createPredictionModel() looks the layers up by these names.
    predictionModelInputLayerName = "image"
    predictionModelOutputLayerName = "dense2"

    def __init__(self, captchaShape, char_to_num):
        """Remember the captcha geometry and the character lookup.

        captchaShape: object exposing ``height`` and ``width`` attributes.
        char_to_num: object exposing ``get_vocabulary()``; the length of that
            vocabulary determines the size of the output layer.
        """
        self.captchaShape = captchaShape
        self.char_to_num = char_to_num

    # see https://www.tensorflow.org/api_docs/python/tf/keras/applications/resnet/ResNet101
    def createResNet101(self):
        """Return a compiled training model using ResNet101 as the backbone."""

        def buildBackbone(input_tensor):
            return tf.keras.applications.resnet.ResNet101(
                input_tensor=input_tensor,
                weights='imagenet',
                include_top=False)

        return self._createModel(
            baseModelFactory=buildBackbone,
            preprocess_input=tf.keras.applications.resnet.preprocess_input,
            name='ResNet101')

    def createMobileNetV2(self):
        """Return a compiled training model using MobileNetV2 as the backbone."""

        def buildBackbone(input_tensor):
            return tf.keras.applications.MobileNetV2(
                input_tensor=input_tensor,
                weights='imagenet',
                include_top=False)

        return self._createModel(
            baseModelFactory=buildBackbone,
            preprocess_input=tf.keras.applications.mobilenet_v2.preprocess_input,
            name='MobileNetV2')

    def createMobileNetV3Small(self):
        """Return a compiled training model using a minimalistic MobileNetV3Small backbone."""

        def buildBackbone(input_tensor):
            return tf.keras.applications.MobileNetV3Small(
                input_tensor=input_tensor,
                minimalistic=True,
                weights='imagenet',
                include_top=False)

        return self._createModel(
            baseModelFactory=buildBackbone,
            preprocess_input=tf.keras.applications.mobilenet_v3.preprocess_input,
            name='MobileNetV3Small')

    @staticmethod
    def createPredictionModel(model):
        """Return a model mapping the image input of ``model`` to its softmax output.

        ``model`` must contain layers named ``predictionModelInputLayerName``
        and ``predictionModelOutputLayerName``, as the models built by this
        factory do. The label input and the CTC layer are left out.
        """
        imageInput = model.get_layer(name=ModelFactory.predictionModelInputLayerName).input
        softmaxOutput = model.get_layer(name=ModelFactory.predictionModelOutputLayerName).output
        return keras.models.Model(imageInput, softmaxOutput)

    def _createModel(self, baseModelFactory, preprocess_input, name):
        """Assemble and compile the training model around a given backbone.

        baseModelFactory: callable that receives the preprocessed, transposed
            image tensor and returns the backbone model built on top of it.
        preprocess_input: callable applied to the raw image input before the
            backbone sees it.
        name: name given to the resulting Keras model.

        Returns the compiled model with inputs ``[image, label]``.
        """
        # The two model inputs: the RGB captcha image and its label sequence.
        input_image = layers.Input(
            shape=(self.captchaShape.height, self.captchaShape.width, 3),
            name=ModelFactory.predictionModelInputLayerName,
            dtype="float32")
        labels = layers.Input(name="label", shape=(None,), dtype="float32")

        preprocessed = preprocess_input(input_image)
        # Swap height and width so that the sequence (time) axis fed to the
        # recurrent layers runs along the width of the image.
        widthFirst = layers.Permute(dims=[2, 1, 3])(preprocessed)
        backbone = baseModelFactory(widthFirst)

        # Collapse the last two backbone axes into one feature vector per
        # step along the first (width-derived) axis.
        numSteps, numRows, numChannels = backbone.output_shape[1:4]
        features = layers.Reshape(
            target_shape=(numSteps, numRows * numChannels),
            name="reshape")(backbone.output)
        features = layers.Dense(64, activation="relu", name="dense1")(features)
        features = layers.Dropout(0.2)(features)

        # Two stacked bidirectional LSTMs, both returning full sequences.
        for units, lstmName in ((128, "LSTM1"), (64, "LSTM2")):
            features = layers.Bidirectional(
                layers.LSTM(
                    units,
                    return_sequences=True,
                    dropout=0.25,
                    unroll=False,
                    name=lstmName))(features)

        # One softmax unit per vocabulary entry plus one extra class.
        # NOTE(review): the extra class is presumably the CTC blank - confirm against CTCLayer.
        numClasses = len(self.char_to_num.get_vocabulary()) + 1
        probabilities = layers.Dense(
            numClasses,
            activation="softmax",
            name=ModelFactory.predictionModelOutputLayerName)(features)

        # The CTC layer receives the labels and the predictions; the model is
        # compiled without an explicit loss.
        # NOTE(review): assumes CTCLayer registers the loss itself - confirm in captcha.CTCLayer.
        ctcOutput = CTCLayer(name="ctc_loss")(labels, probabilities)

        trainingModel = keras.models.Model(
            inputs=[input_image, labels],
            outputs=ctcOutput,
            name=name)
        # A quoted reference recipe optimizes with SGD (initial learning rate
        # 0.004, weight decay 0.00004, momentum 0.9); Adam with its default
        # settings is what is actually used here.
        trainingModel.compile(optimizer=keras.optimizers.Adam())
        return trainingModel