Files
HowBadIsMyBatch/src/captcha/ModelFactory.py
frankknoll a9e7bf4833 refactoring
2023-03-15 17:14:24 +01:00

103 lines
4.1 KiB
Python

from captcha.CTCLayer import CTCLayer
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
class ModelFactory:
    """Create compiled Keras models that read captcha images and are trained with a CTC loss.

    Each public ``create*`` method plugs a different ``tf.keras.applications``
    backbone (ImageNet weights, classification top removed) into the same
    recurrent head built by ``_createModel``.
    """

    # Names of the image input layer and of the softmax output layer. They are
    # assigned when the training model is built and looked up again by
    # ``createPredictionModel`` to cut the prediction sub-model out of it.
    predictionModelInputLayerName = "image"
    predictionModelOutputLayerName = "dense2"

    def __init__(self, captchaShape, char_to_num):
        """Store the parameters shared by all models created by this factory.

        Args:
            captchaShape: object exposing ``height`` and ``width``; they define
                the spatial size of the image input layer.
            char_to_num: object exposing ``get_vocabulary()``; the vocabulary
                length determines the number of output classes.
        """
        self.captchaShape = captchaShape
        self.char_to_num = char_to_num

    # see https://www.tensorflow.org/api_docs/python/tf/keras/applications/resnet/ResNet101
    def createResNet101(self):
        """Return a compiled training model that uses ResNet101 as feature extractor."""
        return self._createModel(
            baseModelFactory=lambda input_tensor: tf.keras.applications.resnet.ResNet101(
                input_tensor=input_tensor,
                weights='imagenet',
                include_top=False),
            preprocess_input=tf.keras.applications.resnet.preprocess_input,
            name='ResNet101')

    def createMobileNetV2(self):
        """Return a compiled training model that uses MobileNetV2 as feature extractor."""
        return self._createModel(
            baseModelFactory=lambda input_tensor: tf.keras.applications.MobileNetV2(
                input_tensor=input_tensor,
                weights='imagenet',
                include_top=False),
            preprocess_input=tf.keras.applications.mobilenet_v2.preprocess_input,
            name='MobileNetV2')

    def createMobileNetV3Small(self):
        """Return a compiled training model that uses the minimalistic MobileNetV3Small as feature extractor."""
        return self._createModel(
            baseModelFactory=lambda input_tensor: tf.keras.applications.MobileNetV3Small(
                input_tensor=input_tensor,
                minimalistic=True,
                weights='imagenet',
                include_top=False),
            preprocess_input=tf.keras.applications.mobilenet_v3.preprocess_input,
            name='MobileNetV3Small')

    @staticmethod
    def createPredictionModel(model):
        """Extract the inference sub-model from a training model.

        Args:
            model: a model containing layers named
                ``predictionModelInputLayerName`` and
                ``predictionModelOutputLayerName`` (as built by ``_createModel``).

        Returns:
            A ``keras.models.Model`` mapping the image input to the softmax
            output, i.e. without the label input and the CTC loss layer.
        """
        return keras.models.Model(
            model.get_layer(name=ModelFactory.predictionModelInputLayerName).input,
            model.get_layer(name=ModelFactory.predictionModelOutputLayerName).output)

    def _createModel(self, baseModelFactory, preprocess_input, name):
        """Build and compile the training model around the given backbone.

        Args:
            baseModelFactory: callable that receives the (preprocessed,
                transposed) image tensor and returns the backbone Keras model.
            preprocess_input: callable applied to the raw image input before
                it is fed to the backbone.
            name: name given to the resulting Keras model.

        Returns:
            A ``keras.models.Model`` with the inputs ``[image, label]`` and
            the output of the CTC layer, compiled with the Adam optimizer.
        """
        # Inputs to the model
        input_image = layers.Input(
            shape=(self.captchaShape.height, self.captchaShape.width, 3),
            name=ModelFactory.predictionModelInputLayerName,
            dtype="float32")
        labels = layers.Input(name="label", shape=(None,), dtype="float32")

        image = preprocess_input(input_image)
        # Transpose the image because we want the time dimension to correspond to the width of the image.
        image = layers.Permute(dims=[2, 1, 3])(image)
        base_model = baseModelFactory(image)

        # Merge the last two axes of the backbone's feature map so that every
        # position along axis 1 (the time dimension) carries one feature vector.
        x = layers.Reshape(
            target_shape=(base_model.output_shape[1], base_model.output_shape[2] * base_model.output_shape[3]),
            name="reshape")(base_model.output)
        x = layers.Dense(64, activation="relu", name="dense1")(x)
        x = layers.Dropout(0.2)(x)

        # RNNs
        x = layers.Bidirectional(
            layers.LSTM(
                128,
                return_sequences=True,
                dropout=0.25,
                unroll=False,
                name="LSTM1"))(x)
        x = layers.Bidirectional(
            layers.LSTM(
                64,
                return_sequences=True,
                dropout=0.25,
                unroll=False,
                name="LSTM2"))(x)

        # Output layer: one class per vocabulary entry plus one extra class
        # (presumably the CTC blank token -- confirm against CTCLayer).
        x = layers.Dense(
            len(self.char_to_num.get_vocabulary()) + 1,
            activation="softmax",
            name=ModelFactory.predictionModelOutputLayerName)(x)

        # Add CTC layer for calculating CTC loss at each step
        output = CTCLayer(name="ctc_loss")(labels, x)

        model = keras.models.Model(
            inputs=[input_image, labels],
            outputs=output,
            name=name)

        # Quoted reference training setup: "The model is optimized by a stochastic
        # gradient descent (SGD) strategy with an initial learning rate of 0.004,
        # weight decay of 0.00004 and momentum of 0.9."
        # An SGD optimizer (learning_rate=0.004, momentum=0.9, weight decay 0.00004)
        # would mirror that setup; Adam with its default settings is used instead.
        # No loss is passed to compile(); presumably CTCLayer registers the loss
        # itself -- confirm in captcha.CTCLayer.
        model.compile(optimizer=keras.optimizers.Adam())
        return model