adding CaptchaReaderTest

This commit is contained in:
frankknoll
2023-03-15 14:47:07 +01:00
parent 7bdd20c650
commit 6330b7b724
1062 changed files with 617 additions and 369 deletions

1
.gitignore vendored

@@ -14,3 +14,4 @@ src/HowBadIsMyBatch.nbconvert.html
src/__pycache__/
src/intensivstationen/__pycache__/
google-chrome-stable_current_amd64*
src/captcha/__pycache__

74
src/CaptchaReader.py

@@ -1,74 +0,0 @@
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from PIL import Image
import io

# copied from value of characters variable in captcha_ocr.ipynb or captcha_ocr_trainAndSaveModel.ipynb
characters = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f']

img_width = 241
img_height = 62
downsample_factor = 4

# copied from value of max_length variable in captcha_ocr.ipynb or captcha_ocr_trainAndSaveModel.ipynb
max_length = 6

char_to_num = layers.StringLookup(
    vocabulary=list(characters),
    mask_token=None)

num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(),
    mask_token=None, invert=True)


def encode_single_sample(img_path):
    # 1. Read image
    img = tf.io.read_file(img_path)
    # 2. Decode and convert to grayscale
    img = tf.io.decode_png(img, channels=1)
    # 3. Convert to float32 in [0, 1] range
    img = tf.image.convert_image_dtype(img, tf.float32)
    # 4. Resize to the desired size
    img = tf.image.resize(img, [img_height, img_width])
    # 5. Transpose the image because we want the time
    #    dimension to correspond to the width of the image.
    img = tf.transpose(img, perm=[1, 0, 2])
    # 6. Return the image as a single-sample batch
    return asSingleSampleBatch(img)


def asSingleSampleBatch(img):
    array = keras.utils.img_to_array(img)
    array = np.expand_dims(array, axis=0)
    return array


def decode_batch_predictions(pred):
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    # Use greedy search. For complex tasks, you can use beam search.
    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][:, :max_length]
    # Iterate over the results and get back the text
    output_text = []
    for res in results:
        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        output_text.append(res)
    return output_text


def _getModel():
    print("loading model...")
    model = load_model()
    model.summary()
    return model


def load_model():
    model = keras.models.load_model('src/model')
    return keras.models.Model(
        model.get_layer(name="image").input,
        model.get_layer(name="dense2").output)


def getTextInCaptchaImage(captchaImageFile):
    batchImages = encode_single_sample(captchaImageFile)
    preds = _getModel().predict(batchImages)
    return decode_batch_predictions(preds)[0]

15
src/CaptchaReaderTest.py

@@ -1,15 +0,0 @@
import unittest
from CaptchaReader import getTextInCaptchaImage


class CaptchaReaderTest(unittest.TestCase):

    def test_getTextInCaptchaImage(self):
        # Given
        textInCaptchaImage = '1Ad47a'

        # When
        textInCaptchaImageActual = getTextInCaptchaImage(f'src/captchas/VAERS/{textInCaptchaImage}.jpeg')

        # Then
        self.assertEqual(textInCaptchaImageActual, textInCaptchaImage)


@@ -2,7 +2,7 @@ import os
 import time
 from WebDriver import getWebDriver, isCaptchaSolved, saveCaptchaImageAs
 from selenium.webdriver.common.by import By
-from CaptchaReader import getTextInCaptchaImage
+from captcha.CaptchaReader import CaptchaReader
 from zipUtils import unzipAndRemove
@@ -15,11 +15,15 @@ from zipUtils import unzipAndRemove
 def solveCaptchaAndStartFileDownload(driver, captchaImageFile):
     saveCaptchaImageAs(driver, captchaImageFile)
-    textInCaptchaImage = getTextInCaptchaImage(captchaImageFile)
+    textInCaptchaImage = _createCaptchaReader().getTextInCaptchaImage(captchaImageFile)
     print('textInCaptchaImage:', textInCaptchaImage)
     driver.find_element(By.ID, "verificationCode").send_keys(textInCaptchaImage)
     driver.find_element(By.CSS_SELECTOR, '[name="downloadbut"]').click()
+
+def _createCaptchaReader():
+    working_directory = os.path.dirname(__file__)
+    return CaptchaReader(modelFilepath = f'{working_directory}/captcha/MobileNetV3Small')
+
 def downloadFile(absoluteFile, driver, maxTries):
     def _downloadFile():
         driver.get('https://vaers.hhs.gov/eSubDownload/index.jsp?fn=' + os.path.basename(absoluteFile))
@@ -42,7 +46,7 @@ def _waitUntilDownloadHasFinished(file):
     time.sleep(2)
 def downloadVAERSFile(file, downloadDir):
-    driver = getWebDriver(downloadDir, isHeadless = True)
+    driver = getWebDriver(downloadDir, isHeadless = False)
     downloadedFile = downloadFile(
         absoluteFile = downloadDir + "/" + file,
         driver = driver,

0
src/__init__.py Normal file

27
src/captcha/CTCLayer.py Normal file

@@ -0,0 +1,27 @@
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


# see https://keras.io/guides/making_new_layers_and_models_via_subclassing/
class CTCLayer(layers.Layer):

    def __init__(self, name=None):
        super().__init__(name=name)
        self.loss_fn = keras.backend.ctc_batch_cost

    def call(self, y_true, y_pred):
        # Compute the training-time loss value and add it
        # to the layer using `self.add_loss()`.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # At test time, just return the computed predictions
        return y_pred
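
For orientation, a minimal sketch (an illustration, not part of the commit) of what the layer computes on a toy batch, assuming TensorFlow 2.x:

import tensorflow as tf
from tensorflow import keras

# Toy batch: 2 samples, 10 time steps, 5 characters + 1 CTC blank class.
y_pred = tf.nn.softmax(tf.random.uniform((2, 10, 6)))   # per-time-step class probabilities
y_true = tf.constant([[1, 2, 3], [4, 0, 2]], "int64")   # integer-encoded labels of length 3

batch_len = tf.cast(tf.shape(y_true)[0], "int64")
input_length = tf.cast(tf.shape(y_pred)[1], "int64") * tf.ones((batch_len, 1), "int64")
label_length = tf.cast(tf.shape(y_true)[1], "int64") * tf.ones((batch_len, 1), "int64")

loss = keras.backend.ctc_batch_cost(y_true, y_pred, input_length, label_length)
print(loss.shape)  # (2, 1): one CTC loss value per sample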

43
src/captcha/CaptchaGenerator.py Normal file

@@ -0,0 +1,43 @@
from PIL import Image, ImageDraw, ImageFont
import random
import string
import shutil


class CaptchaGenerator:

    characters = sorted(set(list(string.ascii_letters + string.digits)))
    captchaLength = 6

    def __init__(self, numCaptchas, dataDir):
        self.numCaptchas = numCaptchas
        self.dataDir = dataDir

    def createAndSaveCaptchas(self):
        self._prepareDataDir()
        for _ in range(self.numCaptchas):
            self._createAndSaveCaptcha()

    def _prepareDataDir(self):
        shutil.rmtree(self.dataDir, ignore_errors = True)
        self.dataDir.mkdir(parents=True, exist_ok=True)

    def _createAndSaveCaptcha(self):
        captchaString = self._createCaptchaString()
        captcha = self._createCaptcha(captchaString)
        captcha.save(f"{str(self.dataDir)}/{captchaString}.jpeg")

    def _createCaptchaString(self):
        return ''.join(random.choice(CaptchaGenerator.characters) for _ in range(CaptchaGenerator.captchaLength))

    def _createCaptcha(self, word):
        image = Image.new("RGB", (360, 96), "#373737")
        draw = ImageDraw.Draw(image)
        font = ImageFont.truetype("ariali.ttf", size=40)
        draw.text((30, 10), word[0], font=font)
        draw.text((80, 30), word[1], font=font)
        draw.text((135, 10), word[2], font=font)
        draw.text((190, 30), word[3], font=font)
        draw.text((250, 10), word[4], font=font)
        draw.text((295, 30), word[5], font=font)
        return image
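
A brief usage sketch (illustrative; the dataDir path follows the one used later in the notebook, and the Arial Italic font file ariali.ttf must be resolvable by PIL):

from pathlib import Path
from captcha.CaptchaGenerator import CaptchaGenerator

generator = CaptchaGenerator(numCaptchas = 10, dataDir = Path("captchas/generated/VAERS/"))
generator.createAndSaveCaptchas()  # writes files such as captchas/generated/VAERS/aB3kZ9.jpeg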

34
src/captcha/CaptchaReader.py Normal file

@@ -0,0 +1,34 @@
from captcha.ModelDAO import ModelDAO
from captcha.ModelFactory import ModelFactory
from captcha.PredictionsDecoder import PredictionsDecoder
from captcha.CaptchaGenerator import CaptchaGenerator
from captcha.CharNumConverter import CharNumConverter
from captcha.DatasetFactory import DatasetFactory
import numpy as np
from tensorflow import keras

# FK-TODO: DRY with captcha.ipynb
img_width = 241
img_height = 62


class CaptchaReader:

    def __init__(self, modelFilepath):
        self.modelFilepath = modelFilepath

    def getTextInCaptchaImage(self, captchaImageFile):
        modelDAO = ModelDAO(inColab = False)
        model = modelDAO.loadModel(self.modelFilepath)
        prediction_model = ModelFactory.createPredictionModel(model)
        charNumConverter = CharNumConverter(CaptchaGenerator.characters)
        datasetFactory = DatasetFactory(img_height, img_width, charNumConverter.char_to_num, batch_size = 64)
        batchImages = self._asSingleSampleBatch(datasetFactory._encode_single_sample(captchaImageFile, 'dummy')['image'])
        preds = prediction_model.predict(batchImages)
        predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)
        pred_texts = predictionsDecoder.decode_batch_predictions(preds)
        return pred_texts[0]

    def _asSingleSampleBatch(self, img):
        array = keras.utils.img_to_array(img)
        array = np.expand_dims(array, axis=0)
        return array

19
src/captcha/CaptchaReaderTest.py Normal file

@@ -0,0 +1,19 @@
import unittest
from captcha.CaptchaReader import CaptchaReader
import os


class CaptchaReaderTest(unittest.TestCase):

    def setUp(self):
        self.working_directory = os.path.dirname(__file__)

    def test_getTextInCaptchaImage(self):
        # Given
        textInCaptchaImage = '1Ad47a'
        captchaReader = CaptchaReader(modelFilepath = f'{self.working_directory}/MobileNetV3Small')

        # When
        textInCaptchaImageActual = captchaReader.getTextInCaptchaImage(f'{self.working_directory}/captchas/VAERS/{textInCaptchaImage}.jpeg')

        # Then
        self.assertEqual(textInCaptchaImageActual, textInCaptchaImage)

10
src/captcha/CharNumConverter.py Normal file

@@ -0,0 +1,10 @@
from tensorflow.keras import layers


class CharNumConverter:

    def __init__(self, characters):
        self.char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)
        self.num_to_char = layers.StringLookup(
            vocabulary=self.char_to_num.get_vocabulary(),
            mask_token=None,
            invert=True)
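
A round-trip sketch (illustrative) showing how the two lookup layers invert each other:

import tensorflow as tf
from captcha.CaptchaGenerator import CaptchaGenerator
from captcha.CharNumConverter import CharNumConverter

converter = CharNumConverter(CaptchaGenerator.characters)
ids = converter.char_to_num(tf.strings.unicode_split("1Ad47a", input_encoding="UTF-8"))
text = tf.strings.reduce_join(converter.num_to_char(ids)).numpy().decode("utf-8")
print(text)  # 1Ad47a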

25
src/captcha/DatasetFactory.py Normal file

@@ -0,0 +1,25 @@
import tensorflow as tf


class DatasetFactory:

    def __init__(self, img_height, img_width, char_to_num, batch_size):
        self.img_height = img_height
        self.img_width = img_width
        self.char_to_num = char_to_num
        self.batch_size = batch_size

    def createDataset(self, x, y):
        dataset = tf.data.Dataset.from_tensor_slices((x, y))
        dataset = dataset.map(self._encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)
        dataset = dataset.batch(self.batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
        return dataset

    def _encode_single_sample(self, img_path, label):
        img = tf.io.read_file(img_path)
        img = tf.io.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, [self.img_height, self.img_width])
        # Map the characters in label to numbers
        label = self.char_to_num(tf.strings.unicode_split(label, input_encoding="UTF-8"))
        # Return a dict as our model is expecting two inputs
        return {"image": img, "label": label}

27
src/captcha/GoogleDriveManager.py Normal file

@@ -0,0 +1,27 @@
from pathlib import Path


class GoogleDriveManager:

    _googleDriveFolder = Path('/content/gdrive')
    _baseFolder = _googleDriveFolder / 'MyDrive/CAPTCHA/models/'

    @staticmethod
    def mount():
        from google.colab import drive
        drive.mount(str(GoogleDriveManager._googleDriveFolder))

    @staticmethod
    def uploadFolderToGoogleDrive(folder):
        pass
        # FK-TODO: refactor
        # !zip -r {folder}.zip {folder}/
        # !cp {folder}.zip {GoogleDriveManager._baseFolder}

    @staticmethod
    def downloadFolderFromGoogleDrive(folder):
        pass
        # FK-TODO: refactor
        # !cp {GoogleDriveManager._baseFolder}/{folder}.zip .
        # !rm -rf {folder}
        # !unzip {folder}.zip
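
The commented-out lines are IPython shell magics that only work inside a notebook. One possible portable replacement for the FK-TODO, a sketch using shutil (an assumption, not the author's refactoring):

import shutil
from captcha.GoogleDriveManager import GoogleDriveManager

def _uploadFolderToGoogleDrive(folder):
    # !zip -r {folder}.zip {folder}/
    archive = shutil.make_archive(folder, 'zip', root_dir='.', base_dir=folder)
    # !cp {folder}.zip {GoogleDriveManager._baseFolder}
    shutil.copy(archive, GoogleDriveManager._baseFolder)

def _downloadFolderFromGoogleDrive(folder):
    # !cp {GoogleDriveManager._baseFolder}/{folder}.zip .
    shutil.copy(GoogleDriveManager._baseFolder / f'{folder}.zip', '.')
    # !rm -rf {folder} && !unzip {folder}.zip
    shutil.rmtree(folder, ignore_errors=True)
    shutil.unpack_archive(f'{folder}.zip')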

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

Binary file not shown.

20
src/captcha/ModelDAO.py Normal file

@@ -0,0 +1,20 @@
from tensorflow import keras
from captcha.GoogleDriveManager import GoogleDriveManager
import shutil


class ModelDAO:

    def __init__(self, inColab):
        self.inColab = inColab

    def saveModel(self, model):
        shutil.rmtree(model.name, ignore_errors = True)
        model.save(model.name)
        if self.inColab:
            GoogleDriveManager.uploadFolderToGoogleDrive(model.name)

    def loadModel(self, modelFilepath):
        if self.inColab:
            GoogleDriveManager.downloadFolderFromGoogleDrive(modelFilepath)
        return keras.models.load_model(modelFilepath)
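
Outside Colab the class reduces to plain Keras SavedModel I/O; a sketch (the model path mirrors the one used by CaptchaReaderTest, run from the repository root):

from captcha.ModelDAO import ModelDAO

modelDAO = ModelDAO(inColab = False)
model = modelDAO.loadModel('src/captcha/MobileNetV3Small')  # keras.models.load_model under the hood
modelDAO.saveModel(model)  # saves to a folder named after model.name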

103
src/captcha/ModelFactory.py Normal file

@@ -0,0 +1,103 @@
from captcha.CTCLayer import CTCLayer
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


class ModelFactory:

    predictionModelInputLayerName = "image"
    predictionModelOutputLayerName = "dense2"

    def __init__(self, img_height, img_width, char_to_num):
        self.img_height = img_height
        self.img_width = img_width
        self.char_to_num = char_to_num

    # see https://www.tensorflow.org/api_docs/python/tf/keras/applications/resnet/ResNet101
    def createResNet101(self):
        return self._createModel(
            baseModelFactory = lambda input_tensor: tf.keras.applications.resnet.ResNet101(
                input_tensor = input_tensor,
                weights = 'imagenet',
                include_top = False),
            preprocess_input = tf.keras.applications.resnet.preprocess_input,
            name = 'ResNet101')

    def createMobileNetV2(self):
        return self._createModel(
            baseModelFactory = lambda input_tensor: tf.keras.applications.MobileNetV2(
                input_tensor = input_tensor,
                weights = 'imagenet',
                include_top = False),
            preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input,
            name = 'MobileNetV2')

    def createMobileNetV3Small(self):
        return self._createModel(
            baseModelFactory = lambda input_tensor: tf.keras.applications.MobileNetV3Small(
                input_tensor = input_tensor,
                minimalistic = True,
                weights = 'imagenet',
                include_top = False),
            preprocess_input = tf.keras.applications.mobilenet_v3.preprocess_input,
            name = 'MobileNetV3Small')

    @staticmethod
    def createPredictionModel(model):
        return keras.models.Model(
            model.get_layer(name=ModelFactory.predictionModelInputLayerName).input,
            model.get_layer(name=ModelFactory.predictionModelOutputLayerName).output)

    def _createModel(self, baseModelFactory, preprocess_input, name):
        # Inputs to the model
        input_image = layers.Input(
            shape=(self.img_height, self.img_width, 3),
            name=ModelFactory.predictionModelInputLayerName,
            dtype="float32")
        labels = layers.Input(name="label", shape=(None,), dtype="float32")

        image = preprocess_input(input_image)
        # Transpose the image because we want the time dimension to correspond to the width of the image.
        image = tf.keras.layers.Permute(dims=[2, 1, 3])(image)
        base_model = baseModelFactory(image)
        x = layers.Reshape(
            target_shape=(base_model.output_shape[1], base_model.output_shape[2] * base_model.output_shape[3]),
            name="reshape")(base_model.output)
        x = layers.Dense(64, activation="relu", name="dense1")(x)
        x = layers.Dropout(0.2)(x)

        # RNNs
        x = layers.Bidirectional(
            layers.LSTM(
                128,
                return_sequences=True,
                dropout=0.25,
                unroll=False,
                name="LSTM1"))(x)
        x = layers.Bidirectional(
            layers.LSTM(
                64,
                return_sequences=True,
                dropout=0.25,
                unroll=False,
                name="LSTM2"))(x)

        # Output layer
        x = layers.Dense(
            len(self.char_to_num.get_vocabulary()) + 1,
            activation="softmax",
            name=ModelFactory.predictionModelOutputLayerName)(x)

        # Add CTC layer for calculating CTC loss at each step
        output = CTCLayer(name="ctc_loss")(labels, x)

        model = keras.models.Model(
            inputs=[input_image, labels],
            outputs=output,
            name=name)
        # "The model is optimized by a stochastic gradient descent (SGD) strategy with an initial learning rate of 0.004, weight decay of 0.00004 and momentum of 0.9."
        # from tensorflow.keras.optimizers import SGD
        # model.compile(optimizer=SGD(learning_rate=0.004, weight_decay=0.00004, momentum=0.9))
        model.compile(optimizer=keras.optimizers.Adam())
        return model
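
A construction sketch (illustrative; the dimensions follow the constants used elsewhere in this commit, and the ImageNet weights are downloaded on first use):

from captcha.CaptchaGenerator import CaptchaGenerator
from captcha.CharNumConverter import CharNumConverter
from captcha.ModelFactory import ModelFactory

converter = CharNumConverter(CaptchaGenerator.characters)
factory = ModelFactory(img_height = 62, img_width = 241, char_to_num = converter.char_to_num)
training_model = factory.createMobileNetV3Small()                      # two inputs: "image" and "label"
prediction_model = ModelFactory.createPredictionModel(training_model)  # "image" in, "dense2" softmax out
prediction_model.summary()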

24
src/captcha/PredictionsDecoder.py Normal file

@@ -0,0 +1,24 @@
import tensorflow as tf
from tensorflow import keras
import numpy as np


class PredictionsDecoder:

    def __init__(self, captchaLength, num_to_char):
        self.captchaLength = captchaLength
        self.num_to_char = num_to_char

    def decode_batch_predictions(self, pred):
        return self.asStrings(self.ctc_decode(pred))

    def ctc_decode(self, pred):
        input_len = np.ones(pred.shape[0]) * pred.shape[1]
        # Use greedy search. For complex tasks, you can use beam search.
        return keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][:, :self.captchaLength]

    def asStrings(self, labels):
        return [self.asString(label) for label in labels]

    def asString(self, label):
        return tf.strings.reduce_join(self.num_to_char(label)).numpy().decode("utf-8")
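
A decoding sketch on random data (illustrative; with random "predictions" the output is a garbage string, but the shapes match what the prediction model emits):

import numpy as np
from captcha.CaptchaGenerator import CaptchaGenerator
from captcha.CharNumConverter import CharNumConverter
from captcha.PredictionsDecoder import PredictionsDecoder

converter = CharNumConverter(CaptchaGenerator.characters)
decoder = PredictionsDecoder(CaptchaGenerator.captchaLength, converter.num_to_char)
# (batch, time_steps, vocabulary + 1 CTC blank), as produced by the "dense2" softmax layer
pred = np.random.rand(1, 30, len(converter.char_to_num.get_vocabulary()) + 1).astype("float32")
print(decoder.decode_batch_predictions(pred))  # e.g. a random 6-character string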

0
src/captcha/__init__.py Normal file


@@ -25,11 +25,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {
"id": "zZSwQragIS_v"
},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-03-15 10:46:02.303787: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"/home/frankknoll/.local/lib/python3.9/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n",
" warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
]
}
],
"source": [
"import os\n",
"import numpy as np\n",
@@ -43,90 +54,29 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {
"id": "QB8QZJPg3MGI"
},
"outputs": [],
"source": [
"class GoogleDriveManager:\n",
" \n",
" _googleDriveFolder = Path('/content/gdrive')\n",
" _baseFolder = _googleDriveFolder / 'MyDrive/CAPTCHA/models/'\n",
"\n",
" @staticmethod\n",
" def mount():\n",
" from google.colab import drive\n",
" drive.mount(str(GoogleDriveManager._googleDriveFolder))\n",
"\n",
" @staticmethod\n",
" def uploadFolderToGoogleDrive(folder):\n",
" !zip -r {folder}.zip {folder}/\n",
" !cp {folder}.zip {GoogleDriveManager._baseFolder}\n",
"\n",
" @staticmethod\n",
" def downloadFolderFromGoogleDrive(folder):\n",
" !cp {GoogleDriveManager._baseFolder}/{folder}.zip .\n",
" !rm -rf {folder}\n",
" !unzip {folder}.zip\n"
"from GoogleDriveManager import GoogleDriveManager"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {
"id": "C3bxU1US2blM"
},
"outputs": [],
"source": [
"from PIL import Image, ImageDraw, ImageFont\n",
"import random\n",
"import string\n",
"from pathlib import Path\n",
"\n",
"\n",
"class CaptchaGenerator:\n",
"\n",
" characters = sorted(set(list(string.ascii_letters + string.digits)))\n",
" captchaLength = 6\n",
"\n",
" def __init__(self, numCaptchas, dataDir):\n",
" self.numCaptchas = numCaptchas\n",
" self.dataDir = dataDir\n",
"\n",
" def createAndSaveCaptchas(self):\n",
" self._prepareDataDir()\n",
" for _ in range(self.numCaptchas):\n",
" self._createAndSaveCaptcha()\n",
"\n",
" def _prepareDataDir(self):\n",
" !rm -fr {self.dataDir}\n",
" self.dataDir.mkdir(parents=True, exist_ok=True)\n",
"\n",
" def _createAndSaveCaptcha(self):\n",
" captchaString = self._createCaptchaString()\n",
" captcha = self._createCaptcha(captchaString)\n",
" captcha.save(f\"{str(self.dataDir)}/{captchaString}.jpeg\")\n",
"\n",
" def _createCaptchaString(self):\n",
" return ''.join(random.choice(CaptchaGenerator.characters) for _ in range(CaptchaGenerator.captchaLength))\n",
"\n",
" def _createCaptcha(self, word):\n",
" image = Image.new(\"RGB\", (360, 96), \"#373737\")\n",
" draw = ImageDraw.Draw(image)\n",
" font = ImageFont.truetype(\"ariali.ttf\", size=40)\n",
" draw.text((30, 10), word[0], font=font)\n",
" draw.text((80, 30), word[1], font=font)\n",
" draw.text((135, 10), word[2], font=font)\n",
" draw.text((190, 30), word[3], font=font)\n",
" draw.text((250, 10), word[4], font=font)\n",
" draw.text((295, 30), word[5], font=font)\n",
" return image\n"
"from CaptchaGenerator import CaptchaGenerator"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {
"id": "0DZfMrbe3MGN"
},
@@ -141,25 +91,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {
"id": "sNJjugG83MGO"
},
"outputs": [],
"source": [
"class CharNumConverter:\n",
"\n",
" def __init__(self, characters):\n",
" self.char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)\n",
" self.num_to_char = layers.StringLookup(\n",
" vocabulary=self.char_to_num.get_vocabulary(),\n",
" mask_token=None,\n",
" invert=True)"
"from CharNumConverter import CharNumConverter"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {
"id": "qxs04OTR3MGP"
},
@@ -194,39 +137,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {
"id": "dAAACymS3MGR"
},
"outputs": [],
"source": [
"class DatasetFactory:\n",
" \n",
" def __init__(self, img_height, img_width, char_to_num, batch_size):\n",
" self.img_height = img_height\n",
" self.img_width = img_width\n",
" self.char_to_num = char_to_num\n",
" self.batch_size = batch_size\n",
"\n",
" def createDataset(self, x, y):\n",
" dataset = tf.data.Dataset.from_tensor_slices((x, y))\n",
" dataset = dataset.map(self._encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)\n",
" dataset = dataset.batch(self.batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)\n",
" return dataset\n",
"\n",
" def _encode_single_sample(self, img_path, label):\n",
" img = tf.io.read_file(img_path)\n",
" img = tf.io.decode_jpeg(img, channels=3)\n",
" img = tf.image.resize(img, [self.img_height, self.img_width])\n",
" # Map the characters in label to numbers\n",
" label = self.char_to_num(tf.strings.unicode_split(label, input_encoding=\"UTF-8\"))\n",
" # Return a dict as our model is expecting two inputs\n",
" return {\"image\": img, \"label\": label}\n"
"from DatasetFactory import DatasetFactory"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {
"id": "kdL9_t03Mf3t"
},
@@ -248,7 +170,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {
"id": "FqVSEuZp3MGT"
},
@@ -275,7 +197,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"metadata": {
"id": "apkeCHhP3MGU"
},
@@ -300,138 +222,18 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {
"id": "st13jAjL3MGV"
},
"outputs": [],
"source": [
"# see https://keras.io/guides/making_new_layers_and_models_via_subclassing/\n",
"class CTCLayer(layers.Layer):\n",
" \n",
" def __init__(self, name=None):\n",
" super().__init__(name=name)\n",
" self.loss_fn = keras.backend.ctc_batch_cost\n",
"\n",
" def call(self, y_true, y_pred):\n",
" # Compute the training-time loss value and add it\n",
" # to the layer using `self.add_loss()`.\n",
" batch_len = tf.cast(tf.shape(y_true)[0], dtype=\"int64\")\n",
" input_length = tf.cast(tf.shape(y_pred)[1], dtype=\"int64\")\n",
" label_length = tf.cast(tf.shape(y_true)[1], dtype=\"int64\")\n",
"\n",
" input_length = input_length * tf.ones(shape=(batch_len, 1), dtype=\"int64\")\n",
" label_length = label_length * tf.ones(shape=(batch_len, 1), dtype=\"int64\")\n",
"\n",
" loss = self.loss_fn(y_true, y_pred, input_length, label_length)\n",
" self.add_loss(loss)\n",
"\n",
" # At test time, just return the computed predictions\n",
" return y_pred\n",
"\n",
"\n",
"class ModelFactory:\n",
" \n",
" predictionModelInputLayerName = \"image\"\n",
" predictionModelOutputLayerName = \"dense2\"\n",
"\n",
" def __init__(self, img_height, img_width, char_to_num):\n",
" self.img_height = img_height\n",
" self.img_width = img_width\n",
" self.char_to_num = char_to_num\n",
"\n",
" # see https://www.tensorflow.org/api_docs/python/tf/keras/applications/resnet/ResNet101\n",
" def createResNet101(self):\n",
" return self._createModel(\n",
" baseModelFactory = lambda input_tensor: tf.keras.applications.resnet.ResNet101(\n",
" input_tensor = input_tensor,\n",
" weights = 'imagenet',\n",
" include_top = False),\n",
" preprocess_input = tf.keras.applications.resnet.preprocess_input,\n",
" name = 'ResNet101')\n",
"\n",
" def createMobileNetV2(self):\n",
" return self._createModel(\n",
" baseModelFactory = lambda input_tensor: tf.keras.applications.MobileNetV2(\n",
" input_tensor = input_tensor,\n",
" weights = 'imagenet',\n",
" include_top = False),\n",
" preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input,\n",
" name = 'MobileNetV2')\n",
"\n",
" def createMobileNetV3Small(self):\n",
" return self._createModel(\n",
" baseModelFactory = lambda input_tensor: tf.keras.applications.MobileNetV3Small(\n",
" input_tensor = input_tensor,\n",
" minimalistic = True,\n",
" weights = 'imagenet',\n",
" include_top = False),\n",
" preprocess_input = tf.keras.applications.mobilenet_v3.preprocess_input,\n",
" name = 'MobileNetV3Small')\n",
" \n",
" @staticmethod\n",
" def createPredictionModel(model):\n",
" return keras.models.Model(\n",
" model.get_layer(name=ModelFactory.predictionModelInputLayerName).input,\n",
" model.get_layer(name=ModelFactory.predictionModelOutputLayerName).output)\n",
"\n",
" def _createModel(self, baseModelFactory, preprocess_input, name):\n",
" # Inputs to the model\n",
" input_image = layers.Input(\n",
" shape=(self.img_height, self.img_width, 3),\n",
" name=ModelFactory.predictionModelInputLayerName,\n",
" dtype=\"float32\")\n",
" labels = layers.Input(name=\"label\", shape=(None,), dtype=\"float32\")\n",
" \n",
" image = preprocess_input(input_image)\n",
" # Transpose the image because we want the time dimension to correspond to the width of the image.\n",
" image = tf.keras.layers.Permute(dims=[2, 1, 3])(image)\n",
" base_model = baseModelFactory(image)\n",
" x = layers.Reshape(\n",
" target_shape=(base_model.output_shape[1], base_model.output_shape[2] * base_model.output_shape[3]),\n",
" name=\"reshape\")(base_model.output)\n",
" x = layers.Dense(64, activation=\"relu\", name=\"dense1\")(x)\n",
" x = layers.Dropout(0.2)(x)\n",
"\n",
" # RNNs\n",
" x = layers.Bidirectional(\n",
" layers.LSTM(\n",
" 128,\n",
" return_sequences=True,\n",
" dropout=0.25,\n",
" unroll=False,\n",
" name=\"LSTM1\"))(x)\n",
" x = layers.Bidirectional(\n",
" layers.LSTM(\n",
" 64,\n",
" return_sequences=True,\n",
" dropout=0.25,\n",
" unroll=False,\n",
" name=\"LSTM2\"))(x)\n",
"\n",
" # Output layer\n",
" x = layers.Dense(\n",
" len(self.char_to_num.get_vocabulary()) + 1,\n",
" activation=\"softmax\",\n",
" name=ModelFactory.predictionModelOutputLayerName)(x)\n",
"\n",
" # Add CTC layer for calculating CTC loss at each step\n",
" output = CTCLayer(name=\"ctc_loss\")(labels, x)\n",
"\n",
" model = keras.models.Model(\n",
" inputs=[input_image, labels],\n",
" outputs=output,\n",
" name=name)\n",
" # \"The model is optimized by a stochastic gradient descent (SGD) strategy with an initial learning rate of 0.004, weight decay of 0.00004 and momentum of 0.9.\"\n",
" # from tensorflow.keras.optimizers import SGD\n",
" # model.compile(optimizer=SGD(learning_rate=0.004, \"weight_decay=0.00004,\" momentum=0.9)\n",
" model.compile(optimizer=keras.optimizers.Adam())\n",
" return model\n"
"from ModelFactory import ModelFactory"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -442,61 +244,29 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {
"id": "B7GZlk2_3MGX"
},
"outputs": [],
"source": [
"class PredictionsDecoder:\n",
"\n",
" def __init__(self, captchaLength, num_to_char):\n",
" self.captchaLength = captchaLength\n",
" self.num_to_char = num_to_char\n",
"\n",
" def decode_batch_predictions(self, pred):\n",
" return self.asStrings(self.ctc_decode(pred))\n",
"\n",
" def ctc_decode(self, pred):\n",
" input_len = np.ones(pred.shape[0]) * pred.shape[1]\n",
" # Use greedy search. For complex tasks, you can use beam search\n",
" return keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][:, :self.captchaLength]\n",
"\n",
" def asStrings(self, labels):\n",
" return [self.asString(label) for label in labels]\n",
"\n",
" def asString(self, label):\n",
" return tf.strings.reduce_join(self.num_to_char(label)).numpy().decode(\"utf-8\")\n"
"from PredictionsDecoder import PredictionsDecoder"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"metadata": {
"id": "8Oa7avYt3MGX"
},
"outputs": [],
"source": [
"class ModelDAO:\n",
"\n",
" def __init__(self, inColab):\n",
" self.inColab = inColab\n",
"\n",
" def saveModel(self, model):\n",
" !rm -rf {model.name}\n",
" model.save(model.name)\n",
" if self.inColab:\n",
" GoogleDriveManager.uploadFolderToGoogleDrive(model.name)\n",
"\n",
" def loadModel(self, modelName):\n",
" if self.inColab:\n",
" GoogleDriveManager.downloadFolderFromGoogleDrive(modelName)\n",
" return keras.models.load_model(modelName)\n"
"from ModelDAO import ModelDAO"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"metadata": {
"id": "S3X_SslH3MGY"
},
@@ -523,7 +293,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 16,
"metadata": {
"id": "NZrKXF6P3MGY"
},
@@ -534,7 +304,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 17,
"metadata": {
"id": "7EsmTaF03MGZ"
},
@@ -546,7 +316,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 18,
"metadata": {
"id": "S_4hl4S4BmZK"
},
@@ -559,7 +329,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 19,
"metadata": {
"id": "WmUghcQaMf3y"
},
@@ -570,18 +340,28 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 20,
"metadata": {
"id": "cpxO7yGAMf3z"
},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-03-15 10:41:54.085280: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-03-15 10:41:54.089954: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.\n"
]
}
],
"source": [
"charNumConverter = CharNumConverter(CaptchaGenerator.characters)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 21,
"metadata": {
"id": "tVb5nDFTMf3z"
},
@@ -592,7 +372,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 22,
"metadata": {
"id": "t1wzlHQ-Mf3z"
},
@@ -603,7 +383,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 23,
"metadata": {
"id": "s35OUslsMf30"
},
@@ -623,7 +403,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 24,
"metadata": {
"id": "oRcemcbG3MGa"
},
@@ -637,7 +417,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 25,
"metadata": {
"id": "P7myCt7e2h6A"
},
@@ -645,13 +425,13 @@
"source": [
"# \"We generate 200,000 images for base model pre-training\"\n",
"captchaGenerator = CaptchaGenerator(\n",
" numCaptchas = 200000, # 50, # 200000,\n",
" numCaptchas = 50, # 50, # 200000,\n",
" dataDir = Path(\"captchas/generated/VAERS/\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 27,
"metadata": {
"id": "j9apYsyI3MGb"
},

78 binary image files (before/after previews only; sizes unchanged, ranging 3.2–4.0 KiB each).

Some files were not shown because too many files have changed in this diff.