Merge branch 'main' into pages

frankknoll
2023-03-15 18:28:13 +01:00
1066 changed files with 1165 additions and 81 deletions

.gitignore (vendored, 1 line added)

@@ -14,3 +14,4 @@ src/HowBadIsMyBatch.nbconvert.html
 src/__pycache__/
 src/intensivstationen/__pycache__/
 google-chrome-stable_current_amd64*
+src/captcha/__pycache__


@@ -3,7 +3,7 @@ channels:
   - defaults
 #  - conda-forge
 dependencies:
-  - python=3
+  - python=3.9
   - ipykernel
   - numpy
   - pandas
@@ -12,7 +12,7 @@ dependencies:
   - bs4
   - lxml
   - jupyter
-  - tensorflow
+  - tensorflow=2.11
   - nb_conda_kernels
   - pillow
   - openpyxl
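Since the interpreter and TensorFlow are now pinned, a rebuilt environment is worth sanity-checking; a minimal sketch, assuming it runs inside the activated conda environment:

    import sys
    import tensorflow as tf

    print(sys.version)      # expect 3.9.x, matching the python=3.9 pin
    print(tf.__version__)   # expect 2.11.x, matching the tensorflow=2.11 pin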


@@ -1,74 +0,0 @@
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from PIL import Image
import io

# copied from value of characters variable in captcha_ocr.ipynb or captcha_ocr_trainAndSaveModel.ipynb
characters = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f']

img_width = 241
img_height = 62
downsample_factor = 4

# copied from value of max_length variable in captcha_ocr.ipynb or captcha_ocr_trainAndSaveModel.ipynb
max_length = 6

char_to_num = layers.StringLookup(
    vocabulary=list(characters),
    mask_token=None)

num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(),
    mask_token=None, invert=True)

def encode_single_sample(img_path):
    # 1. Read image
    img = tf.io.read_file(img_path)
    # 2. Decode and convert to grayscale
    img = tf.io.decode_png(img, channels=1)
    # 3. Convert to float32 in [0, 1] range
    img = tf.image.convert_image_dtype(img, tf.float32)
    # 4. Resize to the desired size
    img = tf.image.resize(img, [img_height, img_width])
    # 5. Transpose the image because we want the time
    #    dimension to correspond to the width of the image.
    img = tf.transpose(img, perm=[1, 0, 2])
    # 6. Return the image as a single-sample batch, as the prediction model expects batched input
    return asSingleSampleBatch(img)

def asSingleSampleBatch(img):
    array = keras.utils.img_to_array(img)
    array = np.expand_dims(array, axis=0)
    return array

def decode_batch_predictions(pred):
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    # Use greedy search. For complex tasks, you can use beam search.
    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][:, :max_length]
    # Iterate over the results and get back the text
    output_text = []
    for res in results:
        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        output_text.append(res)
    return output_text

def _getModel():
    print("loading model...")
    model = load_model()
    model.summary()
    return model

def load_model():
    model = keras.models.load_model('model')
    return keras.models.Model(
        model.get_layer(name="image").input,
        model.get_layer(name="dense2").output)

def getTextInCaptchaImage(captchaImageFile):
    batchImages = encode_single_sample(captchaImageFile)
    preds = _getModel().predict(batchImages)
    return decode_batch_predictions(preds)[0]


@@ -329,7 +329,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-  "version": "3.10.8"
+  "version": "3.9.15"
  },
  "vscode": {
   "interpreter": {


@@ -2,8 +2,9 @@ import os
 import time
 from WebDriver import getWebDriver, isCaptchaSolved, saveCaptchaImageAs
 from selenium.webdriver.common.by import By
-from CaptchaReader import getTextInCaptchaImage
+from captcha.CaptchaReader import CaptchaReader
 from zipUtils import unzipAndRemove
+from captcha.CaptchaShape import CaptchaShape

 #def getTextInCaptchaImage(captchaImageFile):
@@ -15,11 +16,16 @@ from zipUtils import unzipAndRemove
 def solveCaptchaAndStartFileDownload(driver, captchaImageFile):
     saveCaptchaImageAs(driver, captchaImageFile)
-    textInCaptchaImage = getTextInCaptchaImage(captchaImageFile)
+    textInCaptchaImage = _createCaptchaReader().getTextInCaptchaImage(captchaImageFile)
     print('textInCaptchaImage:', textInCaptchaImage)
     driver.find_element(By.ID, "verificationCode").send_keys(textInCaptchaImage)
     driver.find_element(By.CSS_SELECTOR, '[name="downloadbut"]').click()

+def _createCaptchaReader():
+    working_directory = os.path.dirname(__file__)
+    return CaptchaReader(modelFilepath = f'{working_directory}/captcha/MobileNetV3Small',
+                         captchaShape = CaptchaShape())
+
 def downloadFile(absoluteFile, driver, maxTries):
     def _downloadFile():
         driver.get('https://vaers.hhs.gov/eSubDownload/index.jsp?fn=' + os.path.basename(absoluteFile))
@@ -42,7 +48,7 @@ def _waitUntilDownloadHasFinished(file):
         time.sleep(2)

 def downloadVAERSFile(file, downloadDir):
-    driver = getWebDriver(downloadDir, isHeadless = True)
+    driver = getWebDriver(downloadDir, isHeadless = False)
     downloadedFile = downloadFile(
         absoluteFile = downloadDir + "/" + file,
         driver = driver,
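The new wiring can also be exercised on its own; a hedged standalone sketch, with the model directory and sample image borrowed from CaptchaReaderTest further below (paths are relative to src/):

    from captcha.CaptchaReader import CaptchaReader
    from captcha.CaptchaShape import CaptchaShape

    reader = CaptchaReader(modelFilepath = 'captcha/MobileNetV3Small',
                           captchaShape = CaptchaShape())
    print(reader.getTextInCaptchaImage('captcha/captchas/VAERS/1Ad47a.jpeg'))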

src/__init__.py (new empty file)

src/captcha.ipynb (new file, 561 lines)

@@ -0,0 +1,561 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "UNKC5YSEIS_d"
},
"source": [
"# Captchas\n",
"\n",
"**see:** https://keras.io/examples/vision/captcha_ocr/<br>\n",
"**original:** https://colab.research.google.com/drive/1Olw2KMHfPlnGaYuzffl2zb6D1etlBGZf?usp=sharing<br>\n",
"**View Github version in Colab:** <a href=\"https://colab.research.google.com/github/KnollFrank/2captcha-worker-assistant-server/blob/master/captcha_ocr_trainAndSaveModel_colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a><br>\n",
"**paper:** Simple and Easy: Transfer Learning-Based Attacks to Text CAPTCHA<br>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wRUsVuIiIS_s"
},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "zZSwQragIS_v"
},
"outputs": [],
"source": [
"import os\n",
"import numpy as np\n",
"from pathlib import Path\n",
"import tensorflow as tf\n",
"from captcha.GoogleDriveManager import GoogleDriveManager\n",
"from captcha.CaptchaGenerator import CaptchaGenerator\n",
"from captcha.CharNumConverter import CharNumConverter\n",
"from captcha.DataSplitter import DataSplitter\n",
"from captcha.DatasetFactory import DatasetFactory\n",
"from captcha.ModelFactory import ModelFactory\n",
"from captcha.PredictionsDecoder import PredictionsDecoder\n",
"from captcha.ModelDAO import ModelDAO\n",
"from captcha.CaptchaShape import CaptchaShape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "0DZfMrbe3MGN"
},
"outputs": [],
"source": [
"def getImagesAndLabels(dataDir):\n",
" fileSuffix = \".jpeg\"\n",
" images = sorted(list(map(str, list(dataDir.glob(\"*\" + fileSuffix)))))\n",
" labels = [image.split(os.path.sep)[-1].split(fileSuffix)[0] for image in images]\n",
" return images, labels\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "kdL9_t03Mf3t"
},
"outputs": [],
"source": [
"def getTrainValidationTestDatasets(dataDir, datasetFactory):\n",
" images, labels = getImagesAndLabels(dataDir)\n",
" print(\"Number of images found:\", len(images))\n",
" print(\"Characters:\", CaptchaGenerator.characters)\n",
"\n",
" dataSplitter = DataSplitter(images, labels)\n",
" \n",
" return (\n",
" datasetFactory.createDataset(*dataSplitter.getTrain()),\n",
" datasetFactory.createDataset(*dataSplitter.getValid()),\n",
" datasetFactory.createDataset(*dataSplitter.getTest())\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "FqVSEuZp3MGT"
},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import math\n",
"\n",
"def displayImagesInGrid(numGridCols, images, titles, titleColors):\n",
" assert len(images) == len(titles) == len(titleColors)\n",
" images = [image.numpy().astype(np.uint8) for image in images]\n",
" numGridRows = math.ceil(len(images) / numGridCols)\n",
" _, axs = plt.subplots(numGridRows, numGridCols, figsize=(15, 5))\n",
" for row in range(numGridRows):\n",
" for col in range(numGridCols):\n",
" ax = axs[row, col]\n",
" ax.axis(\"off\")\n",
" i = row * numGridCols + col\n",
" if(i < len(images)):\n",
" ax.imshow(images[i])\n",
" ax.set_title(titles[i], color=titleColors[i])\n",
" plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "apkeCHhP3MGU"
},
"outputs": [],
"source": [
"def display16Predictions(model, dataset, predictionsDecoder):\n",
" for batch in dataset.take(1):\n",
" numPredictions2Display = 16\n",
" batch_images = batch[\"image\"][:numPredictions2Display]\n",
" batch_labels = batch[\"label\"][:numPredictions2Display]\n",
"\n",
" preds = model.predict(batch_images)\n",
" pred_texts = predictionsDecoder.decode_batch_predictions(preds)\n",
" orig_texts = predictionsDecoder.asStrings(batch_labels)\n",
"\n",
" displayImagesInGrid(\n",
" 4,\n",
" batch_images,\n",
" [f\"Prediction/Truth: {pred_text}/{orig_text}\" for (pred_text, orig_text) in zip(pred_texts, orig_texts)],\n",
" ['green' if pred_text == orig_text else 'red' for (pred_text, orig_text) in zip(pred_texts, orig_texts)])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def printLayers(model):\n",
" for i, layer in enumerate(model.layers):\n",
" print(i, layer.name)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S3X_SslH3MGY"
},
"outputs": [],
"source": [
"# FK-TODO: entferne die getAccuracy()-Methode. Implementiere stattdessen https://stackoverflow.com/questions/37657260/how-to-implement-custom-metric-in-keras oder https://keras.io/api/metrics/#custom-metrics\n",
"def getAccuracy(dataset, prediction_model, ctc_decode):\n",
" accuracy = tf.keras.metrics.Accuracy()\n",
"\n",
" for batch in dataset:\n",
" accuracy.update_state(batch[\"label\"], ctc_decode(prediction_model.predict(batch[\"image\"], verbose=0)))\n",
"\n",
" return accuracy.result().numpy()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "94755hrNMf3w"
},
"source": [
"## Preparation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NZrKXF6P3MGY"
},
"outputs": [],
"source": [
"inColab = 'google.colab' in str(get_ipython())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "7EsmTaF03MGZ"
},
"outputs": [],
"source": [
"if inColab:\n",
" GoogleDriveManager.mount()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S_4hl4S4BmZK"
},
"outputs": [],
"source": [
"if inColab:\n",
" !cp {GoogleDriveManager._baseFolder}/captchas.zip .\n",
" !unzip captchas.zip"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WmUghcQaMf3y"
},
"outputs": [],
"source": [
"modelDAO = ModelDAO(inColab)\n",
"charNumConverter = CharNumConverter(CaptchaGenerator.characters)\n",
"predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)\n",
"captchaShape = CaptchaShape()\n",
"datasetFactory = DatasetFactory(captchaShape, charNumConverter.char_to_num, batch_size = 64)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "lsLuSi7h3MGZ"
},
"source": [
"## Create And Train Base Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oRcemcbG3MGa"
},
"outputs": [],
"source": [
"if inColab:\n",
" !sudo apt install ttf-mscorefonts-installer\n",
" !sudo fc-cache -f\n",
" !fc-match Arial"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "P7myCt7e2h6A"
},
"outputs": [],
"source": [
"# \"We generate 200,000 images for base model pre-training\"\n",
"captchaGenerator = CaptchaGenerator(\n",
" numCaptchas = 50, # 50, # 200000,\n",
" dataDir = Path(\"captchas/generated/VAERS/\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "j9apYsyI3MGb"
},
"outputs": [],
"source": [
"captchaGenerator.createAndSaveCaptchas()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "AgN4skCkMf31"
},
"outputs": [],
"source": [
"train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(captchaGenerator.dataDir, datasetFactory)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "RcgWHXVSNsa7"
},
"outputs": [],
"source": [
"for batch in train_dataset.take(1):\n",
" numImages2Display = 16\n",
" images = batch[\"image\"][:numImages2Display]\n",
" labels = batch[\"label\"][:numImages2Display]\n",
" displayImagesInGrid(4, images, predictionsDecoder.asStrings(labels), ['black'] * len(labels))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "zDoFYKM2hdEW"
},
"outputs": [],
"source": [
"modelFactory = ModelFactory(captchaShape, charNumConverter.char_to_num)\n",
"model = modelFactory.createMobileNetV3Small()\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ltXYrpjIITAb"
},
"outputs": [],
"source": [
"# \"the success rates became stable after the base-model training epochs exceeded 20\"\n",
"history = model.fit(\n",
" train_dataset,\n",
" validation_data=validation_dataset,\n",
" epochs=20)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "fPG-Yl1SJfF7"
},
"outputs": [],
"source": [
"modelDAO.saveModel(model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NnNHMtIGITAe"
},
"outputs": [],
"source": [
"prediction_model = ModelFactory.createPredictionModel(model)\n",
"prediction_model.summary()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YW651ztD8sKI"
},
"outputs": [],
"source": [
"display16Predictions(prediction_model, test_dataset, predictionsDecoder)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "V5gqMBIwBmZU"
},
"outputs": [],
"source": [
"getAccuracy(test_dataset, prediction_model, predictionsDecoder.ctc_decode)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UYxiYTH9BmZU"
},
"source": [
"## Transfer learning"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WV8IS4KrBmZU"
},
"outputs": [],
"source": [
"# \"we collected 1,500 real CAPTCHAs from the websites. Note that only 500 of them are used for fine-tuning, and another 1,000 are applied to calculate the test accuracy\"\n",
"# FK-TODO: lade das pre-trainierte model und trainiere es mit 500 real-world-Daten aus dem Ordner captchas/VAERS/, die restlichen 540 (es sollten nach obigem Zitat aber 1,000 sein) sind dann die Test-Daten.\n",
"# see https://keras.io/guides/transfer_learning/\n",
"# see https://www.tensorflow.org/tutorials/images/transfer_learning\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"modelName, numTrainableLayers = 'MobileNetV3Small', 104\n",
"# modelName, numTrainableLayers = 'ResNet101', 348"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "D7ogEQmB3MGj"
},
"outputs": [],
"source": [
"# FK-TODO: DRY with VAERSFileDownloader\n",
"modelFilepath = f'{os.getcwd()}/captcha/{modelName}'\n",
"model = modelDAO.loadModel(modelFilepath)\n",
"model.summary(show_trainable=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "gbPigogKNFrD"
},
"outputs": [],
"source": [
"# printLayers(model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "59quw8o3Mf34"
},
"outputs": [],
"source": [
"model.trainable = True\n",
"for layer in model.layers[:numTrainableLayers]:\n",
" layer.trainable = False"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "acGczax3Mf34"
},
"outputs": [],
"source": [
"model.summary(show_trainable=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "q7_MjUO0BmZV"
},
"outputs": [],
"source": [
"train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path(\"captcha/captchas/VAERS/\"), datasetFactory)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "dZsCpibkBmZX"
},
"outputs": [],
"source": [
"# \"The model is optimized by a stochastic gradient descent (SGD) strategy with an initial learning rate of 0.004, weight decay of 0.00004 and momentum of 0.9.\"\n",
"from tensorflow.keras.optimizers import SGD\n",
"# model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9))\n",
"model.compile(optimizer='adam')\n",
"\n",
"# \"Therefore, in our experiments, we chose 1 epoch for the fine-tuning stage.\"\n",
"history = model.fit(\n",
" train_dataset,\n",
" validation_data=validation_dataset,\n",
" epochs=20)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TRbJigbH3MGl"
},
"outputs": [],
"source": [
"prediction_model = ModelFactory.createPredictionModel(model)\n",
"prediction_model.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rPszfhJ4BmZX"
},
"outputs": [],
"source": [
"getAccuracy(test_dataset, prediction_model, predictionsDecoder.ctc_decode)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"id": "hfmRY1qC7aVV"
},
"outputs": [],
"source": [
"display16Predictions(prediction_model, test_dataset, predictionsDecoder)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"modelDAO.saveModel(model)"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [],
"name": "captcha.ipynb",
"private_outputs": true,
"provenance": []
},
"gpuClass": "standard",
"kernelspec": {
"display_name": "howbadismybatch-venv-kernel",
"language": "python",
"name": "howbadismybatch-venv-kernel"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
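The FK-TODO in the getAccuracy() cell above asks for a proper custom metric. A minimal sketch of what that could look like, following the Keras custom-metrics guide linked there; the class name and the dense (batch, captchaLength) label shape are assumptions:

    import tensorflow as tf
    from tensorflow import keras

    class SequenceAccuracy(keras.metrics.Metric):
        # Fraction of captchas whose whole decoded sequence matches the label.
        # Assumes y_true and the CTC-decoded y_pred are integer tensors of
        # shape (batch, captchaLength).
        def __init__(self, name="sequence_accuracy", **kwargs):
            super().__init__(name=name, **kwargs)
            self.correct = self.add_weight(name="correct", initializer="zeros")
            self.total = self.add_weight(name="total", initializer="zeros")

        def update_state(self, y_true, y_pred, sample_weight=None):
            matches = tf.reduce_all(
                tf.equal(tf.cast(y_true, tf.int64), tf.cast(y_pred, tf.int64)),
                axis=1)
            self.correct.assign_add(tf.reduce_sum(tf.cast(matches, tf.float32)))
            self.total.assign_add(tf.cast(tf.shape(y_true)[0], tf.float32))

        def result(self):
            return self.correct / self.total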

src/captcha/CTCLayer.py (new file, 27 lines)

@@ -0,0 +1,27 @@
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# see https://keras.io/guides/making_new_layers_and_models_via_subclassing/
class CTCLayer(layers.Layer):

    def __init__(self, name=None):
        super().__init__(name=name)
        self.loss_fn = keras.backend.ctc_batch_cost

    def call(self, y_true, y_pred):
        # Compute the training-time loss value and add it
        # to the layer using `self.add_loss()`.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # At test time, just return the computed predictions
        return y_pred
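CTCLayer computes its loss inside call() and registers it via self.add_loss(), which is why ModelFactory below can compile the model without a loss argument. A minimal standalone sketch of that pattern (toy shapes, and the quadratic penalty merely stands in for the CTC loss):

    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers

    class PenaltyLayer(layers.Layer):
        def call(self, x):
            self.add_loss(0.01 * tf.reduce_sum(tf.square(x)))  # stand-in loss
            return x

    inputs = keras.Input(shape=(4,))
    outputs = PenaltyLayer()(layers.Dense(2)(inputs))
    model = keras.Model(inputs, outputs)
    model.compile(optimizer="adam")  # no loss= needed; add_loss() supplies it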


@@ -0,0 +1,43 @@
from PIL import Image, ImageDraw, ImageFont
import random
import string
import shutil

class CaptchaGenerator:

    characters = sorted(set(list(string.ascii_letters + string.digits)))
    captchaLength = 6

    def __init__(self, numCaptchas, dataDir):
        self.numCaptchas = numCaptchas
        self.dataDir = dataDir

    def createAndSaveCaptchas(self):
        self._prepareDataDir()
        for _ in range(self.numCaptchas):
            self._createAndSaveCaptcha()

    def _prepareDataDir(self):
        shutil.rmtree(self.dataDir, ignore_errors = True)
        self.dataDir.mkdir(parents=True, exist_ok=True)

    def _createAndSaveCaptcha(self):
        captchaString = self._createCaptchaString()
        captcha = self._createCaptcha(captchaString)
        captcha.save(f"{str(self.dataDir)}/{captchaString}.jpeg")

    def _createCaptchaString(self):
        return ''.join(random.choice(CaptchaGenerator.characters) for _ in range(CaptchaGenerator.captchaLength))

    def _createCaptcha(self, word):
        image = Image.new("RGB", (360, 96), "#373737")
        draw = ImageDraw.Draw(image)
        font = ImageFont.truetype("ariali.ttf", size=40)
        draw.text((30, 10), word[0], font=font)
        draw.text((80, 30), word[1], font=font)
        draw.text((135, 10), word[2], font=font)
        draw.text((190, 30), word[3], font=font)
        draw.text((250, 10), word[4], font=font)
        draw.text((295, 30), word[5], font=font)
        return image
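Usage as in src/captcha.ipynb above; note that the fixed per-character offsets assume exactly six characters, and that 'ariali.ttf' (Arial Italic) must be installed, which the ttf-mscorefonts-installer cell handles in Colab:

    from pathlib import Path
    from captcha.CaptchaGenerator import CaptchaGenerator

    captchaGenerator = CaptchaGenerator(
        numCaptchas = 50,
        dataDir = Path("captchas/generated/VAERS/"))
    captchaGenerator.createAndSaveCaptchas()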


@@ -0,0 +1,31 @@
from captcha.ModelDAO import ModelDAO
from captcha.ModelFactory import ModelFactory
from captcha.PredictionsDecoder import PredictionsDecoder
from captcha.CaptchaGenerator import CaptchaGenerator
from captcha.CharNumConverter import CharNumConverter
from captcha.DatasetFactory import DatasetFactory
import numpy as np
from tensorflow import keras

class CaptchaReader:

    def __init__(self, modelFilepath, captchaShape):
        self.modelFilepath = modelFilepath
        self.captchaShape = captchaShape

    def getTextInCaptchaImage(self, captchaImageFile):
        return self._getTextsInCaptchaImage(self._getCaptchaImage(captchaImageFile))[0]

    def _getCaptchaImage(self, captchaImageFile):
        return self._asSingleSampleBatch(DatasetFactory.encodeImage(captchaImageFile, self.captchaShape))

    def _asSingleSampleBatch(self, img):
        return np.expand_dims(keras.utils.img_to_array(img), axis=0)

    def _getTextsInCaptchaImage(self, captchaImage):
        preds = self._createPredictionModel().predict(captchaImage)
        return PredictionsDecoder(CaptchaGenerator.captchaLength, CharNumConverter(CaptchaGenerator.characters).num_to_char).decode_batch_predictions(preds)

    def _createPredictionModel(self):
        return ModelFactory.createPredictionModel(ModelDAO(inColab=False).loadModel(self.modelFilepath))


@@ -0,0 +1,22 @@
import unittest
from captcha.CaptchaReader import CaptchaReader
from captcha.CaptchaShape import CaptchaShape
import os

class CaptchaReaderTest(unittest.TestCase):

    def setUp(self):
        self.working_directory = os.path.dirname(__file__)

    def test_getTextInCaptchaImage(self):
        # Given
        textInCaptchaImage = '1Ad47a'
        captchaReader = CaptchaReader(
            modelFilepath = f'{self.working_directory}/MobileNetV3Small',
            captchaShape = CaptchaShape())

        # When
        textInCaptchaImageActual = captchaReader.getTextInCaptchaImage(f'{self.working_directory}/captchas/VAERS/{textInCaptchaImage}.jpeg')

        # Then
        self.assertEqual(textInCaptchaImageActual, textInCaptchaImage)
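Given the package layout implied by the imports (the captcha package living under src/), the test can presumably be run with the standard unittest runner:

    # Run from the src/ directory so that the captcha package and the
    # relative model/image paths resolve (an assumption about the layout):
    #
    #   python -m unittest captcha.CaptchaReaderTest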


@@ -0,0 +1,5 @@
class CaptchaShape:

    def __init__(self):
        self.width = 241
        self.height = 62


@@ -0,0 +1,10 @@
from tensorflow.keras import layers

class CharNumConverter:

    def __init__(self, characters):
        self.char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)
        self.num_to_char = layers.StringLookup(
            vocabulary=self.char_to_num.get_vocabulary(),
            mask_token=None,
            invert=True)


@@ -0,0 +1,28 @@
import numpy as np

class DataSplitter:

    def __init__(self, x, y):
        (self.x_train, self.y_train), (x_valid_test, y_valid_test) = DataSplitter._splitData(np.array(x), np.array(y), train_size=0.7)
        (self.x_valid, self.y_valid), (self.x_test, self.y_test) = DataSplitter._splitData(x_valid_test, y_valid_test, train_size=0.5)

    def getTrain(self):
        return (self.x_train, self.y_train)

    def getValid(self):
        return (self.x_valid, self.y_valid)

    def getTest(self):
        return (self.x_test, self.y_test)

    @staticmethod
    def _splitData(x, y, train_size=0.9, shuffle=True):
        size = len(x)
        indices = np.arange(size)
        if shuffle:
            np.random.shuffle(indices)
        train_samples = int(size * train_size)
        x_train, y_train = x[indices[:train_samples]], y[indices[:train_samples]]
        x_test, y_test = x[indices[train_samples:]], y[indices[train_samples:]]
        return (x_train, y_train), (x_test, y_test)
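The two chained _splitData calls yield a fixed 70/15/15 split: 70% is carved off for training, and the remainder is halved into validation and test. A worked example (the import path mirrors the other captcha modules):

    from captcha.DataSplitter import DataSplitter

    splitter = DataSplitter(x=list(range(1000)), y=list(range(1000)))
    print(len(splitter.getTrain()[0]))  # 700 = int(1000 * 0.7)
    print(len(splitter.getValid()[0]))  # 150 = int(300 * 0.5)
    print(len(splitter.getTest()[0]))   # 150, the remaining half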


@@ -0,0 +1,27 @@
import tensorflow as tf

class DatasetFactory:

    def __init__(self, captchaShape, char_to_num, batch_size):
        self.captchaShape = captchaShape
        self.char_to_num = char_to_num
        self.batch_size = batch_size

    def createDataset(self, x, y):
        dataset = tf.data.Dataset.from_tensor_slices((x, y))
        dataset = dataset.map(self._encodeImageAndLabel, num_parallel_calls=tf.data.AUTOTUNE)
        dataset = dataset.batch(self.batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
        return dataset

    def _encodeImageAndLabel(self, imageFilename, label):
        return {
            "image": DatasetFactory.encodeImage(imageFilename, self.captchaShape),
            "label": self.char_to_num(tf.strings.unicode_split(label, input_encoding="UTF-8"))}

    @staticmethod
    def encodeImage(imageFilename, captchaShape):
        img = tf.io.read_file(imageFilename)
        img = tf.io.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, [captchaShape.height, captchaShape.width])
        return img


@@ -0,0 +1,27 @@
from pathlib import Path

class GoogleDriveManager:

    _googleDriveFolder = Path('/content/gdrive')
    _baseFolder = _googleDriveFolder / 'MyDrive/CAPTCHA/models/'

    @staticmethod
    def mount():
        from google.colab import drive
        drive.mount(str(GoogleDriveManager._googleDriveFolder))

    @staticmethod
    def uploadFolderToGoogleDrive(folder):
        pass
        # FK-FIXME:
        # !zip -r {folder}.zip {folder}/
        # !cp {folder}.zip {GoogleDriveManager._baseFolder}

    @staticmethod
    def downloadFolderFromGoogleDrive(folder):
        pass
        # FK-FIXME:
        # !cp {GoogleDriveManager._baseFolder}/{folder}.zip .
        # !rm -rf {folder}
        # !unzip {folder}.zip
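The two FK-FIXME bodies could presumably be filled in with shutil instead of shell magics, which would also keep the class usable outside a notebook. A hedged sketch as plain functions (the default base folder mirrors the class constants; note that make_archive zips the folder's contents, a slightly different layout than the commented zip -r command):

    import shutil
    from pathlib import Path

    _baseFolder = Path('/content/gdrive/MyDrive/CAPTCHA/models/')

    def uploadFolderToGoogleDrive(folder):
        # zip the folder's contents and copy the archive to the Drive base folder
        archive = shutil.make_archive(folder, 'zip', root_dir=folder)
        shutil.copy(archive, _baseFolder)

    def downloadFolderFromGoogleDrive(folder):
        # fetch the archive from Drive and unpack it back into the folder
        shutil.unpack_archive(str(_baseFolder / f'{folder}.zip'), extract_dir=folder)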

(Three binary files not shown; one file diff suppressed because one or more lines are too long.)
src/captcha/ModelDAO.py (new file, 20 lines)

@@ -0,0 +1,20 @@
from tensorflow import keras
from captcha.GoogleDriveManager import GoogleDriveManager
import shutil

class ModelDAO:

    def __init__(self, inColab):
        self.inColab = inColab

    def saveModel(self, model):
        shutil.rmtree(model.name, ignore_errors = True)
        model.save(model.name)
        if self.inColab:
            GoogleDriveManager.uploadFolderToGoogleDrive(model.name)

    def loadModel(self, modelFilepath):
        if self.inColab:
            GoogleDriveManager.downloadFolderFromGoogleDrive(modelFilepath)
        return keras.models.load_model(modelFilepath)

src/captcha/ModelFactory.py (new file, 102 lines)

@@ -0,0 +1,102 @@
from captcha.CTCLayer import CTCLayer
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class ModelFactory:

    predictionModelInputLayerName = "image"
    predictionModelOutputLayerName = "dense2"

    def __init__(self, captchaShape, char_to_num):
        self.captchaShape = captchaShape
        self.char_to_num = char_to_num

    # see https://www.tensorflow.org/api_docs/python/tf/keras/applications/resnet/ResNet101
    def createResNet101(self):
        return self._createModel(
            baseModelFactory = lambda input_tensor: tf.keras.applications.resnet.ResNet101(
                input_tensor = input_tensor,
                weights = 'imagenet',
                include_top = False),
            preprocess_input = tf.keras.applications.resnet.preprocess_input,
            name = 'ResNet101')

    def createMobileNetV2(self):
        return self._createModel(
            baseModelFactory = lambda input_tensor: tf.keras.applications.MobileNetV2(
                input_tensor = input_tensor,
                weights = 'imagenet',
                include_top = False),
            preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input,
            name = 'MobileNetV2')

    def createMobileNetV3Small(self):
        return self._createModel(
            baseModelFactory = lambda input_tensor: tf.keras.applications.MobileNetV3Small(
                input_tensor = input_tensor,
                minimalistic = True,
                weights = 'imagenet',
                include_top = False),
            preprocess_input = tf.keras.applications.mobilenet_v3.preprocess_input,
            name = 'MobileNetV3Small')

    @staticmethod
    def createPredictionModel(model):
        return keras.models.Model(
            model.get_layer(name=ModelFactory.predictionModelInputLayerName).input,
            model.get_layer(name=ModelFactory.predictionModelOutputLayerName).output)

    def _createModel(self, baseModelFactory, preprocess_input, name):
        # Inputs to the model
        input_image = layers.Input(
            shape = (self.captchaShape.height, self.captchaShape.width, 3),
            name = ModelFactory.predictionModelInputLayerName,
            dtype = "float32")
        labels = layers.Input(name="label", shape=(None,), dtype="float32")

        image = preprocess_input(input_image)
        # Transpose the image because we want the time dimension to correspond to the width of the image.
        image = tf.keras.layers.Permute(dims=[2, 1, 3])(image)

        base_model = baseModelFactory(image)

        x = layers.Reshape(
            target_shape=(base_model.output_shape[1], base_model.output_shape[2] * base_model.output_shape[3]),
            name="reshape")(base_model.output)
        x = layers.Dense(64, activation="relu", name="dense1")(x)
        x = layers.Dropout(0.2)(x)

        # RNNs
        x = layers.Bidirectional(
            layers.LSTM(
                128,
                return_sequences=True,
                dropout=0.25,
                unroll=False,
                name="LSTM1"))(x)
        x = layers.Bidirectional(
            layers.LSTM(
                64,
                return_sequences=True,
                dropout=0.25,
                unroll=False,
                name="LSTM2"))(x)

        # Output layer
        x = layers.Dense(
            len(self.char_to_num.get_vocabulary()) + 1,
            activation="softmax",
            name=ModelFactory.predictionModelOutputLayerName)(x)

        # Add CTC layer for calculating CTC loss at each step
        output = CTCLayer(name="ctc_loss")(labels, x)

        model = keras.models.Model(
            inputs=[input_image, labels],
            outputs=output,
            name=name)

        # "The model is optimized by a stochastic gradient descent (SGD) strategy with an initial learning rate of 0.004, weight decay of 0.00004 and momentum of 0.9."
        # from tensorflow.keras.optimizers import SGD
        # model.compile(optimizer=SGD(learning_rate=0.004, weight_decay=0.00004, momentum=0.9))
        model.compile(optimizer=keras.optimizers.Adam())
        return model


@@ -0,0 +1,24 @@
import tensorflow as tf
from tensorflow import keras
import numpy as np

class PredictionsDecoder:

    def __init__(self, captchaLength, num_to_char):
        self.captchaLength = captchaLength
        self.num_to_char = num_to_char

    def decode_batch_predictions(self, pred):
        return self.asStrings(self.ctc_decode(pred))

    def ctc_decode(self, pred):
        input_len = np.ones(pred.shape[0]) * pred.shape[1]
        # Use greedy search. For complex tasks, you can use beam search.
        return keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][:, :self.captchaLength]

    def asStrings(self, labels):
        return [self.asString(label) for label in labels]

    def asString(self, label):
        return tf.strings.reduce_join(self.num_to_char(label)).numpy().decode("utf-8")
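ctc_decode() uses greedy search, as the comment notes; keras.backend.ctc_decode also supports beam search. A hedged standalone variant (beam_width=10 is an illustrative choice, not taken from this repository):

    import numpy as np
    from tensorflow import keras

    def ctc_decode_beam(pred, captchaLength, beam_width=10):
        # beam-search counterpart to PredictionsDecoder.ctc_decode()
        input_len = np.ones(pred.shape[0]) * pred.shape[1]
        return keras.backend.ctc_decode(
            pred, input_length=input_len,
            greedy=False, beam_width=beam_width)[0][0][:, :captchaLength]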

src/captcha/__init__.py (new empty file)

(About 75 binary files not shown: newly added captcha JPEG images, roughly 3.2 to 4.0 KiB each.)
Some files were not shown because too many files have changed in this diff.