adding some VAERS captchas

This commit is contained in:
frankknoll
2023-03-14 17:37:05 +01:00
parent 253e7ff425
commit eed59e2315
1041 changed files with 953 additions and 0 deletions

src/captcha.ipynb Normal file

@@ -0,0 +1,953 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "UNKC5YSEIS_d"
},
"source": [
"# Captchas\n",
"\n",
"**see:** https://keras.io/examples/vision/captcha_ocr/<br>\n",
"**original:** https://colab.research.google.com/drive/1Olw2KMHfPlnGaYuzffl2zb6D1etlBGZf?usp=sharing<br>\n",
"**View Github version in Colab:** <a href=\"https://colab.research.google.com/github/KnollFrank/2captcha-worker-assistant-server/blob/master/captcha_ocr_trainAndSaveModel_colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a><br>\n",
"**paper:** Simple and Easy: Transfer Learning-Based Attacks to Text CAPTCHA<br>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wRUsVuIiIS_s"
},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "zZSwQragIS_v"
},
"outputs": [],
"source": [
"import os\n",
"import numpy as np\n",
"\n",
"from pathlib import Path\n",
"\n",
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"from tensorflow.keras import layers\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "QB8QZJPg3MGI"
},
"outputs": [],
"source": [
"class GoogleDriveManager:\n",
" \n",
" _googleDriveFolder = Path('/content/gdrive')\n",
" _baseFolder = _googleDriveFolder / 'MyDrive/CAPTCHA/models/'\n",
"\n",
" @staticmethod\n",
" def mount():\n",
" from google.colab import drive\n",
" drive.mount(str(GoogleDriveManager._googleDriveFolder))\n",
"\n",
" @staticmethod\n",
" def uploadFolderToGoogleDrive(folder):\n",
" !zip -r {folder}.zip {folder}/\n",
" !cp {folder}.zip {GoogleDriveManager._baseFolder}\n",
"\n",
" @staticmethod\n",
" def downloadFolderFromGoogleDrive(folder):\n",
" !cp {GoogleDriveManager._baseFolder}/{folder}.zip .\n",
" !rm -rf {folder}\n",
" !unzip {folder}.zip\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "C3bxU1US2blM"
},
"outputs": [],
"source": [
"from PIL import Image, ImageDraw, ImageFont\n",
"import random\n",
"import string\n",
"from pathlib import Path\n",
"\n",
"\n",
"class CaptchaGenerator:\n",
"\n",
" characters = sorted(set(list(string.ascii_letters + string.digits)))\n",
" captchaLength = 6\n",
"\n",
" def __init__(self, numCaptchas, dataDir):\n",
" self.numCaptchas = numCaptchas\n",
" self.dataDir = dataDir\n",
"\n",
" def createAndSaveCaptchas(self):\n",
" self._prepareDataDir()\n",
" for _ in range(self.numCaptchas):\n",
" self._createAndSaveCaptcha()\n",
"\n",
" def _prepareDataDir(self):\n",
" !rm -fr {self.dataDir}\n",
" self.dataDir.mkdir(parents=True, exist_ok=True)\n",
"\n",
" def _createAndSaveCaptcha(self):\n",
" captchaString = self._createCaptchaString()\n",
" captcha = self._createCaptcha(captchaString)\n",
" captcha.save(f\"{str(self.dataDir)}/{captchaString}.jpeg\")\n",
"\n",
" def _createCaptchaString(self):\n",
" return ''.join(random.choice(CaptchaGenerator.characters) for _ in range(CaptchaGenerator.captchaLength))\n",
"\n",
" def _createCaptcha(self, word):\n",
" image = Image.new(\"RGB\", (360, 96), \"#373737\")\n",
" draw = ImageDraw.Draw(image)\n",
" font = ImageFont.truetype(\"ariali.ttf\", size=40)\n",
" draw.text((30, 10), word[0], font=font)\n",
" draw.text((80, 30), word[1], font=font)\n",
" draw.text((135, 10), word[2], font=font)\n",
" draw.text((190, 30), word[3], font=font)\n",
" draw.text((250, 10), word[4], font=font)\n",
" draw.text((295, 30), word[5], font=font)\n",
" return image\n"
]
},
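{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sanity check (hypothetical demo dir, not part of the pipeline):\n",
"# a captcha string is six characters drawn from the 62 alphanumerics above.\n",
"demoGenerator = CaptchaGenerator(numCaptchas=0, dataDir=Path(\"/tmp/captcha_demo\"))\n",
"print(len(CaptchaGenerator.characters))      # 62\n",
"print(demoGenerator._createCaptchaString())  # e.g. 'aB3xYz'\n"
]
},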
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "0DZfMrbe3MGN"
},
"outputs": [],
"source": [
"def getImagesAndLabels(dataDir):\n",
" fileSuffix = \".jpeg\"\n",
" images = sorted(list(map(str, list(dataDir.glob(\"*\" + fileSuffix)))))\n",
" labels = [image.split(os.path.sep)[-1].split(fileSuffix)[0] for image in images]\n",
" return images, labels\n"
]
},
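{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative check (hypothetical temp dir): the filename stem doubles as\n",
"# the label, so \"aB3xYz.jpeg\" yields the label \"aB3xYz\".\n",
"demoDir = Path(\"/tmp/captcha_label_demo\")\n",
"demoDir.mkdir(parents=True, exist_ok=True)\n",
"(demoDir / \"aB3xYz.jpeg\").touch()\n",
"print(getImagesAndLabels(demoDir))  # (['/tmp/captcha_label_demo/aB3xYz.jpeg'], ['aB3xYz'])\n"
]
},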
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "sNJjugG83MGO"
},
"outputs": [],
"source": [
"class CharNumConverter:\n",
"\n",
" def __init__(self, characters):\n",
" self.char_to_num = layers.StringLookup(vocabulary=list(characters), mask_token=None)\n",
" self.num_to_char = layers.StringLookup(\n",
" vocabulary=self.char_to_num.get_vocabulary(),\n",
" mask_token=None,\n",
" invert=True)"
]
},
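{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative round trip (hypothetical input): char_to_num maps characters to\n",
"# vocabulary indices (index 0 is the OOV token), num_to_char inverts the mapping.\n",
"demoConverter = CharNumConverter(CaptchaGenerator.characters)\n",
"nums = demoConverter.char_to_num(tf.strings.unicode_split(\"aB3xYz\", input_encoding=\"UTF-8\"))\n",
"print(nums.numpy())\n",
"print(tf.strings.reduce_join(demoConverter.num_to_char(nums)).numpy().decode(\"utf-8\"))  # aB3xYz\n"
]
},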
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "qxs04OTR3MGP"
},
"outputs": [],
"source": [
"class DataSplitter:\n",
"\n",
" def __init__(self, x, y):\n",
" (self.x_train, self.y_train), (x_valid_test, y_valid_test) = DataSplitter._splitData(np.array(x), np.array(y), train_size=0.7)\n",
" (self.x_valid, self.y_valid), (self.x_test, self.y_test) = DataSplitter._splitData(x_valid_test, y_valid_test, train_size=0.5)\n",
"\n",
" def getTrain(self):\n",
" return (self.x_train, self.y_train)\n",
"\n",
" def getValid(self):\n",
" return (self.x_valid, self.y_valid)\n",
"\n",
" def getTest(self):\n",
" return (self.x_test, self.y_test)\n",
"\n",
" @staticmethod\n",
" def _splitData(x, y, train_size=0.9, shuffle=True):\n",
" size = len(x)\n",
" indices = np.arange(size)\n",
" if shuffle:\n",
" np.random.shuffle(indices)\n",
" train_samples = int(size * train_size)\n",
" x_train, y_train = x[indices[:train_samples]], y[indices[:train_samples]]\n",
" x_test, y_test = x[indices[train_samples:]], y[indices[train_samples:]]\n",
" return (x_train, y_train), (x_test, y_test)\n"
]
},
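{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick illustrative check of the split ratios on dummy data: 70% train,\n",
"# then the remainder halved into validation and test (15% / 15% overall).\n",
"demoSplitter = DataSplitter(list(range(100)), list(range(100)))\n",
"print(len(demoSplitter.getTrain()[0]), len(demoSplitter.getValid()[0]), len(demoSplitter.getTest()[0]))  # 70 15 15\n"
]
},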
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "dAAACymS3MGR"
},
"outputs": [],
"source": [
"class DatasetFactory:\n",
" \n",
" def __init__(self, img_height, img_width, char_to_num, batch_size):\n",
" self.img_height = img_height\n",
" self.img_width = img_width\n",
" self.char_to_num = char_to_num\n",
" self.batch_size = batch_size\n",
"\n",
" def createDataset(self, x, y):\n",
" dataset = tf.data.Dataset.from_tensor_slices((x, y))\n",
" dataset = dataset.map(self._encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE)\n",
" dataset = dataset.batch(self.batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)\n",
" return dataset\n",
"\n",
" def _encode_single_sample(self, img_path, label):\n",
" img = tf.io.read_file(img_path)\n",
" img = tf.io.decode_jpeg(img, channels=3)\n",
" img = tf.image.resize(img, [self.img_height, self.img_width])\n",
" # Map the characters in label to numbers\n",
" label = self.char_to_num(tf.strings.unicode_split(label, input_encoding=\"UTF-8\"))\n",
" # Return a dict as our model is expecting two inputs\n",
" return {\"image\": img, \"label\": label}\n"
]
},
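{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative shape check of _encode_single_sample using a synthetic JPEG at\n",
"# a hypothetical temp path, so no real captcha file is needed.\n",
"tf.io.write_file(\"/tmp/demo_captcha.jpeg\", tf.io.encode_jpeg(tf.zeros((96, 360, 3), dtype=tf.uint8)))\n",
"demoFactory = DatasetFactory(62, 241, CharNumConverter(CaptchaGenerator.characters).char_to_num, batch_size=1)\n",
"demoSample = demoFactory._encode_single_sample(\"/tmp/demo_captcha.jpeg\", \"aB3xYz\")\n",
"print(demoSample[\"image\"].shape, demoSample[\"label\"].numpy())  # (62, 241, 3) and six label indices\n"
]
},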
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "kdL9_t03Mf3t"
},
"outputs": [],
"source": [
"def getTrainValidationTestDatasets(dataDir, datasetFactory):\n",
" images, labels = getImagesAndLabels(dataDir)\n",
" print(\"Number of images found:\", len(images))\n",
" print(\"Characters:\", CaptchaGenerator.characters)\n",
"\n",
" dataSplitter = DataSplitter(images, labels)\n",
" \n",
" return (\n",
" datasetFactory.createDataset(*dataSplitter.getTrain()),\n",
" datasetFactory.createDataset(*dataSplitter.getValid()),\n",
" datasetFactory.createDataset(*dataSplitter.getTest())\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "FqVSEuZp3MGT"
},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import math\n",
"\n",
"def displayImagesInGrid(numGridCols, images, titles, titleColors):\n",
" assert len(images) == len(titles) == len(titleColors)\n",
" images = [image.numpy().astype(np.uint8) for image in images]\n",
" numGridRows = math.ceil(len(images) / numGridCols)\n",
" _, axs = plt.subplots(numGridRows, numGridCols, figsize=(15, 5))\n",
" for row in range(numGridRows):\n",
" for col in range(numGridCols):\n",
" ax = axs[row, col]\n",
" ax.axis(\"off\")\n",
" i = row * numGridCols + col\n",
" if(i < len(images)):\n",
" ax.imshow(images[i])\n",
" ax.set_title(titles[i], color=titleColors[i])\n",
" plt.show()\n"
]
},
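{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative call with synthetic images (hypothetical data): four black\n",
"# tiles in a 2x2 grid, just to exercise the layout logic.\n",
"demoImages = [tf.zeros((62, 241, 3)) for _ in range(4)]\n",
"displayImagesInGrid(2, demoImages, [\"a\", \"b\", \"c\", \"d\"], [\"black\"] * 4)\n"
]
},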
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "apkeCHhP3MGU"
},
"outputs": [],
"source": [
"def display16Predictions(model, dataset, predictionsDecoder):\n",
" for batch in dataset.take(1):\n",
" numPredictions2Display = 16\n",
" batch_images = batch[\"image\"][:numPredictions2Display]\n",
" batch_labels = batch[\"label\"][:numPredictions2Display]\n",
"\n",
" preds = model.predict(batch_images)\n",
" pred_texts = predictionsDecoder.decode_batch_predictions(preds)\n",
" orig_texts = predictionsDecoder.asStrings(batch_labels)\n",
"\n",
" displayImagesInGrid(\n",
" 4,\n",
" batch_images,\n",
" [f\"Prediction/Truth: {pred_text}/{orig_text}\" for (pred_text, orig_text) in zip(pred_texts, orig_texts)],\n",
" ['green' if pred_text == orig_text else 'red' for (pred_text, orig_text) in zip(pred_texts, orig_texts)])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "st13jAjL3MGV"
},
"outputs": [],
"source": [
"# see https://keras.io/guides/making_new_layers_and_models_via_subclassing/\n",
"class CTCLayer(layers.Layer):\n",
" \n",
" def __init__(self, name=None):\n",
" super().__init__(name=name)\n",
" self.loss_fn = keras.backend.ctc_batch_cost\n",
"\n",
" def call(self, y_true, y_pred):\n",
" # Compute the training-time loss value and add it\n",
" # to the layer using `self.add_loss()`.\n",
" batch_len = tf.cast(tf.shape(y_true)[0], dtype=\"int64\")\n",
" input_length = tf.cast(tf.shape(y_pred)[1], dtype=\"int64\")\n",
" label_length = tf.cast(tf.shape(y_true)[1], dtype=\"int64\")\n",
"\n",
" input_length = input_length * tf.ones(shape=(batch_len, 1), dtype=\"int64\")\n",
" label_length = label_length * tf.ones(shape=(batch_len, 1), dtype=\"int64\")\n",
"\n",
" loss = self.loss_fn(y_true, y_pred, input_length, label_length)\n",
" self.add_loss(loss)\n",
"\n",
" # At test time, just return the computed predictions\n",
" return y_pred\n",
"\n",
"\n",
"class ModelFactory:\n",
" \n",
" predictionModelInputLayerName = \"image\"\n",
" predictionModelOutputLayerName = \"dense2\"\n",
"\n",
" def __init__(self, img_height, img_width, char_to_num):\n",
" self.img_height = img_height\n",
" self.img_width = img_width\n",
" self.char_to_num = char_to_num\n",
"\n",
" # see https://www.tensorflow.org/api_docs/python/tf/keras/applications/resnet/ResNet101\n",
" def createResNet101(self):\n",
" return self._createModel(\n",
" baseModelFactory = lambda input_tensor: tf.keras.applications.resnet.ResNet101(\n",
" input_tensor = input_tensor,\n",
" weights = 'imagenet',\n",
" include_top = False),\n",
" preprocess_input = tf.keras.applications.resnet.preprocess_input,\n",
" name = 'ResNet101')\n",
"\n",
" def createMobileNetV2(self):\n",
" return self._createModel(\n",
" baseModelFactory = lambda input_tensor: tf.keras.applications.MobileNetV2(\n",
" input_tensor = input_tensor,\n",
" weights = 'imagenet',\n",
" include_top = False),\n",
" preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input,\n",
" name = 'MobileNetV2')\n",
"\n",
" def createMobileNetV3Small(self):\n",
" return self._createModel(\n",
" baseModelFactory = lambda input_tensor: tf.keras.applications.MobileNetV3Small(\n",
" input_tensor = input_tensor,\n",
" minimalistic = True,\n",
" weights = 'imagenet',\n",
" include_top = False),\n",
" preprocess_input = tf.keras.applications.mobilenet_v3.preprocess_input,\n",
" name = 'MobileNetV3Small')\n",
" \n",
" @staticmethod\n",
" def createPredictionModel(model):\n",
" return keras.models.Model(\n",
" model.get_layer(name=ModelFactory.predictionModelInputLayerName).input,\n",
" model.get_layer(name=ModelFactory.predictionModelOutputLayerName).output)\n",
"\n",
" def _createModel(self, baseModelFactory, preprocess_input, name):\n",
" # Inputs to the model\n",
" input_image = layers.Input(\n",
" shape=(self.img_height, self.img_width, 3),\n",
" name=ModelFactory.predictionModelInputLayerName,\n",
" dtype=\"float32\")\n",
" labels = layers.Input(name=\"label\", shape=(None,), dtype=\"float32\")\n",
" \n",
" image = preprocess_input(input_image)\n",
" # Transpose the image because we want the time dimension to correspond to the width of the image.\n",
" image = tf.keras.layers.Permute(dims=[2, 1, 3])(image)\n",
" base_model = baseModelFactory(image)\n",
" x = layers.Reshape(\n",
" target_shape=(base_model.output_shape[1], base_model.output_shape[2] * base_model.output_shape[3]),\n",
" name=\"reshape\")(base_model.output)\n",
" x = layers.Dense(64, activation=\"relu\", name=\"dense1\")(x)\n",
" x = layers.Dropout(0.2)(x)\n",
"\n",
" # RNNs\n",
" x = layers.Bidirectional(\n",
" layers.LSTM(\n",
" 128,\n",
" return_sequences=True,\n",
" dropout=0.25,\n",
" unroll=False,\n",
" name=\"LSTM1\"))(x)\n",
" x = layers.Bidirectional(\n",
" layers.LSTM(\n",
" 64,\n",
" return_sequences=True,\n",
" dropout=0.25,\n",
" unroll=False,\n",
" name=\"LSTM2\"))(x)\n",
"\n",
" # Output layer\n",
" x = layers.Dense(\n",
" len(self.char_to_num.get_vocabulary()) + 1,\n",
" activation=\"softmax\",\n",
" name=ModelFactory.predictionModelOutputLayerName)(x)\n",
"\n",
" # Add CTC layer for calculating CTC loss at each step\n",
" output = CTCLayer(name=\"ctc_loss\")(labels, x)\n",
"\n",
" model = keras.models.Model(\n",
" inputs=[input_image, labels],\n",
" outputs=output,\n",
" name=name)\n",
" # \"The model is optimized by a stochastic gradient descent (SGD) strategy with an initial learning rate of 0.004, weight decay of 0.00004 and momentum of 0.9.\"\n",
" # from tensorflow.keras.optimizers import SGD\n",
" # model.compile(optimizer=SGD(learning_rate=0.004, \"weight_decay=0.00004,\" momentum=0.9)\n",
" model.compile(optimizer=keras.optimizers.Adam())\n",
" return model\n"
]
},
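{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal CTC shape sketch with synthetic tensors (hypothetical sizes): y_true\n",
"# holds (batch, label_length) class indices, y_pred holds (batch, time_steps,\n",
"# num_classes) softmax scores; ctc_batch_cost returns one loss per sample.\n",
"y_true_demo = tf.constant([[1, 2, 3, 4, 5, 6]], dtype=\"int64\")\n",
"y_pred_demo = tf.nn.softmax(tf.random.normal((1, 30, 64)))\n",
"loss_demo = keras.backend.ctc_batch_cost(\n",
"    y_true_demo, y_pred_demo,\n",
"    input_length=tf.constant([[30]], dtype=\"int64\"),\n",
"    label_length=tf.constant([[6]], dtype=\"int64\"))\n",
"print(loss_demo.shape)  # (1, 1)\n"
]
},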
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def printLayers(model):\n",
" for i, layer in enumerate(model.layers):\n",
" print(i, layer.name)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "B7GZlk2_3MGX"
},
"outputs": [],
"source": [
"class PredictionsDecoder:\n",
"\n",
" def __init__(self, captchaLength, num_to_char):\n",
" self.captchaLength = captchaLength\n",
" self.num_to_char = num_to_char\n",
"\n",
" def decode_batch_predictions(self, pred):\n",
" return self.asStrings(self.ctc_decode(pred))\n",
"\n",
" def ctc_decode(self, pred):\n",
" input_len = np.ones(pred.shape[0]) * pred.shape[1]\n",
" # Use greedy search. For complex tasks, you can use beam search\n",
" return keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][:, :self.captchaLength]\n",
"\n",
" def asStrings(self, labels):\n",
" return [self.asString(label) for label in labels]\n",
"\n",
" def asString(self, label):\n",
" return tf.strings.reduce_join(self.num_to_char(label)).numpy().decode(\"utf-8\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "8Oa7avYt3MGX"
},
"outputs": [],
"source": [
"class ModelDAO:\n",
"\n",
" def __init__(self, inColab):\n",
" self.inColab = inColab\n",
"\n",
" def saveModel(self, model):\n",
" !rm -rf {model.name}\n",
" model.save(model.name)\n",
" if self.inColab:\n",
" GoogleDriveManager.uploadFolderToGoogleDrive(model.name)\n",
"\n",
" def loadModel(self, modelName):\n",
" if self.inColab:\n",
" GoogleDriveManager.downloadFolderFromGoogleDrive(modelName)\n",
" return keras.models.load_model(modelName)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S3X_SslH3MGY"
},
"outputs": [],
"source": [
"# FK-TODO: entferne die getAccuracy()-Methode. Implementiere stattdessen https://stackoverflow.com/questions/37657260/how-to-implement-custom-metric-in-keras oder https://keras.io/api/metrics/#custom-metrics\n",
"def getAccuracy(dataset, prediction_model, ctc_decode):\n",
" accuracy = tf.keras.metrics.Accuracy()\n",
"\n",
" for batch in dataset:\n",
" accuracy.update_state(batch[\"label\"], ctc_decode(prediction_model.predict(batch[\"image\"], verbose=0)))\n",
"\n",
" return accuracy.result().numpy()"
]
},
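{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical sketch (names invented) of the custom whole-captcha accuracy\n",
"# metric that the FK-TODO above points at; update_state assumes y_pred is\n",
"# already CTC-decoded, i.e. dense (batch, captchaLength) label indices.\n",
"class SequenceAccuracy(tf.keras.metrics.Metric):\n",
"\n",
"    def __init__(self, name=\"sequence_accuracy\", **kwargs):\n",
"        super().__init__(name=name, **kwargs)\n",
"        self.matches = self.add_weight(name=\"matches\", initializer=\"zeros\")\n",
"        self.total = self.add_weight(name=\"total\", initializer=\"zeros\")\n",
"\n",
"    def update_state(self, y_true, y_pred, sample_weight=None):\n",
"        # a captcha counts as correct only if all character indices match\n",
"        equal = tf.reduce_all(tf.equal(tf.cast(y_true, tf.int64), tf.cast(y_pred, tf.int64)), axis=1)\n",
"        self.matches.assign_add(tf.reduce_sum(tf.cast(equal, tf.float32)))\n",
"        self.total.assign_add(tf.cast(tf.shape(y_true)[0], tf.float32))\n",
"\n",
"    def result(self):\n",
"        return self.matches / self.total\n"
]
},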
{
"cell_type": "markdown",
"metadata": {
"id": "94755hrNMf3w"
},
"source": [
"## Preparation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NZrKXF6P3MGY"
},
"outputs": [],
"source": [
"inColab = 'google.colab' in str(get_ipython())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "7EsmTaF03MGZ"
},
"outputs": [],
"source": [
"if inColab:\n",
" GoogleDriveManager.mount()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S_4hl4S4BmZK"
},
"outputs": [],
"source": [
"if inColab:\n",
" !cp {GoogleDriveManager._baseFolder}/captchas.zip .\n",
" !unzip captchas.zip"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WmUghcQaMf3y"
},
"outputs": [],
"source": [
"modelDAO = ModelDAO(inColab)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "cpxO7yGAMf3z"
},
"outputs": [],
"source": [
"charNumConverter = CharNumConverter(CaptchaGenerator.characters)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "tVb5nDFTMf3z"
},
"outputs": [],
"source": [
"predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "t1wzlHQ-Mf3z"
},
"outputs": [],
"source": [
"(img_width, img_height) = (241, 62)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "s35OUslsMf30"
},
"outputs": [],
"source": [
"datasetFactory = DatasetFactory(img_height, img_width, charNumConverter.char_to_num, batch_size = 64)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "lsLuSi7h3MGZ"
},
"source": [
"## Create And Train Base Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "oRcemcbG3MGa"
},
"outputs": [],
"source": [
"if inColab:\n",
" !sudo apt install ttf-mscorefonts-installer\n",
" !sudo fc-cache -f\n",
" !fc-match Arial"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "P7myCt7e2h6A"
},
"outputs": [],
"source": [
"# \"We generate 200,000 images for base model pre-training\"\n",
"captchaGenerator = CaptchaGenerator(\n",
" numCaptchas = 200000, # 50, # 200000,\n",
" dataDir = Path(\"captchas/generated/VAERS/\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "j9apYsyI3MGb"
},
"outputs": [],
"source": [
"captchaGenerator.createAndSaveCaptchas()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "AgN4skCkMf31"
},
"outputs": [],
"source": [
"train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(captchaGenerator.dataDir, datasetFactory)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "RcgWHXVSNsa7"
},
"outputs": [],
"source": [
"for batch in train_dataset.take(1):\n",
" numImages2Display = 16\n",
" images = batch[\"image\"][:numImages2Display]\n",
" labels = batch[\"label\"][:numImages2Display]\n",
" displayImagesInGrid(4, images, predictionsDecoder.asStrings(labels), ['black'] * len(labels))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "V8ELN-qJ3MGe"
},
"outputs": [],
"source": [
"modelFactory = ModelFactory(img_height, img_width, charNumConverter.char_to_num)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "zDoFYKM2hdEW"
},
"outputs": [],
"source": [
"model = modelFactory.createMobileNetV3Small()\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ltXYrpjIITAb"
},
"outputs": [],
"source": [
"# \"the success rates became stable after the base-model training epochs exceeded 20\"\n",
"history = model.fit(\n",
" train_dataset,\n",
" validation_data=validation_dataset,\n",
" epochs=20)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "fPG-Yl1SJfF7"
},
"outputs": [],
"source": [
"modelDAO.saveModel(model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NnNHMtIGITAe"
},
"outputs": [],
"source": [
"prediction_model = ModelFactory.createPredictionModel(model)\n",
"prediction_model.summary()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "YW651ztD8sKI"
},
"outputs": [],
"source": [
"display16Predictions(prediction_model, test_dataset, predictionsDecoder)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "V5gqMBIwBmZU"
},
"outputs": [],
"source": [
"getAccuracy(test_dataset, prediction_model, predictionsDecoder.ctc_decode)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UYxiYTH9BmZU"
},
"source": [
"## Transfer learning"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WV8IS4KrBmZU"
},
"outputs": [],
"source": [
"# \"we collected 1,500 real CAPTCHAs from the websites. Note that only 500 of them are used for fine-tuning, and another 1,000 are applied to calculate the test accuracy\"\n",
"# FK-TODO: lade das pre-trainierte model und trainiere es mit 500 real-world-Daten aus dem Ordner captchas/VAERS/, die restlichen 540 (es sollten nach obigem Zitat aber 1,000 sein) sind dann die Test-Daten.\n",
"# see https://keras.io/guides/transfer_learning/\n",
"# see https://www.tensorflow.org/tutorials/images/transfer_learning\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"modelName, numTrainableLayers = 'MobileNetV3Small', 104\n",
"# modelName, numTrainableLayers = 'ResNet101', 348"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "D7ogEQmB3MGj"
},
"outputs": [],
"source": [
"model = modelDAO.loadModel(modelName)\n",
"model.summary(show_trainable=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "gbPigogKNFrD"
},
"outputs": [],
"source": [
"# printLayers(model)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "59quw8o3Mf34"
},
"outputs": [],
"source": [
"model.trainable = True\n",
"for layer in model.layers[:numTrainableLayers]:\n",
" layer.trainable = False"
]
},
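{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative check that the freeze above took effect: only the layers after\n",
"# index numTrainableLayers should still be trainable.\n",
"print(sum(layer.trainable for layer in model.layers), \"of\", len(model.layers), \"layers trainable\")\n"
]
},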
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "acGczax3Mf34"
},
"outputs": [],
"source": [
"model.summary(show_trainable=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "q7_MjUO0BmZV"
},
"outputs": [],
"source": [
"train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path(\"captchas/VAERS/\"), datasetFactory)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "dZsCpibkBmZX"
},
"outputs": [],
"source": [
"# \"The model is optimized by a stochastic gradient descent (SGD) strategy with an initial learning rate of 0.004, weight decay of 0.00004 and momentum of 0.9.\"\n",
"from tensorflow.keras.optimizers import SGD\n",
"# model.compile(optimizer=SGD(learning_rate=0.0001, momentum=0.9))\n",
"model.compile(optimizer='adam')\n",
"\n",
"# \"Therefore, in our experiments, we chose 1 epoch for the fine-tuning stage.\"\n",
"history = model.fit(\n",
" train_dataset,\n",
" validation_data=validation_dataset,\n",
" epochs=20)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "TRbJigbH3MGl"
},
"outputs": [],
"source": [
"prediction_model = ModelFactory.createPredictionModel(model)\n",
"prediction_model.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rPszfhJ4BmZX"
},
"outputs": [],
"source": [
"getAccuracy(test_dataset, prediction_model, predictionsDecoder.ctc_decode)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"id": "hfmRY1qC7aVV"
},
"outputs": [],
"source": [
"display16Predictions(prediction_model, test_dataset, predictionsDecoder)"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [],
"name": "captcha.ipynb",
"private_outputs": true,
"provenance": []
},
"gpuClass": "standard",
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.7"
},
"vscode": {
"interpreter": {
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}

(The remaining 1,040 changed files are new binary captcha images, each roughly 3-4 KiB; not shown in this diff.)