refactoring
This commit is contained in:
@@ -4,6 +4,7 @@ from WebDriver import getWebDriver, isCaptchaSolved, saveCaptchaImageAs
|
||||
from selenium.webdriver.common.by import By
|
||||
from captcha.CaptchaReader import CaptchaReader
|
||||
from zipUtils import unzipAndRemove
|
||||
from captcha.CaptchaShape import CaptchaShape
|
||||
|
||||
|
||||
#def getTextInCaptchaImage(captchaImageFile):
|
||||
@@ -22,7 +23,8 @@ def solveCaptchaAndStartFileDownload(driver, captchaImageFile):
|
||||
|
||||
def _createCaptchaReader():
|
||||
working_directory = os.path.dirname(__file__)
|
||||
return CaptchaReader(modelFilepath = f'{working_directory}/captcha/MobileNetV3Small')
|
||||
return CaptchaReader(modelFilepath = f'{working_directory}/captcha/MobileNetV3Small',
|
||||
captchaShape = CaptchaShape())
|
||||
|
||||
def downloadFile(absoluteFile, driver, maxTries):
|
||||
def _downloadFile():
|
||||
|
||||
@@ -25,22 +25,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "zZSwQragIS_v"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2023-03-15 10:46:02.303787: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA\n",
|
||||
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
|
||||
"/home/frankknoll/.local/lib/python3.9/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n",
|
||||
" warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import numpy as np\n",
|
||||
@@ -54,29 +43,29 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "QB8QZJPg3MGI"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from GoogleDriveManager import GoogleDriveManager"
|
||||
"from captcha.GoogleDriveManager import GoogleDriveManager"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "C3bxU1US2blM"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from CaptchaGenerator import CaptchaGenerator"
|
||||
"from captcha.CaptchaGenerator import CaptchaGenerator"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "0DZfMrbe3MGN"
|
||||
},
|
||||
@@ -91,18 +80,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "sNJjugG83MGO"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from CharNumConverter import CharNumConverter"
|
||||
"from captcha.CharNumConverter import CharNumConverter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "qxs04OTR3MGP"
|
||||
},
|
||||
@@ -137,18 +126,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "dAAACymS3MGR"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from DatasetFactory import DatasetFactory"
|
||||
"from captcha.DatasetFactory import DatasetFactory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "kdL9_t03Mf3t"
|
||||
},
|
||||
@@ -170,7 +159,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "FqVSEuZp3MGT"
|
||||
},
|
||||
@@ -197,7 +186,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "apkeCHhP3MGU"
|
||||
},
|
||||
@@ -222,18 +211,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "st13jAjL3MGV"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from ModelFactory import ModelFactory"
|
||||
"from captcha.ModelFactory import ModelFactory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -244,29 +233,29 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "B7GZlk2_3MGX"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from PredictionsDecoder import PredictionsDecoder"
|
||||
"from captcha.PredictionsDecoder import PredictionsDecoder"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "8Oa7avYt3MGX"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from ModelDAO import ModelDAO"
|
||||
"from captcha.ModelDAO import ModelDAO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "S3X_SslH3MGY"
|
||||
},
|
||||
@@ -293,7 +282,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "NZrKXF6P3MGY"
|
||||
},
|
||||
@@ -304,7 +293,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "7EsmTaF03MGZ"
|
||||
},
|
||||
@@ -316,7 +305,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "S_4hl4S4BmZK"
|
||||
},
|
||||
@@ -329,7 +318,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "WmUghcQaMf3y"
|
||||
},
|
||||
@@ -340,28 +329,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "cpxO7yGAMf3z"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2023-03-15 10:41:54.085280: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA\n",
|
||||
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
|
||||
"2023-03-15 10:41:54.089954: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"charNumConverter = CharNumConverter(CaptchaGenerator.characters)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "tVb5nDFTMf3z"
|
||||
},
|
||||
@@ -372,24 +351,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {
|
||||
"id": "t1wzlHQ-Mf3z"
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"(img_width, img_height) = (241, 62)"
|
||||
"from captcha.CaptchaShape import CaptchaShape\n",
|
||||
"captchaShape = CaptchaShape()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "s35OUslsMf30"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"datasetFactory = DatasetFactory(img_height, img_width, charNumConverter.char_to_num, batch_size = 64)"
|
||||
"datasetFactory = DatasetFactory(captchaShape, charNumConverter.char_to_num, batch_size = 64)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -403,7 +381,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "oRcemcbG3MGa"
|
||||
},
|
||||
@@ -417,7 +395,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "P7myCt7e2h6A"
|
||||
},
|
||||
@@ -431,7 +409,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "j9apYsyI3MGb"
|
||||
},
|
||||
@@ -474,7 +452,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"modelFactory = ModelFactory(img_height, img_width, charNumConverter.char_to_num)"
|
||||
"modelFactory = ModelFactory(captchaShape, charNumConverter.char_to_num)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -590,7 +568,9 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = modelDAO.loadModel(modelName)\n",
|
||||
"# FK-TODO: DRY with VAERSFileDownloader\n",
|
||||
"modelFilepath = f'{os.getcwd()}/captcha/{modelName}'\n",
|
||||
"model = modelDAO.loadModel(modelFilepath)\n",
|
||||
"model.summary(show_trainable=True)"
|
||||
]
|
||||
},
|
||||
@@ -637,7 +617,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path(\"captchas/VAERS/\"), datasetFactory)"
|
||||
"train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path(\"captcha/captchas/VAERS/\"), datasetFactory)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -7,21 +7,19 @@ from captcha.DatasetFactory import DatasetFactory
|
||||
import numpy as np
|
||||
from tensorflow import keras
|
||||
|
||||
# FK-TODO: DRY with captcha.ipynb
|
||||
img_width = 241
|
||||
img_height = 62
|
||||
|
||||
class CaptchaReader:
|
||||
|
||||
def __init__(self, modelFilepath):
|
||||
def __init__(self, modelFilepath, captchaShape):
|
||||
self.modelFilepath = modelFilepath
|
||||
self.captchaShape = captchaShape
|
||||
|
||||
def getTextInCaptchaImage(self, captchaImageFile):
|
||||
# FK-TODO: refactor
|
||||
modelDAO = ModelDAO(inColab = False)
|
||||
model = modelDAO.loadModel(self.modelFilepath)
|
||||
prediction_model = ModelFactory.createPredictionModel(model)
|
||||
charNumConverter = CharNumConverter(CaptchaGenerator.characters)
|
||||
datasetFactory = DatasetFactory(img_height, img_width, charNumConverter.char_to_num, batch_size = 64)
|
||||
datasetFactory = DatasetFactory(self.captchaShape,charNumConverter.char_to_num, batch_size = 64)
|
||||
batchImages = self._asSingleSampleBatch(datasetFactory._encode_single_sample(captchaImageFile, 'dummy')['image'])
|
||||
preds = prediction_model.predict(batchImages)
|
||||
predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import unittest
|
||||
from captcha.CaptchaReader import CaptchaReader
|
||||
from captcha.CaptchaShape import CaptchaShape
|
||||
import os
|
||||
|
||||
class CaptchaReaderTest(unittest.TestCase):
|
||||
@@ -10,7 +11,9 @@ class CaptchaReaderTest(unittest.TestCase):
|
||||
def test_getTextInCaptchaImage(self):
|
||||
# Given
|
||||
textInCaptchaImage = '1Ad47a'
|
||||
captchaReader = CaptchaReader(modelFilepath = f'{self.working_directory}/MobileNetV3Small')
|
||||
captchaReader = CaptchaReader(
|
||||
modelFilepath = f'{self.working_directory}/MobileNetV3Small',
|
||||
captchaShape = CaptchaShape())
|
||||
|
||||
# When
|
||||
textInCaptchaImageActual = captchaReader.getTextInCaptchaImage(f'{self.working_directory}/captchas/VAERS/{textInCaptchaImage}.jpeg')
|
||||
|
||||
5
src/captcha/CaptchaShape.py
Normal file
5
src/captcha/CaptchaShape.py
Normal file
@@ -0,0 +1,5 @@
|
||||
class CaptchaShape:
|
||||
|
||||
def __init__(self):
|
||||
self.width = 241
|
||||
self.height = 62
|
||||
@@ -3,9 +3,8 @@ import tensorflow as tf
|
||||
|
||||
class DatasetFactory:
|
||||
|
||||
def __init__(self, img_height, img_width, char_to_num, batch_size):
|
||||
self.img_height = img_height
|
||||
self.img_width = img_width
|
||||
def __init__(self, captchaShape, char_to_num, batch_size):
|
||||
self.captchaShape = captchaShape
|
||||
self.char_to_num = char_to_num
|
||||
self.batch_size = batch_size
|
||||
|
||||
@@ -18,7 +17,7 @@ class DatasetFactory:
|
||||
def _encode_single_sample(self, img_path, label):
|
||||
img = tf.io.read_file(img_path)
|
||||
img = tf.io.decode_jpeg(img, channels=3)
|
||||
img = tf.image.resize(img, [self.img_height, self.img_width])
|
||||
img = tf.image.resize(img, [self.captchaShape.height, self.captchaShape.width])
|
||||
# Map the characters in label to numbers
|
||||
label = self.char_to_num(tf.strings.unicode_split(label, input_encoding="UTF-8"))
|
||||
# Return a dict as our model is expecting two inputs
|
||||
|
||||
@@ -9,9 +9,8 @@ class ModelFactory:
|
||||
predictionModelInputLayerName = "image"
|
||||
predictionModelOutputLayerName = "dense2"
|
||||
|
||||
def __init__(self, img_height, img_width, char_to_num):
|
||||
self.img_height = img_height
|
||||
self.img_width = img_width
|
||||
def __init__(self, captchaShape, char_to_num):
|
||||
self.captchaShape = captchaShape
|
||||
self.char_to_num = char_to_num
|
||||
|
||||
# see https://www.tensorflow.org/api_docs/python/tf/keras/applications/resnet/ResNet101
|
||||
@@ -52,7 +51,7 @@ class ModelFactory:
|
||||
def _createModel(self, baseModelFactory, preprocess_input, name):
|
||||
# Inputs to the model
|
||||
input_image = layers.Input(
|
||||
shape=(self.img_height, self.img_width, 3),
|
||||
shape = (self.captchaShape.height, self.captchaShape.width, 3),
|
||||
name = ModelFactory.predictionModelInputLayerName,
|
||||
dtype = "float32")
|
||||
labels = layers.Input(name="label", shape=(None,), dtype="float32")
|
||||
|
||||
Reference in New Issue
Block a user