From a9e7bf483392199bb83856220c1ce20150230543 Mon Sep 17 00:00:00 2001
From: frankknoll <Knoll_Frank@web.de>
Date: Wed, 15 Mar 2023 17:14:24 +0100
Subject: [PATCH] refactoring: introduce CaptchaShape, move captcha.ipynb to src/

---
 src/VAERSFileDownloader.py       |   4 +-
 src/{captcha => }/captcha.ipynb  | 108 +++++++++++++------------------
 src/captcha/CaptchaReader.py     |  10 ++-
 src/captcha/CaptchaReaderTest.py |   5 +-
 src/captcha/CaptchaShape.py      |   5 ++
 src/captcha/DatasetFactory.py    |   7 +-
 src/captcha/ModelFactory.py      |  11 ++--
 7 files changed, 68 insertions(+), 82 deletions(-)
 rename src/{captcha => }/captcha.ipynb (85%)
 create mode 100644 src/captcha/CaptchaShape.py

diff --git a/src/VAERSFileDownloader.py b/src/VAERSFileDownloader.py
index 5f98de34704..7d59a83996b 100644
--- a/src/VAERSFileDownloader.py
+++ b/src/VAERSFileDownloader.py
@@ -4,6 +4,7 @@ from WebDriver import getWebDriver, isCaptchaSolved, saveCaptchaImageAs
 from selenium.webdriver.common.by import By
 from captcha.CaptchaReader import CaptchaReader
 from zipUtils import unzipAndRemove
+from captcha.CaptchaShape import CaptchaShape
 
 
 #def getTextInCaptchaImage(captchaImageFile):
@@ -22,7 +23,8 @@ def solveCaptchaAndStartFileDownload(driver, captchaImageFile):
 
 def _createCaptchaReader():
     working_directory = os.path.dirname(__file__)
-    return CaptchaReader(modelFilepath = f'{working_directory}/captcha/MobileNetV3Small')
+    return CaptchaReader(modelFilepath = f'{working_directory}/captcha/MobileNetV3Small',
+                         captchaShape = CaptchaShape())
 
 def downloadFile(absoluteFile, driver, maxTries):
     def _downloadFile():
diff --git a/src/captcha/captcha.ipynb b/src/captcha.ipynb
similarity index 85%
rename from src/captcha/captcha.ipynb
rename to src/captcha.ipynb
index 48b04c13f33..c6ea0f2921a 100644
--- a/src/captcha/captcha.ipynb
+++ b/src/captcha.ipynb
@@ -25,22 +25,11 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 1,
+      "execution_count": null,
       "metadata": {
         "id": "zZSwQragIS_v"
       },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "2023-03-15 10:46:02.303787: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA\n",
-            "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-            "/home/frankknoll/.local/lib/python3.9/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.5\n",
-            "  warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "import os\n",
         "import numpy as np\n",
@@ -54,29 +43,29 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 2,
+      "execution_count": null,
       "metadata": {
         "id": "QB8QZJPg3MGI"
       },
       "outputs": [],
       "source": [
-        "from GoogleDriveManager import GoogleDriveManager"
+        "from captcha.GoogleDriveManager import GoogleDriveManager"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 3,
+      "execution_count": null,
       "metadata": {
         "id": "C3bxU1US2blM"
       },
       "outputs": [],
       "source": [
-        "from CaptchaGenerator import CaptchaGenerator"
+        "from captcha.CaptchaGenerator import CaptchaGenerator"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 4,
+      "execution_count": null,
       "metadata": {
         "id": "0DZfMrbe3MGN"
       },
@@ -91,18 +80,18 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 5,
+      "execution_count": null,
       "metadata": {
         "id": "sNJjugG83MGO"
       },
       "outputs": [],
       "source": [
-        "from CharNumConverter import CharNumConverter"
+        "from captcha.CharNumConverter import CharNumConverter"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 6,
+      "execution_count": null,
       "metadata": {
         "id": "qxs04OTR3MGP"
       },
@@ -137,18 +126,18 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 7,
+      "execution_count": null,
       "metadata": {
         "id": "dAAACymS3MGR"
       },
       "outputs": [],
       "source": [
-        "from DatasetFactory import DatasetFactory"
+        "from captcha.DatasetFactory import DatasetFactory"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 8,
+      "execution_count": null,
       "metadata": {
         "id": "kdL9_t03Mf3t"
       },
@@ -170,7 +159,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 9,
+      "execution_count": null,
       "metadata": {
         "id": "FqVSEuZp3MGT"
       },
@@ -197,7 +186,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 10,
+      "execution_count": null,
       "metadata": {
         "id": "apkeCHhP3MGU"
       },
@@ -222,18 +211,18 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 11,
+      "execution_count": null,
       "metadata": {
         "id": "st13jAjL3MGV"
       },
       "outputs": [],
       "source": [
-        "from ModelFactory import ModelFactory"
+        "from captcha.ModelFactory import ModelFactory"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 12,
+      "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -244,29 +233,29 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 13,
+      "execution_count": null,
       "metadata": {
         "id": "B7GZlk2_3MGX"
       },
       "outputs": [],
       "source": [
-        "from PredictionsDecoder import PredictionsDecoder"
+        "from captcha.PredictionsDecoder import PredictionsDecoder"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 14,
+      "execution_count": null,
       "metadata": {
         "id": "8Oa7avYt3MGX"
       },
       "outputs": [],
       "source": [
-        "from ModelDAO import ModelDAO"
+        "from captcha.ModelDAO import ModelDAO"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 15,
+      "execution_count": null,
       "metadata": {
         "id": "S3X_SslH3MGY"
       },
@@ -293,7 +282,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 16,
+      "execution_count": null,
       "metadata": {
         "id": "NZrKXF6P3MGY"
       },
@@ -304,7 +293,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 17,
+      "execution_count": null,
       "metadata": {
         "id": "7EsmTaF03MGZ"
       },
@@ -316,7 +305,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 18,
+      "execution_count": null,
       "metadata": {
         "id": "S_4hl4S4BmZK"
       },
@@ -329,7 +318,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 19,
+      "execution_count": null,
       "metadata": {
         "id": "WmUghcQaMf3y"
       },
@@ -340,28 +329,18 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 20,
+      "execution_count": null,
       "metadata": {
         "id": "cpxO7yGAMf3z"
       },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "2023-03-15 10:41:54.085280: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA\n",
-            "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-            "2023-03-15 10:41:54.089954: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "charNumConverter = CharNumConverter(CaptchaGenerator.characters)"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 21,
+      "execution_count": null,
       "metadata": {
         "id": "tVb5nDFTMf3z"
       },
@@ -372,24 +351,23 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 22,
-      "metadata": {
-        "id": "t1wzlHQ-Mf3z"
-      },
+      "execution_count": null,
+      "metadata": {},
       "outputs": [],
       "source": [
-        "(img_width, img_height) = (241, 62)"
+        "from captcha.CaptchaShape import CaptchaShape\n",
+        "captchaShape = CaptchaShape()"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 23,
+      "execution_count": null,
       "metadata": {
         "id": "s35OUslsMf30"
       },
       "outputs": [],
       "source": [
-        "datasetFactory = DatasetFactory(img_height, img_width, charNumConverter.char_to_num, batch_size = 64)"
+        "datasetFactory = DatasetFactory(captchaShape, charNumConverter.char_to_num, batch_size = 64)"
       ]
     },
     {
@@ -403,7 +381,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 24,
+      "execution_count": null,
       "metadata": {
         "id": "oRcemcbG3MGa"
       },
@@ -417,7 +395,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 25,
+      "execution_count": null,
       "metadata": {
         "id": "P7myCt7e2h6A"
       },
@@ -431,7 +409,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 27,
+      "execution_count": null,
       "metadata": {
         "id": "j9apYsyI3MGb"
       },
@@ -474,7 +452,7 @@
       },
       "outputs": [],
       "source": [
-        "modelFactory = ModelFactory(img_height, img_width, charNumConverter.char_to_num)"
+        "modelFactory = ModelFactory(captchaShape, charNumConverter.char_to_num)"
       ]
     },
     {
@@ -590,7 +568,9 @@
       },
       "outputs": [],
       "source": [
-        "model = modelDAO.loadModel(modelName)\n",
+        "# FK-TODO: DRY with VAERSFileDownloader\n",
+        "modelFilepath = f'{os.getcwd()}/captcha/{modelName}'\n",
+        "model = modelDAO.loadModel(modelFilepath)\n",
         "model.summary(show_trainable=True)"
       ]
     },
@@ -637,7 +617,7 @@
       },
       "outputs": [],
       "source": [
-        "train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path(\"captchas/VAERS/\"), datasetFactory)"
+        "train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path(\"captcha/captchas/VAERS/\"), datasetFactory)"
       ]
     },
     {
diff --git a/src/captcha/CaptchaReader.py b/src/captcha/CaptchaReader.py
index 418621cae56..df2b0c5f6e2 100644
--- a/src/captcha/CaptchaReader.py
+++ b/src/captcha/CaptchaReader.py
@@ -7,21 +7,19 @@ from captcha.DatasetFactory import DatasetFactory
 import numpy as np
 from tensorflow import keras
 
-# FK-TODO: DRY with captcha.ipynb
-img_width = 241
-img_height = 62
-
 class CaptchaReader:
 
-    def __init__(self, modelFilepath):
+    def __init__(self, modelFilepath, captchaShape):
         self.modelFilepath = modelFilepath
+        self.captchaShape = captchaShape
 
     def getTextInCaptchaImage(self, captchaImageFile):
+        # FK-TODO: refactor
         modelDAO = ModelDAO(inColab = False)
         model = modelDAO.loadModel(self.modelFilepath)
         prediction_model = ModelFactory.createPredictionModel(model)
         charNumConverter = CharNumConverter(CaptchaGenerator.characters)
-        datasetFactory = DatasetFactory(img_height, img_width, charNumConverter.char_to_num, batch_size = 64)
+        datasetFactory = DatasetFactory(self.captchaShape,charNumConverter.char_to_num, batch_size = 64)
         batchImages = self._asSingleSampleBatch(datasetFactory._encode_single_sample(captchaImageFile, 'dummy')['image'])
         preds = prediction_model.predict(batchImages)
         predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)
diff --git a/src/captcha/CaptchaReaderTest.py b/src/captcha/CaptchaReaderTest.py
index 8a4050ca4e8..3a9112f5b47 100644
--- a/src/captcha/CaptchaReaderTest.py
+++ b/src/captcha/CaptchaReaderTest.py
@@ -1,5 +1,6 @@
 import unittest
 from captcha.CaptchaReader import CaptchaReader
+from captcha.CaptchaShape import CaptchaShape
 import os
 
 class CaptchaReaderTest(unittest.TestCase):
@@ -10,7 +11,9 @@ class CaptchaReaderTest(unittest.TestCase):
     def test_getTextInCaptchaImage(self):
         # Given
         textInCaptchaImage = '1Ad47a'
-        captchaReader = CaptchaReader(modelFilepath = f'{self.working_directory}/MobileNetV3Small')
+        captchaReader = CaptchaReader(
+            modelFilepath = f'{self.working_directory}/MobileNetV3Small',
+            captchaShape = CaptchaShape())
 
         # When
         textInCaptchaImageActual = captchaReader.getTextInCaptchaImage(f'{self.working_directory}/captchas/VAERS/{textInCaptchaImage}.jpeg')
diff --git a/src/captcha/CaptchaShape.py b/src/captcha/CaptchaShape.py
new file mode 100644
index 00000000000..ff46a10f315
--- /dev/null
+++ b/src/captcha/CaptchaShape.py
@@ -0,0 +1,5 @@
+class CaptchaShape:
+
+    def __init__(self):
+        self.width = 241
+        self.height = 62
diff --git a/src/captcha/DatasetFactory.py b/src/captcha/DatasetFactory.py
index c09b4b53bd2..aac88cee4e9 100644
--- a/src/captcha/DatasetFactory.py
+++ b/src/captcha/DatasetFactory.py
@@ -3,9 +3,8 @@ import tensorflow as tf
 
 class DatasetFactory:
     
-    def __init__(self, img_height, img_width, char_to_num, batch_size):
-        self.img_height = img_height
-        self.img_width = img_width
+    def __init__(self, captchaShape, char_to_num, batch_size):
+        self.captchaShape = captchaShape
         self.char_to_num = char_to_num
         self.batch_size = batch_size
 
@@ -18,7 +17,7 @@ class DatasetFactory:
     def _encode_single_sample(self, img_path, label):
         img = tf.io.read_file(img_path)
         img = tf.io.decode_jpeg(img, channels=3)
-        img = tf.image.resize(img, [self.img_height, self.img_width])
+        img = tf.image.resize(img, [self.captchaShape.height, self.captchaShape.width])
         # Map the characters in label to numbers
         label = self.char_to_num(tf.strings.unicode_split(label, input_encoding="UTF-8"))
         # Return a dict as our model is expecting two inputs
diff --git a/src/captcha/ModelFactory.py b/src/captcha/ModelFactory.py
index a829b5b2dff..07deafabb05 100644
--- a/src/captcha/ModelFactory.py
+++ b/src/captcha/ModelFactory.py
@@ -9,9 +9,8 @@ class ModelFactory:
     predictionModelInputLayerName = "image"
     predictionModelOutputLayerName = "dense2"
 
-    def __init__(self, img_height, img_width, char_to_num):
-        self.img_height = img_height
-        self.img_width = img_width
+    def __init__(self, captchaShape, char_to_num):
+        self.captchaShape = captchaShape
         self.char_to_num = char_to_num
 
     # see https://www.tensorflow.org/api_docs/python/tf/keras/applications/resnet/ResNet101
@@ -52,9 +51,9 @@ class ModelFactory:
     def _createModel(self, baseModelFactory, preprocess_input, name):
         # Inputs to the model
         input_image = layers.Input(
-            shape=(self.img_height, self.img_width, 3),
-            name=ModelFactory.predictionModelInputLayerName,
-            dtype="float32")
+            shape = (self.captchaShape.height, self.captchaShape.width, 3),
+            name = ModelFactory.predictionModelInputLayerName,
+            dtype = "float32")
         labels = layers.Input(name="label", shape=(None,), dtype="float32")
         
         image = preprocess_input(input_image)