Merge branch 'main' into pages

This commit is contained in:
frankknoll
2023-03-18 01:07:38 +01:00
6 changed files with 146 additions and 76 deletions

View File

@@ -16,6 +16,7 @@ dependencies:
- nb_conda_kernels - nb_conda_kernels
- pillow - pillow
- openpyxl - openpyxl
- tqdm
# - python-decouple # - python-decouple
# - selenium # - selenium
# - webdriver-manager # - webdriver-manager

View File

@@ -27,15 +27,90 @@
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {
"id": "zZSwQragIS_v" "id": "ioGwCR3Xl31V"
},
"outputs": [],
"source": [
"import sys\n",
"sys.argv = sys.argv[:1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "l-coMy_2l31X"
},
"outputs": [],
"source": [
"def isInColab():\n",
" try:\n",
" import google.colab\n",
" return True\n",
" except ImportError:\n",
" return False"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "goO0feQwl31Y"
},
"outputs": [],
"source": [
"inColab = isInColab()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "nsE9VWCel31Z"
},
"outputs": [],
"source": [
"if inColab:\n",
" branch = 'read-captcha'\n",
" !git clone https://github.com/KnollFrank/HowBadIsMyBatch.git\n",
" !cd HowBadIsMyBatch; git checkout $branch"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "l9qhlDVNl31b"
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"import os\n", "import os\n",
"srcPath = '/content/HowBadIsMyBatch/src' if inColab else os.getcwd()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "c-2fE6vZsD7a"
},
"outputs": [],
"source": [
"if inColab:\n",
" sys.path.insert(0, srcPath)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "zZSwQragIS_v"
},
"outputs": [],
"source": [
"import numpy as np\n", "import numpy as np\n",
"from pathlib import Path\n", "from pathlib import Path\n",
"import tensorflow as tf\n", "import tensorflow as tf\n",
"from captcha.GoogleDriveManager import GoogleDriveManager\n",
"from captcha.CaptchaGenerator import CaptchaGenerator\n", "from captcha.CaptchaGenerator import CaptchaGenerator\n",
"from captcha.CharNumConverter import CharNumConverter\n", "from captcha.CharNumConverter import CharNumConverter\n",
"from captcha.DataSplitter import DataSplitter\n", "from captcha.DataSplitter import DataSplitter\n",
@@ -46,6 +121,40 @@
"from captcha.CaptchaShape import CaptchaShape" "from captcha.CaptchaShape import CaptchaShape"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "BWqAvnVOl31d"
},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"class GoogleDriveManager:\n",
" \n",
" _googleDriveFolder = Path('/content/gdrive')\n",
" _baseFolder = _googleDriveFolder / 'MyDrive/CAPTCHA/models/'\n",
"\n",
" @staticmethod\n",
" def mount():\n",
" from google.colab import drive\n",
" drive.mount(str(GoogleDriveManager._googleDriveFolder))\n",
"\n",
" @staticmethod\n",
" def uploadFolderToGoogleDrive(folder):\n",
" basename = !basename {folder}\n",
" basename = basename[0]\n",
" !cd {folder}/..; zip -r {basename}.zip {basename}/\n",
" !cd {folder}/..; cp {basename}.zip {GoogleDriveManager._baseFolder}\n",
" \n",
" @staticmethod\n",
" def downloadFolderFromGoogleDrive(folder):\n",
" !cp {GoogleDriveManager._baseFolder}/{folder}.zip .\n",
" !rm -rf {folder}\n",
" !unzip {folder}.zip\n"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -138,7 +247,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"id": "HEKh6eval31k"
},
"outputs": [], "outputs": [],
"source": [ "source": [
"def printLayers(model):\n", "def printLayers(model):\n",
@@ -173,17 +284,6 @@
"## Preparation" "## Preparation"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NZrKXF6P3MGY"
},
"outputs": [],
"source": [
"inColab = 'google.colab' in str(get_ipython())"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -200,28 +300,28 @@
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {
"id": "S_4hl4S4BmZK" "id": "WmUghcQaMf3y"
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"if inColab:\n", "modelDAO = ModelDAO()\n",
" !cp {GoogleDriveManager._baseFolder}/captchas.zip .\n", "charNumConverter = CharNumConverter(CaptchaGenerator.characters)\n",
" !unzip captchas.zip" "predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)\n",
"captchaShape = CaptchaShape()\n",
"datasetFactory = DatasetFactory(captchaShape, charNumConverter.char_to_num, batch_size = 64)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {},
"id": "WmUghcQaMf3y"
},
"outputs": [], "outputs": [],
"source": [ "source": [
"modelDAO = ModelDAO(inColab)\n", "def saveModel(model):\n",
"charNumConverter = CharNumConverter(CaptchaGenerator.characters)\n", " modelFilepath = f'{srcPath}/captcha/{model.name}'\n",
"predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)\n", " modelDAO.saveModel(model, modelFilepath)\n",
"captchaShape = CaptchaShape()\n", " if inColab:\n",
"datasetFactory = DatasetFactory(captchaShape, charNumConverter.char_to_num, batch_size = 64)" " GoogleDriveManager.uploadFolderToGoogleDrive(modelFilepath)"
] ]
}, },
{ {
@@ -242,6 +342,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"if inColab:\n", "if inColab:\n",
" !apt-get update\n",
" !sudo apt install ttf-mscorefonts-installer\n", " !sudo apt install ttf-mscorefonts-installer\n",
" !sudo fc-cache -f\n", " !sudo fc-cache -f\n",
" !fc-match Arial" " !fc-match Arial"
@@ -257,8 +358,8 @@
"source": [ "source": [
"# \"We generate 200,000 images for base model pre-training\"\n", "# \"We generate 200,000 images for base model pre-training\"\n",
"captchaGenerator = CaptchaGenerator(\n", "captchaGenerator = CaptchaGenerator(\n",
" numCaptchas = 50, # 50, # 200000,\n", " numCaptchas = 200000, # 50, # 200000,\n",
" dataDir = Path(\"captchas/generated/VAERS/\"))" " dataDir = Path(srcPath + '/captchas/generated/VAERS/'))"
] ]
}, },
{ {
@@ -334,7 +435,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"modelDAO.saveModel(model)" "saveModel(model)"
] ]
}, },
{ {
@@ -397,7 +498,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"id": "qZvn1k2Ul31v"
},
"outputs": [], "outputs": [],
"source": [ "source": [
"modelName, numTrainableLayers = 'MobileNetV3Small', 104\n", "modelName, numTrainableLayers = 'MobileNetV3Small', 104\n",
@@ -413,7 +516,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# FK-TODO: DRY with VAERSFileDownloader\n", "# FK-TODO: DRY with VAERSFileDownloader\n",
"modelFilepath = f'{os.getcwd()}/captcha/{modelName}'\n", "modelFilepath = f'{srcPath}/captcha/{modelName}'\n",
"model = modelDAO.loadModel(modelFilepath)\n", "model = modelDAO.loadModel(modelFilepath)\n",
"model.summary(show_trainable=True)" "model.summary(show_trainable=True)"
] ]
@@ -461,7 +564,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path(\"captcha/captchas/VAERS/\"), datasetFactory)" "train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path(f\"{srcPath}/captcha/captchas/VAERS/\"), datasetFactory)"
] ]
}, },
{ {
@@ -522,18 +625,18 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"id": "FpJTHU6dxOVy"
},
"outputs": [], "outputs": [],
"source": [ "source": [
"modelDAO.saveModel(model)" "saveModel(model)"
] ]
} }
], ],
"metadata": { "metadata": {
"accelerator": "GPU", "accelerator": "GPU",
"colab": { "colab": {
"collapsed_sections": [],
"name": "captcha.ipynb",
"private_outputs": true, "private_outputs": true,
"provenance": [] "provenance": []
}, },

View File

@@ -2,6 +2,7 @@ from PIL import Image, ImageDraw, ImageFont
import random import random
import string import string
import shutil import shutil
from tqdm import tqdm
class CaptchaGenerator: class CaptchaGenerator:
@@ -15,7 +16,7 @@ class CaptchaGenerator:
def createAndSaveCaptchas(self): def createAndSaveCaptchas(self):
self._prepareDataDir() self._prepareDataDir()
for _ in range(self.numCaptchas): for _ in tqdm(range(self.numCaptchas)):
self._createAndSaveCaptcha() self._createAndSaveCaptcha()
def _prepareDataDir(self): def _prepareDataDir(self):

View File

@@ -28,4 +28,4 @@ class CaptchaReader:
return PredictionsDecoder(CaptchaGenerator.captchaLength, CharNumConverter(CaptchaGenerator.characters).num_to_char).decode_batch_predictions(preds) return PredictionsDecoder(CaptchaGenerator.captchaLength, CharNumConverter(CaptchaGenerator.characters).num_to_char).decode_batch_predictions(preds)
def _createPredictionModel(self): def _createPredictionModel(self):
return ModelFactory.createPredictionModel(ModelDAO(inColab=False).loadModel(self.modelFilepath)) return ModelFactory.createPredictionModel(ModelDAO().loadModel(self.modelFilepath))

View File

@@ -1,27 +0,0 @@
from pathlib import Path
class GoogleDriveManager:
    """Move zipped folders between the local disk and a mounted Google Drive.

    Archives are stored in ``_baseFolder`` as ``<folder name>.zip``. The Drive
    paths are only usable after ``mount()`` has been called.
    """

    # Mount point handed to ``drive.mount`` in ``mount()``.
    _googleDriveFolder = Path('/content/gdrive')
    # Drive directory that holds the ``<folder name>.zip`` archives.
    _baseFolder = _googleDriveFolder / 'MyDrive/CAPTCHA/models/'

    @staticmethod
    def mount():
        """Mount Google Drive at ``_googleDriveFolder``."""
        # Imported lazily so that this module can be imported outside Colab.
        from google.colab import drive
        drive.mount(str(GoogleDriveManager._googleDriveFolder))

    @staticmethod
    def uploadFolderToGoogleDrive(folder):
        """Zip ``folder`` and copy the archive into ``_baseFolder``.

        The archive ``<name>.zip`` is created next to ``folder`` and contains
        the folder itself as its single top-level entry, so unpacking it
        recreates ``<name>/``.

        Args:
            folder: path (str or Path) of an existing directory.

        Raises:
            FileNotFoundError: if ``folder`` is not an existing directory, or
                if ``_baseFolder`` does not exist (e.g. Drive not mounted).
        """
        # Plain-Python replacement for the `!zip -r` / `!cp` shell magics,
        # which only work inside IPython.
        import shutil

        folder = Path(folder)
        if not folder.is_dir():
            raise FileNotFoundError(f'no such folder: {folder}')
        archive = Path(
            shutil.make_archive(
                base_name=str(folder.parent / folder.name),
                format='zip',
                root_dir=str(folder.parent),
                base_dir=folder.name))
        # Name the target file explicitly: copying onto a missing directory
        # would otherwise silently create a file with the directory's name.
        shutil.copy(
            str(archive),
            str(GoogleDriveManager._baseFolder / archive.name))

    @staticmethod
    def downloadFolderFromGoogleDrive(folder):
        """Replace ``folder`` with the contents of its archive in ``_baseFolder``.

        ``<name>.zip`` is copied from ``_baseFolder`` next to ``folder``, any
        existing ``folder`` is removed, and the archive is unpacked in the
        parent directory of ``folder``.

        Args:
            folder: path (str or Path) of the directory to restore.

        Raises:
            FileNotFoundError: if ``_baseFolder`` holds no ``<name>.zip``.
        """
        # Plain-Python replacement for the `!cp` / `!rm -rf` / `!unzip`
        # shell magics, which only work inside IPython.
        import shutil

        folder = Path(folder)
        archive = folder.parent / f'{folder.name}.zip'
        shutil.copy(
            str(GoogleDriveManager._baseFolder / archive.name),
            str(archive))
        # Same semantics as `rm -rf`: a missing folder is not an error.
        shutil.rmtree(str(folder), ignore_errors=True)
        shutil.unpack_archive(str(archive), extract_dir=str(folder.parent))

View File

@@ -1,20 +1,12 @@
from tensorflow import keras from tensorflow import keras
from captcha.GoogleDriveManager import GoogleDriveManager
import shutil import shutil
class ModelDAO: class ModelDAO:
def __init__(self, inColab): def saveModel(self, model, modelFilepath):
self.inColab = inColab shutil.rmtree(modelFilepath, ignore_errors = True)
model.save(modelFilepath)
def saveModel(self, model):
shutil.rmtree(model.name, ignore_errors = True)
model.save(model.name)
if self.inColab:
GoogleDriveManager.uploadFolderToGoogleDrive(model.name)
def loadModel(self, modelFilepath): def loadModel(self, modelFilepath):
if self.inColab:
GoogleDriveManager.downloadFolderFromGoogleDrive(modelFilepath)
return keras.models.load_model(modelFilepath) return keras.models.load_model(modelFilepath)