Merge branch 'main' into pages

This commit is contained in:
frankknoll
2023-03-18 01:07:38 +01:00
6 changed files with 146 additions and 76 deletions

View File

@@ -27,15 +27,90 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "zZSwQragIS_v"
"id": "ioGwCR3Xl31V"
},
"outputs": [],
"source": [
"import sys\n",
"sys.argv = sys.argv[:1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "l-coMy_2l31X"
},
"outputs": [],
"source": [
"def isInColab():\n",
"    \"\"\"Return True when the google.colab package can be imported, False otherwise.\"\"\"\n",
"    try:\n",
"        # The Colab package is named google.colab; there is no top-level 'colab' module.\n",
"        import google.colab\n",
"        return True\n",
"    except ImportError:\n",
"        return False"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "goO0feQwl31Y"
},
"outputs": [],
"source": [
"inColab = isInColab()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "nsE9VWCel31Z"
},
"outputs": [],
"source": [
"if inColab:\n",
" branch = 'read-captcha'\n",
" !git clone https://github.com/KnollFrank/HowBadIsMyBatch.git\n",
" !cd HowBadIsMyBatch; git checkout $branch"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "l9qhlDVNl31b"
},
"outputs": [],
"source": [
"import os\n",
"srcPath = '/content/HowBadIsMyBatch/src' if inColab else os.getcwd()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "c-2fE6vZsD7a"
},
"outputs": [],
"source": [
"if inColab:\n",
" sys.path.insert(0, srcPath)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "zZSwQragIS_v"
},
"outputs": [],
"source": [
"import numpy as np\n",
"from pathlib import Path\n",
"import tensorflow as tf\n",
"from captcha.GoogleDriveManager import GoogleDriveManager\n",
"from captcha.CaptchaGenerator import CaptchaGenerator\n",
"from captcha.CharNumConverter import CharNumConverter\n",
"from captcha.DataSplitter import DataSplitter\n",
@@ -46,6 +121,40 @@
"from captcha.CaptchaShape import CaptchaShape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "BWqAvnVOl31d"
},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import shutil\n",
"\n",
"class GoogleDriveManager:\n",
"    \"\"\"Copies zipped folders between the local filesystem and a mounted Google Drive folder.\"\"\"\n",
"\n",
"    # Path at which mount() mounts Google Drive.\n",
"    _googleDriveFolder = Path('/content/gdrive')\n",
"    # Drive folder the zip archives are copied to and from.\n",
"    _baseFolder = _googleDriveFolder / 'MyDrive/CAPTCHA/models/'\n",
"\n",
"    @staticmethod\n",
"    def mount():\n",
"        \"\"\"Mount Google Drive at _googleDriveFolder (requires the google.colab package).\"\"\"\n",
"        from google.colab import drive\n",
"        drive.mount(str(GoogleDriveManager._googleDriveFolder))\n",
"\n",
"    @staticmethod\n",
"    def uploadFolderToGoogleDrive(folder):\n",
"        \"\"\"Zip `folder` into `<folder>.zip` next to it and copy the archive to _baseFolder.\n",
"\n",
"        The archive stores its entries below `<basename of folder>/`.\n",
"        Uses shutil instead of shell commands so paths with spaces or shell\n",
"        metacharacters are handled and failures raise instead of being ignored.\n",
"        \"\"\"\n",
"        folder = Path(folder).resolve()\n",
"        archive = shutil.make_archive(\n",
"            str(folder),\n",
"            'zip',\n",
"            root_dir = folder.parent,\n",
"            base_dir = folder.name)\n",
"        shutil.copy(archive, GoogleDriveManager._baseFolder)\n",
"\n",
"    @staticmethod\n",
"    def downloadFolderFromGoogleDrive(folder):\n",
"        \"\"\"Copy `<folder>.zip` from _baseFolder into the working directory and unpack it there.\n",
"\n",
"        An existing local `folder` is removed only after the archive was copied\n",
"        successfully, so a missing archive no longer deletes local data.\n",
"        \"\"\"\n",
"        archive = shutil.copy(GoogleDriveManager._baseFolder / f'{folder}.zip', '.')\n",
"        shutil.rmtree(folder, ignore_errors = True)\n",
"        shutil.unpack_archive(archive)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -138,7 +247,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"id": "HEKh6eval31k"
},
"outputs": [],
"source": [
"def printLayers(model):\n",
@@ -173,17 +284,6 @@
"## Preparation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NZrKXF6P3MGY"
},
"outputs": [],
"source": [
"inColab = 'google.colab' in str(get_ipython())"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -200,28 +300,28 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S_4hl4S4BmZK"
"id": "WmUghcQaMf3y"
},
"outputs": [],
"source": [
"if inColab:\n",
" !cp {GoogleDriveManager._baseFolder}/captchas.zip .\n",
" !unzip captchas.zip"
"modelDAO = ModelDAO()\n",
"charNumConverter = CharNumConverter(CaptchaGenerator.characters)\n",
"predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)\n",
"captchaShape = CaptchaShape()\n",
"datasetFactory = DatasetFactory(captchaShape, charNumConverter.char_to_num, batch_size = 64)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WmUghcQaMf3y"
},
"metadata": {},
"outputs": [],
"source": [
"modelDAO = ModelDAO(inColab)\n",
"charNumConverter = CharNumConverter(CaptchaGenerator.characters)\n",
"predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)\n",
"captchaShape = CaptchaShape()\n",
"datasetFactory = DatasetFactory(captchaShape, charNumConverter.char_to_num, batch_size = 64)"
"def saveModel(model):\n",
"    \"\"\"Save `model` under `<srcPath>/captcha/<model.name>` and, when running in Colab, upload that folder to Google Drive.\"\"\"\n",
"    targetFolder = f'{srcPath}/captcha/{model.name}'\n",
"    modelDAO.saveModel(model, targetFolder)\n",
"    if not inColab:\n",
"        return\n",
"    GoogleDriveManager.uploadFolderToGoogleDrive(targetFolder)"
]
},
{
@@ -242,6 +342,7 @@
"outputs": [],
"source": [
"if inColab:\n",
" !apt-get update\n",
" !sudo apt install ttf-mscorefonts-installer\n",
" !sudo fc-cache -f\n",
" !fc-match Arial"
@@ -257,8 +358,8 @@
"source": [
"# \"We generate 200,000 images for base model pre-training\"\n",
"captchaGenerator = CaptchaGenerator(\n",
" numCaptchas = 50, # 50, # 200000,\n",
" dataDir = Path(\"captchas/generated/VAERS/\"))"
" numCaptchas = 200000, # 50, # 200000,\n",
" dataDir = Path(srcPath + '/captchas/generated/VAERS/'))"
]
},
{
@@ -334,7 +435,7 @@
},
"outputs": [],
"source": [
"modelDAO.saveModel(model)"
"saveModel(model)"
]
},
{
@@ -397,7 +498,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"id": "qZvn1k2Ul31v"
},
"outputs": [],
"source": [
"modelName, numTrainableLayers = 'MobileNetV3Small', 104\n",
@@ -413,7 +516,7 @@
"outputs": [],
"source": [
"# FK-TODO: DRY with VAERSFileDownloader\n",
"modelFilepath = f'{os.getcwd()}/captcha/{modelName}'\n",
"modelFilepath = f'{srcPath}/captcha/{modelName}'\n",
"model = modelDAO.loadModel(modelFilepath)\n",
"model.summary(show_trainable=True)"
]
@@ -461,7 +564,7 @@
},
"outputs": [],
"source": [
"train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path(\"captcha/captchas/VAERS/\"), datasetFactory)"
"train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path(f\"{srcPath}/captcha/captchas/VAERS/\"), datasetFactory)"
]
},
{
@@ -522,18 +625,18 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"id": "FpJTHU6dxOVy"
},
"outputs": [],
"source": [
"modelDAO.saveModel(model)"
"saveModel(model)"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [],
"name": "captcha.ipynb",
"private_outputs": true,
"provenance": []
},

View File

@@ -2,6 +2,7 @@ from PIL import Image, ImageDraw, ImageFont
import random
import string
import shutil
from tqdm import tqdm
class CaptchaGenerator:
@@ -15,7 +16,7 @@ class CaptchaGenerator:
def createAndSaveCaptchas(self):
self._prepareDataDir()
for _ in range(self.numCaptchas):
for _ in tqdm(range(self.numCaptchas)):
self._createAndSaveCaptcha()
def _prepareDataDir(self):

View File

@@ -28,4 +28,4 @@ class CaptchaReader:
return PredictionsDecoder(CaptchaGenerator.captchaLength, CharNumConverter(CaptchaGenerator.characters).num_to_char).decode_batch_predictions(preds)
def _createPredictionModel(self):
return ModelFactory.createPredictionModel(ModelDAO(inColab=False).loadModel(self.modelFilepath))
return ModelFactory.createPredictionModel(ModelDAO().loadModel(self.modelFilepath))

View File

@@ -1,27 +0,0 @@
from pathlib import Path
class GoogleDriveManager:
_googleDriveFolder = Path('/content/gdrive')
_baseFolder = _googleDriveFolder / 'MyDrive/CAPTCHA/models/'
@staticmethod
def mount():
from google.colab import drive
drive.mount(str(GoogleDriveManager._googleDriveFolder))
@staticmethod
def uploadFolderToGoogleDrive(folder):
pass
# FK-FIXME:
# !zip -r {folder}.zip {folder}/
# !cp {folder}.zip {GoogleDriveManager._baseFolder}
@staticmethod
def downloadFolderFromGoogleDrive(folder):
pass
# FK-FIXME:
# !cp {GoogleDriveManager._baseFolder}/{folder}.zip .
# !rm -rf {folder}
# !unzip {folder}.zip

View File

@@ -1,20 +1,12 @@
from tensorflow import keras
from captcha.GoogleDriveManager import GoogleDriveManager
import shutil
class ModelDAO:
def __init__(self, inColab):
self.inColab = inColab
def saveModel(self, model):
shutil.rmtree(model.name, ignore_errors = True)
model.save(model.name)
if self.inColab:
GoogleDriveManager.uploadFolderToGoogleDrive(model.name)
def saveModel(self, model, modelFilepath):
shutil.rmtree(modelFilepath, ignore_errors = True)
model.save(modelFilepath)
def loadModel(self, modelFilepath):
if self.inColab:
GoogleDriveManager.downloadFolderFromGoogleDrive(modelFilepath)
return keras.models.load_model(modelFilepath)