Merge branch 'main' into pages
This commit is contained in:
@@ -27,15 +27,90 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "zZSwQragIS_v"
|
||||
"id": "ioGwCR3Xl31V"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"sys.argv = sys.argv[:1]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "l-coMy_2l31X"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def isInColab():\n",
|
||||
" try:\n",
|
||||
" import colab\n",
|
||||
" return True\n",
|
||||
" except:\n",
|
||||
" return False"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "goO0feQwl31Y"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"inColab = isInColab()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "nsE9VWCel31Z"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if inColab:\n",
|
||||
" branch = 'read-captcha'\n",
|
||||
" !git clone https://github.com/KnollFrank/HowBadIsMyBatch.git\n",
|
||||
" !cd HowBadIsMyBatch; git checkout $branch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "l9qhlDVNl31b"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"srcPath = '/content/HowBadIsMyBatch/src' if inColab else os.getcwd()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "c-2fE6vZsD7a"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if inColab:\n",
|
||||
" sys.path.insert(0, srcPath)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "zZSwQragIS_v"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"from pathlib import Path\n",
|
||||
"import tensorflow as tf\n",
|
||||
"from captcha.GoogleDriveManager import GoogleDriveManager\n",
|
||||
"from captcha.CaptchaGenerator import CaptchaGenerator\n",
|
||||
"from captcha.CharNumConverter import CharNumConverter\n",
|
||||
"from captcha.DataSplitter import DataSplitter\n",
|
||||
@@ -46,6 +121,40 @@
|
||||
"from captcha.CaptchaShape import CaptchaShape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "BWqAvnVOl31d"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"class GoogleDriveManager:\n",
|
||||
" \n",
|
||||
" _googleDriveFolder = Path('/content/gdrive')\n",
|
||||
" _baseFolder = _googleDriveFolder / 'MyDrive/CAPTCHA/models/'\n",
|
||||
"\n",
|
||||
" @staticmethod\n",
|
||||
" def mount():\n",
|
||||
" from google.colab import drive\n",
|
||||
" drive.mount(str(GoogleDriveManager._googleDriveFolder))\n",
|
||||
"\n",
|
||||
" @staticmethod\n",
|
||||
" def uploadFolderToGoogleDrive(folder):\n",
|
||||
" basename = !basename {folder}\n",
|
||||
" basename = basename[0]\n",
|
||||
" !cd {folder}/..; zip -r {basename}.zip {basename}/\n",
|
||||
" !cd {folder}/..; cp {basename}.zip {GoogleDriveManager._baseFolder}\n",
|
||||
" \n",
|
||||
" @staticmethod\n",
|
||||
" def downloadFolderFromGoogleDrive(folder):\n",
|
||||
" !cp {GoogleDriveManager._baseFolder}/{folder}.zip .\n",
|
||||
" !rm -rf {folder}\n",
|
||||
" !unzip {folder}.zip\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -138,7 +247,9 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"id": "HEKh6eval31k"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def printLayers(model):\n",
|
||||
@@ -173,17 +284,6 @@
|
||||
"## Preparation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "NZrKXF6P3MGY"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"inColab = 'google.colab' in str(get_ipython())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -200,28 +300,28 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "S_4hl4S4BmZK"
|
||||
"id": "WmUghcQaMf3y"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if inColab:\n",
|
||||
" !cp {GoogleDriveManager._baseFolder}/captchas.zip .\n",
|
||||
" !unzip captchas.zip"
|
||||
"modelDAO = ModelDAO()\n",
|
||||
"charNumConverter = CharNumConverter(CaptchaGenerator.characters)\n",
|
||||
"predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)\n",
|
||||
"captchaShape = CaptchaShape()\n",
|
||||
"datasetFactory = DatasetFactory(captchaShape, charNumConverter.char_to_num, batch_size = 64)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "WmUghcQaMf3y"
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"modelDAO = ModelDAO(inColab)\n",
|
||||
"charNumConverter = CharNumConverter(CaptchaGenerator.characters)\n",
|
||||
"predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)\n",
|
||||
"captchaShape = CaptchaShape()\n",
|
||||
"datasetFactory = DatasetFactory(captchaShape, charNumConverter.char_to_num, batch_size = 64)"
|
||||
"def saveModel(model):\n",
|
||||
" modelFilepath = f'{srcPath}/captcha/{model.name}'\n",
|
||||
" modelDAO.saveModel(model, modelFilepath)\n",
|
||||
" if inColab:\n",
|
||||
" GoogleDriveManager.uploadFolderToGoogleDrive(modelFilepath)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -242,6 +342,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if inColab:\n",
|
||||
" !apt-get update\n",
|
||||
" !sudo apt install ttf-mscorefonts-installer\n",
|
||||
" !sudo fc-cache -f\n",
|
||||
" !fc-match Arial"
|
||||
@@ -257,8 +358,8 @@
|
||||
"source": [
|
||||
"# \"We generate 200,000 images for base model pre-training\"\n",
|
||||
"captchaGenerator = CaptchaGenerator(\n",
|
||||
" numCaptchas = 50, # 50, # 200000,\n",
|
||||
" dataDir = Path(\"captchas/generated/VAERS/\"))"
|
||||
" numCaptchas = 200000, # 50, # 200000,\n",
|
||||
" dataDir = Path(srcPath + '/captchas/generated/VAERS/'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -334,7 +435,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"modelDAO.saveModel(model)"
|
||||
"saveModel(model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -397,7 +498,9 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"id": "qZvn1k2Ul31v"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"modelName, numTrainableLayers = 'MobileNetV3Small', 104\n",
|
||||
@@ -413,7 +516,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# FK-TODO: DRY with VAERSFileDownloader\n",
|
||||
"modelFilepath = f'{os.getcwd()}/captcha/{modelName}'\n",
|
||||
"modelFilepath = f'{srcPath}/captcha/{modelName}'\n",
|
||||
"model = modelDAO.loadModel(modelFilepath)\n",
|
||||
"model.summary(show_trainable=True)"
|
||||
]
|
||||
@@ -461,7 +564,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path(\"captcha/captchas/VAERS/\"), datasetFactory)"
|
||||
"train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path(f\"{srcPath}/captcha/captchas/VAERS/\"), datasetFactory)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -522,18 +625,18 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"id": "FpJTHU6dxOVy"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"modelDAO.saveModel(model)"
|
||||
"saveModel(model)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "GPU",
|
||||
"colab": {
|
||||
"collapsed_sections": [],
|
||||
"name": "captcha.ipynb",
|
||||
"private_outputs": true,
|
||||
"provenance": []
|
||||
},
|
||||
|
||||
@@ -2,6 +2,7 @@ from PIL import Image, ImageDraw, ImageFont
|
||||
import random
|
||||
import string
|
||||
import shutil
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class CaptchaGenerator:
|
||||
@@ -15,7 +16,7 @@ class CaptchaGenerator:
|
||||
|
||||
def createAndSaveCaptchas(self):
|
||||
self._prepareDataDir()
|
||||
for _ in range(self.numCaptchas):
|
||||
for _ in tqdm(range(self.numCaptchas)):
|
||||
self._createAndSaveCaptcha()
|
||||
|
||||
def _prepareDataDir(self):
|
||||
|
||||
@@ -28,4 +28,4 @@ class CaptchaReader:
|
||||
return PredictionsDecoder(CaptchaGenerator.captchaLength, CharNumConverter(CaptchaGenerator.characters).num_to_char).decode_batch_predictions(preds)
|
||||
|
||||
def _createPredictionModel(self):
|
||||
return ModelFactory.createPredictionModel(ModelDAO(inColab=False).loadModel(self.modelFilepath))
|
||||
return ModelFactory.createPredictionModel(ModelDAO().loadModel(self.modelFilepath))
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class GoogleDriveManager:
|
||||
|
||||
_googleDriveFolder = Path('/content/gdrive')
|
||||
_baseFolder = _googleDriveFolder / 'MyDrive/CAPTCHA/models/'
|
||||
|
||||
@staticmethod
|
||||
def mount():
|
||||
from google.colab import drive
|
||||
drive.mount(str(GoogleDriveManager._googleDriveFolder))
|
||||
|
||||
@staticmethod
|
||||
def uploadFolderToGoogleDrive(folder):
|
||||
pass
|
||||
# FK-FIXME:
|
||||
# !zip -r {folder}.zip {folder}/
|
||||
# !cp {folder}.zip {GoogleDriveManager._baseFolder}
|
||||
|
||||
@staticmethod
|
||||
def downloadFolderFromGoogleDrive(folder):
|
||||
pass
|
||||
# FK-FIXME:
|
||||
# !cp {GoogleDriveManager._baseFolder}/{folder}.zip .
|
||||
# !rm -rf {folder}
|
||||
# !unzip {folder}.zip
|
||||
@@ -1,20 +1,12 @@
|
||||
from tensorflow import keras
|
||||
from captcha.GoogleDriveManager import GoogleDriveManager
|
||||
import shutil
|
||||
|
||||
|
||||
class ModelDAO:
|
||||
|
||||
def __init__(self, inColab):
|
||||
self.inColab = inColab
|
||||
|
||||
def saveModel(self, model):
|
||||
shutil.rmtree(model.name, ignore_errors = True)
|
||||
model.save(model.name)
|
||||
if self.inColab:
|
||||
GoogleDriveManager.uploadFolderToGoogleDrive(model.name)
|
||||
def saveModel(self, model, modelFilepath):
|
||||
shutil.rmtree(modelFilepath, ignore_errors = True)
|
||||
model.save(modelFilepath)
|
||||
|
||||
def loadModel(self, modelFilepath):
|
||||
if self.inColab:
|
||||
GoogleDriveManager.downloadFolderFromGoogleDrive(modelFilepath)
|
||||
return keras.models.load_model(modelFilepath)
|
||||
|
||||
Reference in New Issue
Block a user