Merge branch 'main' into pages
This commit is contained in:
@@ -16,6 +16,7 @@ dependencies:
|
|||||||
- nb_conda_kernels
|
- nb_conda_kernels
|
||||||
- pillow
|
- pillow
|
||||||
- openpyxl
|
- openpyxl
|
||||||
|
- tqdm
|
||||||
# - python-decouple
|
# - python-decouple
|
||||||
# - selenium
|
# - selenium
|
||||||
# - webdriver-manager
|
# - webdriver-manager
|
||||||
|
|||||||
@@ -27,15 +27,90 @@
|
|||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "zZSwQragIS_v"
|
"id": "ioGwCR3Xl31V"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys\n",
|
||||||
|
"sys.argv = sys.argv[:1]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "l-coMy_2l31X"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def isInColab():\n",
|
||||||
|
" try:\n",
|
||||||
|
" import colab\n",
|
||||||
|
" return True\n",
|
||||||
|
" except:\n",
|
||||||
|
" return False"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "goO0feQwl31Y"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"inColab = isInColab()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "nsE9VWCel31Z"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"if inColab:\n",
|
||||||
|
" branch = 'read-captcha'\n",
|
||||||
|
" !git clone https://github.com/KnollFrank/HowBadIsMyBatch.git\n",
|
||||||
|
" !cd HowBadIsMyBatch; git checkout $branch"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "l9qhlDVNl31b"
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import os\n",
|
"import os\n",
|
||||||
|
"srcPath = '/content/HowBadIsMyBatch/src' if inColab else os.getcwd()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "c-2fE6vZsD7a"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"if inColab:\n",
|
||||||
|
" sys.path.insert(0, srcPath)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "zZSwQragIS_v"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"from pathlib import Path\n",
|
"from pathlib import Path\n",
|
||||||
"import tensorflow as tf\n",
|
"import tensorflow as tf\n",
|
||||||
"from captcha.GoogleDriveManager import GoogleDriveManager\n",
|
|
||||||
"from captcha.CaptchaGenerator import CaptchaGenerator\n",
|
"from captcha.CaptchaGenerator import CaptchaGenerator\n",
|
||||||
"from captcha.CharNumConverter import CharNumConverter\n",
|
"from captcha.CharNumConverter import CharNumConverter\n",
|
||||||
"from captcha.DataSplitter import DataSplitter\n",
|
"from captcha.DataSplitter import DataSplitter\n",
|
||||||
@@ -46,6 +121,40 @@
|
|||||||
"from captcha.CaptchaShape import CaptchaShape"
|
"from captcha.CaptchaShape import CaptchaShape"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"id": "BWqAvnVOl31d"
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from pathlib import Path\n",
|
||||||
|
"\n",
|
||||||
|
"class GoogleDriveManager:\n",
|
||||||
|
" \n",
|
||||||
|
" _googleDriveFolder = Path('/content/gdrive')\n",
|
||||||
|
" _baseFolder = _googleDriveFolder / 'MyDrive/CAPTCHA/models/'\n",
|
||||||
|
"\n",
|
||||||
|
" @staticmethod\n",
|
||||||
|
" def mount():\n",
|
||||||
|
" from google.colab import drive\n",
|
||||||
|
" drive.mount(str(GoogleDriveManager._googleDriveFolder))\n",
|
||||||
|
"\n",
|
||||||
|
" @staticmethod\n",
|
||||||
|
" def uploadFolderToGoogleDrive(folder):\n",
|
||||||
|
" basename = !basename {folder}\n",
|
||||||
|
" basename = basename[0]\n",
|
||||||
|
" !cd {folder}/..; zip -r {basename}.zip {basename}/\n",
|
||||||
|
" !cd {folder}/..; cp {basename}.zip {GoogleDriveManager._baseFolder}\n",
|
||||||
|
" \n",
|
||||||
|
" @staticmethod\n",
|
||||||
|
" def downloadFolderFromGoogleDrive(folder):\n",
|
||||||
|
" !cp {GoogleDriveManager._baseFolder}/{folder}.zip .\n",
|
||||||
|
" !rm -rf {folder}\n",
|
||||||
|
" !unzip {folder}.zip\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -138,7 +247,9 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"id": "HEKh6eval31k"
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def printLayers(model):\n",
|
"def printLayers(model):\n",
|
||||||
@@ -173,17 +284,6 @@
|
|||||||
"## Preparation"
|
"## Preparation"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"id": "NZrKXF6P3MGY"
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"inColab = 'google.colab' in str(get_ipython())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -200,28 +300,28 @@
|
|||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"id": "S_4hl4S4BmZK"
|
"id": "WmUghcQaMf3y"
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"if inColab:\n",
|
"modelDAO = ModelDAO()\n",
|
||||||
" !cp {GoogleDriveManager._baseFolder}/captchas.zip .\n",
|
"charNumConverter = CharNumConverter(CaptchaGenerator.characters)\n",
|
||||||
" !unzip captchas.zip"
|
"predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)\n",
|
||||||
|
"captchaShape = CaptchaShape()\n",
|
||||||
|
"datasetFactory = DatasetFactory(captchaShape, charNumConverter.char_to_num, batch_size = 64)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {},
|
||||||
"id": "WmUghcQaMf3y"
|
|
||||||
},
|
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"modelDAO = ModelDAO(inColab)\n",
|
"def saveModel(model):\n",
|
||||||
"charNumConverter = CharNumConverter(CaptchaGenerator.characters)\n",
|
" modelFilepath = f'{srcPath}/captcha/{model.name}'\n",
|
||||||
"predictionsDecoder = PredictionsDecoder(CaptchaGenerator.captchaLength, charNumConverter.num_to_char)\n",
|
" modelDAO.saveModel(model, modelFilepath)\n",
|
||||||
"captchaShape = CaptchaShape()\n",
|
" if inColab:\n",
|
||||||
"datasetFactory = DatasetFactory(captchaShape, charNumConverter.char_to_num, batch_size = 64)"
|
" GoogleDriveManager.uploadFolderToGoogleDrive(modelFilepath)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -242,6 +342,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"if inColab:\n",
|
"if inColab:\n",
|
||||||
|
" !apt-get update\n",
|
||||||
" !sudo apt install ttf-mscorefonts-installer\n",
|
" !sudo apt install ttf-mscorefonts-installer\n",
|
||||||
" !sudo fc-cache -f\n",
|
" !sudo fc-cache -f\n",
|
||||||
" !fc-match Arial"
|
" !fc-match Arial"
|
||||||
@@ -257,8 +358,8 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# \"We generate 200,000 images for base model pre-training\"\n",
|
"# \"We generate 200,000 images for base model pre-training\"\n",
|
||||||
"captchaGenerator = CaptchaGenerator(\n",
|
"captchaGenerator = CaptchaGenerator(\n",
|
||||||
" numCaptchas = 50, # 50, # 200000,\n",
|
" numCaptchas = 200000, # 50, # 200000,\n",
|
||||||
" dataDir = Path(\"captchas/generated/VAERS/\"))"
|
" dataDir = Path(srcPath + '/captchas/generated/VAERS/'))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -334,7 +435,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"modelDAO.saveModel(model)"
|
"saveModel(model)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -397,7 +498,9 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"id": "qZvn1k2Ul31v"
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"modelName, numTrainableLayers = 'MobileNetV3Small', 104\n",
|
"modelName, numTrainableLayers = 'MobileNetV3Small', 104\n",
|
||||||
@@ -413,7 +516,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# FK-TODO: DRY with VAERSFileDownloader\n",
|
"# FK-TODO: DRY with VAERSFileDownloader\n",
|
||||||
"modelFilepath = f'{os.getcwd()}/captcha/{modelName}'\n",
|
"modelFilepath = f'{srcPath}/captcha/{modelName}'\n",
|
||||||
"model = modelDAO.loadModel(modelFilepath)\n",
|
"model = modelDAO.loadModel(modelFilepath)\n",
|
||||||
"model.summary(show_trainable=True)"
|
"model.summary(show_trainable=True)"
|
||||||
]
|
]
|
||||||
@@ -461,7 +564,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path(\"captcha/captchas/VAERS/\"), datasetFactory)"
|
"train_dataset, validation_dataset, test_dataset = getTrainValidationTestDatasets(Path(f\"{srcPath}/captcha/captchas/VAERS/\"), datasetFactory)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -522,18 +625,18 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"id": "FpJTHU6dxOVy"
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"modelDAO.saveModel(model)"
|
"saveModel(model)"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"accelerator": "GPU",
|
"accelerator": "GPU",
|
||||||
"colab": {
|
"colab": {
|
||||||
"collapsed_sections": [],
|
|
||||||
"name": "captcha.ipynb",
|
|
||||||
"private_outputs": true,
|
"private_outputs": true,
|
||||||
"provenance": []
|
"provenance": []
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ from PIL import Image, ImageDraw, ImageFont
|
|||||||
import random
|
import random
|
||||||
import string
|
import string
|
||||||
import shutil
|
import shutil
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
class CaptchaGenerator:
|
class CaptchaGenerator:
|
||||||
@@ -15,7 +16,7 @@ class CaptchaGenerator:
|
|||||||
|
|
||||||
def createAndSaveCaptchas(self):
|
def createAndSaveCaptchas(self):
|
||||||
self._prepareDataDir()
|
self._prepareDataDir()
|
||||||
for _ in range(self.numCaptchas):
|
for _ in tqdm(range(self.numCaptchas)):
|
||||||
self._createAndSaveCaptcha()
|
self._createAndSaveCaptcha()
|
||||||
|
|
||||||
def _prepareDataDir(self):
|
def _prepareDataDir(self):
|
||||||
|
|||||||
@@ -28,4 +28,4 @@ class CaptchaReader:
|
|||||||
return PredictionsDecoder(CaptchaGenerator.captchaLength, CharNumConverter(CaptchaGenerator.characters).num_to_char).decode_batch_predictions(preds)
|
return PredictionsDecoder(CaptchaGenerator.captchaLength, CharNumConverter(CaptchaGenerator.characters).num_to_char).decode_batch_predictions(preds)
|
||||||
|
|
||||||
def _createPredictionModel(self):
|
def _createPredictionModel(self):
|
||||||
return ModelFactory.createPredictionModel(ModelDAO(inColab=False).loadModel(self.modelFilepath))
|
return ModelFactory.createPredictionModel(ModelDAO().loadModel(self.modelFilepath))
|
||||||
|
|||||||
@@ -1,27 +0,0 @@
|
|||||||
from pathlib import Path
|
|
||||||
|
|
||||||
|
|
||||||
class GoogleDriveManager:
|
|
||||||
|
|
||||||
_googleDriveFolder = Path('/content/gdrive')
|
|
||||||
_baseFolder = _googleDriveFolder / 'MyDrive/CAPTCHA/models/'
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def mount():
|
|
||||||
from google.colab import drive
|
|
||||||
drive.mount(str(GoogleDriveManager._googleDriveFolder))
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def uploadFolderToGoogleDrive(folder):
|
|
||||||
pass
|
|
||||||
# FK-FIXME:
|
|
||||||
# !zip -r {folder}.zip {folder}/
|
|
||||||
# !cp {folder}.zip {GoogleDriveManager._baseFolder}
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def downloadFolderFromGoogleDrive(folder):
|
|
||||||
pass
|
|
||||||
# FK-FIXME:
|
|
||||||
# !cp {GoogleDriveManager._baseFolder}/{folder}.zip .
|
|
||||||
# !rm -rf {folder}
|
|
||||||
# !unzip {folder}.zip
|
|
||||||
@@ -1,20 +1,12 @@
|
|||||||
from tensorflow import keras
|
from tensorflow import keras
|
||||||
from captcha.GoogleDriveManager import GoogleDriveManager
|
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
|
||||||
class ModelDAO:
|
class ModelDAO:
|
||||||
|
|
||||||
def __init__(self, inColab):
|
def saveModel(self, model, modelFilepath):
|
||||||
self.inColab = inColab
|
shutil.rmtree(modelFilepath, ignore_errors = True)
|
||||||
|
model.save(modelFilepath)
|
||||||
def saveModel(self, model):
|
|
||||||
shutil.rmtree(model.name, ignore_errors = True)
|
|
||||||
model.save(model.name)
|
|
||||||
if self.inColab:
|
|
||||||
GoogleDriveManager.uploadFolderToGoogleDrive(model.name)
|
|
||||||
|
|
||||||
def loadModel(self, modelFilepath):
|
def loadModel(self, modelFilepath):
|
||||||
if self.inColab:
|
|
||||||
GoogleDriveManager.downloadFolderFromGoogleDrive(modelFilepath)
|
|
||||||
return keras.models.load_model(modelFilepath)
|
return keras.models.load_model(modelFilepath)
|
||||||
|
|||||||
Reference in New Issue
Block a user