Expand 3D captcha into three subtypes: 3d_text, 3d_rotate, 3d_slider

Split the single "3d" captcha type into three subtypes, each served by its own independent expert model:
- 3d_text: 3D perspective text OCR (renamed from old "3d", CTC-based ThreeDCNN)
- 3d_rotate: rotation angle regression (new RegressionCNN, circular loss)
- 3d_slider: slider offset regression (new RegressionCNN, SmoothL1 loss)

CAPTCHA_TYPES expanded from 3 to 5 classes. Classifier samples updated
to 50000 (10000 per class). Adds new generators, model, dataset, and
training utilities, plus full pipeline/export/CLI support for all
subtypes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Hua
2026-03-11 13:55:53 +08:00
parent 760b80ee5e
commit f5be7671bc
20 changed files with 1109 additions and 142 deletions

View File

@@ -1,16 +1,18 @@
"""
训练调度分类器 (CaptchaClassifier)
从各类型验证码数据中混合采样,训练分类器区分 normal / math / 3d。
从各类型验证码数据中混合采样,训练分类器区分 normal / math / 3d_text / 3d_rotate / 3d_slider
数据来源: data/classifier/ 目录 (按类型子目录组织)
用法: python -m training.train_classifier
"""
import os
import random
import shutil
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
@@ -24,15 +26,20 @@ from config import (
CLASSIFIER_DIR,
SYNTHETIC_NORMAL_DIR,
SYNTHETIC_MATH_DIR,
SYNTHETIC_3D_DIR,
SYNTHETIC_3D_TEXT_DIR,
SYNTHETIC_3D_ROTATE_DIR,
SYNTHETIC_3D_SLIDER_DIR,
CHECKPOINTS_DIR,
ONNX_DIR,
ONNX_CONFIG,
RANDOM_SEED,
get_device,
)
from generators.normal_gen import NormalCaptchaGenerator
from generators.math_gen import MathCaptchaGenerator
from generators.threed_gen import ThreeDCaptchaGenerator
from generators.threed_rotate_gen import ThreeDRotateGenerator
from generators.threed_slider_gen import ThreeDSliderGenerator
from models.classifier import CaptchaClassifier
from training.dataset import CaptchaDataset, build_train_transform, build_val_transform
@@ -52,7 +59,9 @@ def _prepare_classifier_data():
type_info = [
("normal", SYNTHETIC_NORMAL_DIR, NormalCaptchaGenerator),
("math", SYNTHETIC_MATH_DIR, MathCaptchaGenerator),
("3d", SYNTHETIC_3D_DIR, ThreeDCaptchaGenerator),
("3d_text", SYNTHETIC_3D_TEXT_DIR, ThreeDCaptchaGenerator),
("3d_rotate", SYNTHETIC_3D_ROTATE_DIR, ThreeDRotateGenerator),
("3d_slider", SYNTHETIC_3D_SLIDER_DIR, ThreeDSliderGenerator),
]
for cls_name, syn_dir, gen_cls in type_info:
@@ -95,6 +104,13 @@ def main():
img_h, img_w = IMAGE_SIZE["classifier"]
device = get_device()
# 设置随机种子
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(RANDOM_SEED)
print("=" * 60)
print("训练调度分类器 (CaptchaClassifier)")
print(f" 类别: {CAPTCHA_TYPES}")
@@ -128,11 +144,11 @@ def main():
train_loader = DataLoader(
train_ds, batch_size=cfg["batch_size"], shuffle=True,
num_workers=2, pin_memory=True,
num_workers=0, pin_memory=True,
)
val_loader = DataLoader(
val_ds_clean, batch_size=cfg["batch_size"], shuffle=False,
num_workers=2, pin_memory=True,
num_workers=0, pin_memory=True,
)
print(f"[数据] 训练: {train_size} 验证: {val_size}")