Expand 3D captcha into three subtypes: 3d_text, 3d_rotate, 3d_slider

Split the single "3d" captcha type into three subtypes, each handled by its own independent expert model:
- 3d_text: 3D perspective text OCR (renamed from old "3d", CTC-based ThreeDCNN)
- 3d_rotate: rotation angle regression (new RegressionCNN, circular loss)
- 3d_slider: slider offset regression (new RegressionCNN, SmoothL1 loss)

CAPTCHA_TYPES is expanded from 3 to 5 classes. The classifier's synthetic
sample count is raised from 30000 to 50000 (10000 per class). This commit adds
new generators, a new model (RegressionCNN), a dataset, training utilities, and
full pipeline/export/CLI support for all subtypes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Hua
2026-03-11 13:55:53 +08:00
parent 760b80ee5e
commit f5be7671bc
20 changed files with 1109 additions and 142 deletions

View File

@@ -23,12 +23,16 @@ CLASSIFIER_DIR = DATA_DIR / "classifier"
# 合成数据子目录
SYNTHETIC_NORMAL_DIR = SYNTHETIC_DIR / "normal"
SYNTHETIC_MATH_DIR = SYNTHETIC_DIR / "math"
SYNTHETIC_3D_DIR = SYNTHETIC_DIR / "3d"
SYNTHETIC_3D_TEXT_DIR = SYNTHETIC_DIR / "3d_text"
SYNTHETIC_3D_ROTATE_DIR = SYNTHETIC_DIR / "3d_rotate"
SYNTHETIC_3D_SLIDER_DIR = SYNTHETIC_DIR / "3d_slider"
# 真实数据子目录
REAL_NORMAL_DIR = REAL_DIR / "normal"
REAL_MATH_DIR = REAL_DIR / "math"
REAL_3D_DIR = REAL_DIR / "3d"
REAL_3D_TEXT_DIR = REAL_DIR / "3d_text"
REAL_3D_ROTATE_DIR = REAL_DIR / "3d_rotate"
REAL_3D_SLIDER_DIR = REAL_DIR / "3d_slider"
# ============================================================
# 模型输出目录
@@ -38,8 +42,10 @@ ONNX_DIR = PROJECT_ROOT / "onnx_models"
# 确保关键目录存在
for _dir in [
SYNTHETIC_NORMAL_DIR, SYNTHETIC_MATH_DIR, SYNTHETIC_3D_DIR,
REAL_NORMAL_DIR, REAL_MATH_DIR, REAL_3D_DIR,
SYNTHETIC_NORMAL_DIR, SYNTHETIC_MATH_DIR,
SYNTHETIC_3D_TEXT_DIR, SYNTHETIC_3D_ROTATE_DIR, SYNTHETIC_3D_SLIDER_DIR,
REAL_NORMAL_DIR, REAL_MATH_DIR,
REAL_3D_TEXT_DIR, REAL_3D_ROTATE_DIR, REAL_3D_SLIDER_DIR,
CLASSIFIER_DIR, CHECKPOINTS_DIR, ONNX_DIR,
]:
_dir.mkdir(parents=True, exist_ok=True)
@@ -57,7 +63,7 @@ MATH_CHARS = "0123456789+-×÷=?"
THREED_CHARS = "23456789ABCDEFGHJKMNPQRSTUVWXYZ"
# 验证码类型列表 (调度分类器输出)
CAPTCHA_TYPES = ["normal", "math", "3d"]
CAPTCHA_TYPES = ["normal", "math", "3d_text", "3d_rotate", "3d_slider"]
NUM_CAPTCHA_TYPES = len(CAPTCHA_TYPES)
# ============================================================
@@ -67,7 +73,9 @@ IMAGE_SIZE = {
"classifier": (64, 128), # 调度分类器输入
"normal": (40, 120), # 普通字符识别
"math": (40, 160), # 算式识别 (更宽)
"3d": (60, 160), # 3D 立体识别
"3d_text": (60, 160), # 3D 立体文字识别
"3d_rotate": (80, 80), # 3D 旋转角度回归 (正方形)
"3d_slider": (80, 240), # 3D 滑块偏移回归
}
# ============================================================
@@ -91,11 +99,25 @@ GENERATE_CONFIG = {
"rotation_range": (-10, 10),
"noise_line_range": (2, 4),
},
"3d": {
"3d_text": {
"char_count_range": (4, 5),
"image_size": (160, 60), # 生成图片尺寸 (W, H)
"shadow_offset": (3, 3), # 阴影偏移
"perspective_intensity": 0.3, # 透视变换强度
"rotation_range": (-20, 20), # 字符旋转角度
},
"3d_rotate": {
"image_size": (80, 80), # 生成图片尺寸 (W, H)
"disc_radius": 35, # 圆盘半径
"marker_size": 8, # 方向标记大小
"bg_color_range": (200, 240), # 背景色范围
},
"3d_slider": {
"image_size": (240, 80), # 生成图片尺寸 (W, H)
"puzzle_size": (40, 40), # 拼图块大小 (W, H)
"gap_x_range": (50, 200), # 缺口 x 坐标范围
"piece_left_margin": 5, # 拼图块左侧留白
"bg_noise_intensity": 30, # 背景纹理噪声强度
},
}
@@ -108,7 +130,7 @@ TRAIN_CONFIG = {
"batch_size": 128,
"lr": 1e-3,
"scheduler": "cosine",
"synthetic_samples": 30000, # 每类 10000
"synthetic_samples": 50000, # 每类 10000 × 5 类
"val_split": 0.1, # 验证集比例
},
"normal": {
@@ -129,7 +151,7 @@ TRAIN_CONFIG = {
"loss": "CTCLoss",
"val_split": 0.1,
},
"threed": {
"3d_text": {
"epochs": 80,
"batch_size": 64,
"lr": 5e-4,
@@ -138,6 +160,24 @@ TRAIN_CONFIG = {
"loss": "CTCLoss",
"val_split": 0.1,
},
"3d_rotate": {
"epochs": 60,
"batch_size": 128,
"lr": 1e-3,
"scheduler": "cosine",
"synthetic_samples": 60000,
"loss": "SmoothL1",
"val_split": 0.1,
},
"3d_slider": {
"epochs": 60,
"batch_size": 128,
"lr": 1e-3,
"scheduler": "cosine",
"synthetic_samples": 60000,
"loss": "SmoothL1",
"val_split": 0.1,
},
}
# ============================================================
@@ -163,6 +203,14 @@ ONNX_CONFIG = {
"dynamic_batch": True, # 支持动态 batch size
}
# ============================================================
# Label ranges for the regression models
# ============================================================
# Maps each regression captcha subtype to a pair of label bounds.
# NOTE(review): how consumers use these bounds (normalization, clamping,
# whether the upper bound is inclusive) is not visible here — confirm
# against the callers.
# NOTE(review): GENERATE_CONFIG["3d_slider"]["gap_x_range"] is (50, 200),
# while the lower bound here is 10 — confirm the difference is intended.
REGRESSION_RANGE = {
"3d_rotate": (0, 360), # rotation angle, 0-359 degrees
"3d_slider": (10, 200), # slider x offset (pixels)
}
# ============================================================
# 推理配置
# ============================================================