Expand 3D captcha into three subtypes: 3d_text, 3d_rotate, 3d_slider
Split the single "3d" captcha type into three independent expert models:

- 3d_text: 3D perspective text OCR (renamed from the old "3d"; CTC-based ThreeDCNN)
- 3d_rotate: rotation angle regression (new RegressionCNN, circular loss)
- 3d_slider: slider offset regression (new RegressionCNN, SmoothL1 loss)

CAPTCHA_TYPES is expanded from 3 to 5 classes. Classifier synthetic samples are raised from 30000 to 50000 (10000 per class). Adds new generators, model, dataset, and training utilities, plus full pipeline/export/CLI support for all subtypes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
66
config.py
66
config.py
@@ -23,12 +23,16 @@ CLASSIFIER_DIR = DATA_DIR / "classifier"
|
||||
# 合成数据子目录
|
||||
SYNTHETIC_NORMAL_DIR = SYNTHETIC_DIR / "normal"
|
||||
SYNTHETIC_MATH_DIR = SYNTHETIC_DIR / "math"
|
||||
SYNTHETIC_3D_DIR = SYNTHETIC_DIR / "3d"
|
||||
SYNTHETIC_3D_TEXT_DIR = SYNTHETIC_DIR / "3d_text"
|
||||
SYNTHETIC_3D_ROTATE_DIR = SYNTHETIC_DIR / "3d_rotate"
|
||||
SYNTHETIC_3D_SLIDER_DIR = SYNTHETIC_DIR / "3d_slider"
|
||||
|
||||
# 真实数据子目录
|
||||
REAL_NORMAL_DIR = REAL_DIR / "normal"
|
||||
REAL_MATH_DIR = REAL_DIR / "math"
|
||||
REAL_3D_DIR = REAL_DIR / "3d"
|
||||
REAL_3D_TEXT_DIR = REAL_DIR / "3d_text"
|
||||
REAL_3D_ROTATE_DIR = REAL_DIR / "3d_rotate"
|
||||
REAL_3D_SLIDER_DIR = REAL_DIR / "3d_slider"
|
||||
|
||||
# ============================================================
|
||||
# 模型输出目录
|
||||
@@ -38,8 +42,10 @@ ONNX_DIR = PROJECT_ROOT / "onnx_models"
|
||||
|
||||
# 确保关键目录存在
|
||||
for _dir in [
|
||||
SYNTHETIC_NORMAL_DIR, SYNTHETIC_MATH_DIR, SYNTHETIC_3D_DIR,
|
||||
REAL_NORMAL_DIR, REAL_MATH_DIR, REAL_3D_DIR,
|
||||
SYNTHETIC_NORMAL_DIR, SYNTHETIC_MATH_DIR,
|
||||
SYNTHETIC_3D_TEXT_DIR, SYNTHETIC_3D_ROTATE_DIR, SYNTHETIC_3D_SLIDER_DIR,
|
||||
REAL_NORMAL_DIR, REAL_MATH_DIR,
|
||||
REAL_3D_TEXT_DIR, REAL_3D_ROTATE_DIR, REAL_3D_SLIDER_DIR,
|
||||
CLASSIFIER_DIR, CHECKPOINTS_DIR, ONNX_DIR,
|
||||
]:
|
||||
_dir.mkdir(parents=True, exist_ok=True)
|
||||
@@ -57,7 +63,7 @@ MATH_CHARS = "0123456789+-×÷=?"
|
||||
THREED_CHARS = "23456789ABCDEFGHJKMNPQRSTUVWXYZ"
|
||||
|
||||
# 验证码类型列表 (调度分类器输出)
|
||||
CAPTCHA_TYPES = ["normal", "math", "3d"]
|
||||
CAPTCHA_TYPES = ["normal", "math", "3d_text", "3d_rotate", "3d_slider"]
|
||||
NUM_CAPTCHA_TYPES = len(CAPTCHA_TYPES)
|
||||
|
||||
# ============================================================
|
||||
@@ -67,7 +73,9 @@ IMAGE_SIZE = {
|
||||
"classifier": (64, 128), # 调度分类器输入
|
||||
"normal": (40, 120), # 普通字符识别
|
||||
"math": (40, 160), # 算式识别 (更宽)
|
||||
"3d": (60, 160), # 3D 立体识别
|
||||
"3d_text": (60, 160), # 3D 立体文字识别
|
||||
"3d_rotate": (80, 80), # 3D 旋转角度回归 (正方形)
|
||||
"3d_slider": (80, 240), # 3D 滑块偏移回归
|
||||
}
|
||||
|
||||
# ============================================================
|
||||
@@ -91,11 +99,25 @@ GENERATE_CONFIG = {
|
||||
"rotation_range": (-10, 10),
|
||||
"noise_line_range": (2, 4),
|
||||
},
|
||||
"3d": {
|
||||
"3d_text": {
|
||||
"char_count_range": (4, 5),
|
||||
"image_size": (160, 60), # 生成图片尺寸 (W, H)
|
||||
"shadow_offset": (3, 3), # 阴影偏移
|
||||
"perspective_intensity": 0.3, # 透视变换强度
|
||||
"rotation_range": (-20, 20), # 字符旋转角度
|
||||
},
|
||||
"3d_rotate": {
|
||||
"image_size": (80, 80), # 生成图片尺寸 (W, H)
|
||||
"disc_radius": 35, # 圆盘半径
|
||||
"marker_size": 8, # 方向标记大小
|
||||
"bg_color_range": (200, 240), # 背景色范围
|
||||
},
|
||||
"3d_slider": {
|
||||
"image_size": (240, 80), # 生成图片尺寸 (W, H)
|
||||
"puzzle_size": (40, 40), # 拼图块大小 (W, H)
|
||||
"gap_x_range": (50, 200), # 缺口 x 坐标范围
|
||||
"piece_left_margin": 5, # 拼图块左侧留白
|
||||
"bg_noise_intensity": 30, # 背景纹理噪声强度
|
||||
},
|
||||
}
|
||||
|
||||
@@ -108,7 +130,7 @@ TRAIN_CONFIG = {
|
||||
"batch_size": 128,
|
||||
"lr": 1e-3,
|
||||
"scheduler": "cosine",
|
||||
"synthetic_samples": 30000, # 每类 10000
|
||||
"synthetic_samples": 50000, # 每类 10000 × 5 类
|
||||
"val_split": 0.1, # 验证集比例
|
||||
},
|
||||
"normal": {
|
||||
@@ -129,7 +151,7 @@ TRAIN_CONFIG = {
|
||||
"loss": "CTCLoss",
|
||||
"val_split": 0.1,
|
||||
},
|
||||
"threed": {
|
||||
"3d_text": {
|
||||
"epochs": 80,
|
||||
"batch_size": 64,
|
||||
"lr": 5e-4,
|
||||
@@ -138,6 +160,24 @@ TRAIN_CONFIG = {
|
||||
"loss": "CTCLoss",
|
||||
"val_split": 0.1,
|
||||
},
|
||||
"3d_rotate": {
|
||||
"epochs": 60,
|
||||
"batch_size": 128,
|
||||
"lr": 1e-3,
|
||||
"scheduler": "cosine",
|
||||
"synthetic_samples": 60000,
|
||||
"loss": "SmoothL1",
|
||||
"val_split": 0.1,
|
||||
},
|
||||
"3d_slider": {
|
||||
"epochs": 60,
|
||||
"batch_size": 128,
|
||||
"lr": 1e-3,
|
||||
"scheduler": "cosine",
|
||||
"synthetic_samples": 60000,
|
||||
"loss": "SmoothL1",
|
||||
"val_split": 0.1,
|
||||
},
|
||||
}
|
||||
|
||||
# ============================================================
|
||||
@@ -163,6 +203,14 @@ ONNX_CONFIG = {
|
||||
"dynamic_batch": True, # 支持动态 batch size
|
||||
}
|
||||
|
||||
# ============================================================
|
||||
# 回归模型标签范围
|
||||
# ============================================================
|
||||
REGRESSION_RANGE = {
|
||||
"3d_rotate": (0, 360), # 旋转角度 0-359°
|
||||
"3d_slider": (10, 200), # 滑块 x 偏移 (像素)
|
||||
}
|
||||
|
||||
# ============================================================
|
||||
# 推理配置
|
||||
# ============================================================
|
||||
|
||||
Reference in New Issue
Block a user