Initialize repository

This commit is contained in:
Hua
2026-03-10 18:47:29 +08:00
commit 760b80ee5e
32 changed files with 4343 additions and 0 deletions

195
config.py Normal file
View File

@@ -0,0 +1,195 @@
"""
全局配置 - 验证码识别多模型系统 (CaptchaBreaker)
定义字符集、图片尺寸、路径、训练超参等所有全局常量。
"""
import os
from pathlib import Path
# ============================================================
# Project root directory
# ============================================================
# Directory that contains this file; every path below is derived from it.
PROJECT_ROOT = Path(__file__).resolve().parent

# ============================================================
# Data directories
# ============================================================
DATA_DIR = PROJECT_ROOT / "data"
SYNTHETIC_DIR = DATA_DIR / "synthetic"
REAL_DIR = DATA_DIR / "real"
CLASSIFIER_DIR = DATA_DIR / "classifier"

# Synthetic-data subdirectories
SYNTHETIC_NORMAL_DIR = SYNTHETIC_DIR / "normal"
SYNTHETIC_MATH_DIR = SYNTHETIC_DIR / "math"
SYNTHETIC_3D_DIR = SYNTHETIC_DIR / "3d"

# Real-data subdirectories
REAL_NORMAL_DIR = REAL_DIR / "normal"
REAL_MATH_DIR = REAL_DIR / "math"
REAL_3D_DIR = REAL_DIR / "3d"

# ============================================================
# Model output directories
# ============================================================
CHECKPOINTS_DIR = PROJECT_ROOT / "checkpoints"
ONNX_DIR = PROJECT_ROOT / "onnx_models"

# Make sure the key directories exist.
# NOTE: this runs at import time, so importing this module creates the
# directories on disk. `parents=True` also creates DATA_DIR, SYNTHETIC_DIR
# and REAL_DIR implicitly; `exist_ok=True` makes repeated imports harmless.
for _dir in (
    SYNTHETIC_NORMAL_DIR, SYNTHETIC_MATH_DIR, SYNTHETIC_3D_DIR,
    REAL_NORMAL_DIR, REAL_MATH_DIR, REAL_3D_DIR,
    CLASSIFIER_DIR, CHECKPOINTS_DIR, ONNX_DIR,
):
    _dir.mkdir(parents=True, exist_ok=True)
# ============================================================
# Character set definitions
# ============================================================
# Normal text CAPTCHA: per the current local configuration the easily
# confused characters are kept, so this covers all digits plus all
# uppercase letters.
NORMAL_CHARS = (
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
)

# Arithmetic CAPTCHA: digits followed by the operator / equation symbols.
MATH_CHARS = (
    "0123456789"
    "+-×÷=?"
)

# 3D CAPTCHA: still uses the reduced set with the easily confused
# characters removed (no 0, 1, I, L, O).
THREED_CHARS = (
    "23456789"
    "ABCDEFGHJKMNPQRSTUVWXYZ"
)

# CAPTCHA type list (the outputs of the dispatch classifier).
CAPTCHA_TYPES = [
    "normal",
    "math",
    "3d",
]
NUM_CAPTCHA_TYPES = len(CAPTCHA_TYPES)
# ============================================================
# Image size configuration, as (H, W)
# ============================================================
IMAGE_SIZE = {
    # Input of the dispatch classifier
    "classifier": (64, 128),
    # Normal text recognition
    "normal": (40, 120),
    # Arithmetic recognition (wider)
    "math": (40, 160),
    # 3D recognition
    "3d": (60, 160),
}
# ============================================================
# CAPTCHA generation parameters
# ============================================================
# One entry per CAPTCHA type. Note that "image_size" here is (W, H),
# the opposite order from IMAGE_SIZE, which is (H, W).
GENERATE_CONFIG = {
    "normal": {
        # Number of characters: 4-5
        "char_count_range": (4, 5),
        # Light background, range for each RGB channel
        "bg_color_range": (230, 255),
        # Character rotation angle range
        "rotation_range": (-15, 15),
        # Number of interference lines
        "noise_line_range": (2, 5),
        # Number of noise points
        "noise_point_num": 100,
        # Gaussian blur radius
        "blur_radius": 0.8,
        # Generated image size (W, H)
        "image_size": (120, 40),
    },
    "math": {
        # Operand range
        "operand_range": (1, 30),
        # Supported operators (division only generates evenly divisible cases).
        # NOTE(review): "÷" is not in this list even though the comment
        # mentions division and MATH_CHARS contains it — confirm whether
        # division is meant to be generated.
        "operators": ["+", "-", "×"],
        # Generated image size (W, H)
        "image_size": (160, 40),
        "bg_color_range": (230, 255),
        "rotation_range": (-10, 10),
        "noise_line_range": (2, 4),
    },
    "3d": {
        "char_count_range": (4, 5),
        # Generated image size (W, H)
        "image_size": (160, 60),
        # Shadow offset
        "shadow_offset": (3, 3),
        # Perspective transform intensity
        "perspective_intensity": 0.3,
    },
}
# ============================================================
# Training configuration
# ============================================================
# One entry per model.
# NOTE(review): the 3D model is keyed "threed" here, while CAPTCHA_TYPES,
# IMAGE_SIZE and GENERATE_CONFIG use "3d" — confirm callers use the
# matching key before unifying the names.
TRAIN_CONFIG = {
    "classifier": {
        "epochs": 30,
        "batch_size": 128,
        "lr": 0.001,
        "scheduler": "cosine",
        # 10000 per class
        "synthetic_samples": 30_000,
        # Fraction of the data held out for validation
        "val_split": 0.1,
    },
    "normal": {
        "epochs": 50,
        "batch_size": 128,
        "lr": 0.001,
        "scheduler": "cosine",
        "synthetic_samples": 60_000,
        "loss": "CTCLoss",
        "val_split": 0.1,
    },
    "math": {
        "epochs": 50,
        "batch_size": 128,
        "lr": 0.001,
        "scheduler": "cosine",
        "synthetic_samples": 60_000,
        "loss": "CTCLoss",
        "val_split": 0.1,
    },
    "threed": {
        "epochs": 80,
        "batch_size": 64,
        "lr": 0.0005,
        "scheduler": "cosine",
        "synthetic_samples": 80_000,
        "loss": "CTCLoss",
        "val_split": 0.1,
    },
}
# ============================================================
# Data augmentation parameters (used during training)
# ============================================================
AUGMENT_CONFIG = {
    # RandomAffine: rotation range
    "degrees": 8,
    # RandomAffine: translation range
    "translate": (0.05, 0.05),
    # RandomAffine: scaling range
    "scale": (0.95, 1.05),
    # ColorJitter: brightness
    "brightness": 0.3,
    # ColorJitter: contrast
    "contrast": 0.3,
    # GaussianBlur: kernel size
    "blur_kernel": 3,
    # GaussianBlur: sigma
    "blur_sigma": (0.1, 0.5),
    # RandomErasing: probability
    "erasing_prob": 0.15,
    # RandomErasing: area ratio
    "erasing_scale": (0.01, 0.05),
}
# ============================================================
# ONNX export configuration
# ============================================================
ONNX_CONFIG = {
    "opset_version": 18,
    # Support a dynamic batch size
    "dynamic_batch": True,
}
# ============================================================
# Inference configuration
# ============================================================
INFERENCE_CONFIG = {
    # Stored as a plain string path rather than a Path object.
    "default_models_dir": os.fspath(ONNX_DIR),
    "normalize_mean": 0.5,
    "normalize_std": 0.5,
}
# ============================================================
# Random seed (keeps data generation reproducible)
# ============================================================
RANDOM_SEED = 42
# ============================================================
# Device configuration (prefer GPU, fall back to CPU)
# torch is imported lazily so that using only the generators does not
# require torch to be installed.
# ============================================================
def get_device() -> "torch.device":
    """Return the torch device to use, preferring the GPU.

    Returns:
        ``torch.device("cuda")`` when ``torch.cuda.is_available()`` is true,
        otherwise ``torch.device("cpu")``.

    Raises:
        ImportError: if torch is not installed (the import happens on call,
            not when this module is imported).
    """
    # Deliberately function-scoped: keeps this config module importable
    # without torch.
    import torch

    return torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ============================================================
# Server configuration (optional HTTP service)
# ============================================================
SERVER_CONFIG = {
    # NOTE(review): "0.0.0.0" conventionally means "listen on all network
    # interfaces" — confirm this exposure is intended for deployments.
    "host": "0.0.0.0",
    "port": 8080,
}