Initialize repository
This commit is contained in:
195
config.py
Normal file
195
config.py
Normal file
@@ -0,0 +1,195 @@
|
||||
"""
|
||||
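Global configuration - multi-model captcha recognition system (CaptchaBreaker)

Defines the character sets, image sizes, paths, training hyperparameters and all other global constants.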
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# ============================================================
# Project root directory
# ============================================================
# Directory that contains this file; every path below is derived from it.
PROJECT_ROOT = Path(__file__).resolve().parent

# ============================================================
# Data directories
# ============================================================
DATA_DIR = PROJECT_ROOT / "data"
SYNTHETIC_DIR = DATA_DIR / "synthetic"
REAL_DIR = DATA_DIR / "real"
CLASSIFIER_DIR = DATA_DIR / "classifier"

# Synthetic-data subdirectories
SYNTHETIC_NORMAL_DIR = SYNTHETIC_DIR / "normal"
SYNTHETIC_MATH_DIR = SYNTHETIC_DIR / "math"
SYNTHETIC_3D_DIR = SYNTHETIC_DIR / "3d"

# Real-data subdirectories
REAL_NORMAL_DIR = REAL_DIR / "normal"
REAL_MATH_DIR = REAL_DIR / "math"
REAL_3D_DIR = REAL_DIR / "3d"

# ============================================================
# Model output directories
# ============================================================
CHECKPOINTS_DIR = PROJECT_ROOT / "checkpoints"
ONNX_DIR = PROJECT_ROOT / "onnx_models"

# Make sure the key directories exist.
# NOTE: this loop runs at import time, so importing this module creates the
# directories (and any missing parents) on disk; existing ones are left alone.
for _dir in [
    SYNTHETIC_NORMAL_DIR, SYNTHETIC_MATH_DIR, SYNTHETIC_3D_DIR,
    REAL_NORMAL_DIR, REAL_MATH_DIR, REAL_3D_DIR,
    CLASSIFIER_DIR, CHECKPOINTS_DIR, ONNX_DIR,
]:
    _dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# ============================================================
# Character set definitions
# ============================================================
# Normal text captcha: per the current local configuration the easily-confused
# characters are kept, so this covers all digits + all uppercase letters.
NORMAL_CHARS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# Arithmetic captcha: digits + operators
MATH_CHARS = "0123456789+-×÷=?"

# 3D captcha: keeps using the reduced character set with easily-confused
# characters removed (no 0, 1, I, L or O compared with NORMAL_CHARS).
THREED_CHARS = "23456789ABCDEFGHJKMNPQRSTUVWXYZ"

# List of captcha types (outputs of the dispatch classifier)
CAPTCHA_TYPES = ["normal", "math", "3d"]
NUM_CAPTCHA_TYPES = len(CAPTCHA_TYPES)
|
||||
|
||||
# ============================================================
# Image size configuration (H, W)
# ============================================================
IMAGE_SIZE = {
    "classifier": (64, 128),    # dispatch classifier input
    "normal": (40, 120),        # normal text recognition
    "math": (40, 160),          # arithmetic recognition (wider)
    "3d": (60, 160),            # 3D captcha recognition
}
|
||||
|
||||
# ============================================================
# Captcha generation parameters
# ============================================================
# NOTE: "image_size" here is (W, H), the reverse of the (H, W) order used by
# IMAGE_SIZE.
GENERATE_CONFIG = {
    "normal": {
        "char_count_range": (4, 5),       # number of characters: 4-5
        "bg_color_range": (230, 255),     # light background, per RGB channel
        "rotation_range": (-15, 15),      # character rotation angle
        "noise_line_range": (2, 5),       # number of interference lines
        "noise_point_num": 100,           # number of noise points
        "blur_radius": 0.8,               # Gaussian blur radius
        "image_size": (120, 40),          # generated image size (W, H)
    },
    "math": {
        "operand_range": (1, 30),         # operand range
        # Supported operators (division only generates exactly divisible cases).
        # NOTE(review): "÷" is not in this list although the comment mentions
        # division and MATH_CHARS contains it — confirm whether that is intended.
        "operators": ["+", "-", "×"],
        "image_size": (160, 40),          # generated image size (W, H)
        "bg_color_range": (230, 255),
        "rotation_range": (-10, 10),
        "noise_line_range": (2, 4),
    },
    "3d": {
        "char_count_range": (4, 5),
        "image_size": (160, 60),          # generated image size (W, H)
        "shadow_offset": (3, 3),          # shadow offset
        "perspective_intensity": 0.3,     # perspective transform strength
    },
}
|
||||
|
||||
# ============================================================
# Training configuration
# ============================================================
TRAIN_CONFIG = {
    "classifier": {
        "epochs": 30,
        "batch_size": 128,
        "lr": 1e-3,
        "scheduler": "cosine",
        "synthetic_samples": 30000,   # 10000 per class
        "val_split": 0.1,             # validation split ratio
    },
    "normal": {
        "epochs": 50,
        "batch_size": 128,
        "lr": 1e-3,
        "scheduler": "cosine",
        "synthetic_samples": 60000,
        "loss": "CTCLoss",
        "val_split": 0.1,
    },
    "math": {
        "epochs": 50,
        "batch_size": 128,
        "lr": 1e-3,
        "scheduler": "cosine",
        "synthetic_samples": 60000,
        "loss": "CTCLoss",
        "val_split": 0.1,
    },
    # NOTE(review): this key is "threed", while CAPTCHA_TYPES, IMAGE_SIZE and
    # GENERATE_CONFIG use "3d" for the same captcha type — confirm that callers
    # map between the two spellings.
    "threed": {
        "epochs": 80,
        "batch_size": 64,
        "lr": 5e-4,
        "scheduler": "cosine",
        "synthetic_samples": 80000,
        "loss": "CTCLoss",
        "val_split": 0.1,
    },
}
|
||||
|
||||
# ============================================================
# Data augmentation parameters (used during training)
# ============================================================
AUGMENT_CONFIG = {
    "degrees": 8,                     # RandomAffine rotation range
    "translate": (0.05, 0.05),        # translation range
    "scale": (0.95, 1.05),            # scaling range
    "brightness": 0.3,                # ColorJitter brightness
    "contrast": 0.3,                  # ColorJitter contrast
    "blur_kernel": 3,                 # GaussianBlur kernel size
    "blur_sigma": (0.1, 0.5),         # GaussianBlur sigma
    "erasing_prob": 0.15,             # RandomErasing probability
    "erasing_scale": (0.01, 0.05),    # RandomErasing area ratio
}
|
||||
|
||||
# ============================================================
# ONNX export configuration
# ============================================================
ONNX_CONFIG = {
    "opset_version": 18,
    "dynamic_batch": True,    # support a dynamic batch size
}
|
||||
|
||||
# ============================================================
# Inference configuration
# ============================================================
INFERENCE_CONFIG = {
    # Stored as a plain string rather than a Path object.
    "default_models_dir": str(ONNX_DIR),
    "normalize_mean": 0.5,
    "normalize_std": 0.5,
}
|
||||
|
||||
# ============================================================
# Random seed (keeps data generation reproducible)
# ============================================================
RANDOM_SEED = 42
|
||||
|
||||
# ============================================================
# Device configuration (prefer GPU, fall back to CPU)
# torch is imported lazily so that it does not have to be installed when only
# the generators are used.
# ============================================================
def get_device():
    """Return an available torch device, preferring the GPU.

    Returns:
        ``torch.device("cuda")`` when ``torch.cuda.is_available()`` is true,
        otherwise ``torch.device("cpu")``.

    Raises:
        ImportError: If torch is not installed (the import happens on call).
    """
    # Function-scope import: importing this module must not require torch.
    import torch

    return torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
|
||||
# ============================================================
# Service configuration (optional HTTP service)
# ============================================================
SERVER_CONFIG = {
    # NOTE(review): if this value is used as the bind address, "0.0.0.0"
    # listens on all network interfaces — confirm that exposure is intended.
    "host": "0.0.0.0",
    "port": 8080,
}
|
||||
Reference in New Issue
Block a user