Files
CaptchBreaker/inference/export_onnx.py
Hua f5be7671bc Expand 3D captcha into three subtypes: 3d_text, 3d_rotate, 3d_slider
Split the single "3d" captcha type into three independent expert models:
- 3d_text: 3D perspective text OCR (renamed from old "3d", CTC-based ThreeDCNN)
- 3d_rotate: rotation angle regression (new RegressionCNN, circular loss)
- 3d_slider: slider offset regression (new RegressionCNN, SmoothL1 loss)

CAPTCHA_TYPES expanded from 3 to 5 classes. Classifier samples updated
to 50000 (10000 per class). New generators, model, dataset, training
utilities, and full pipeline/export/CLI support for all subtypes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 13:55:53 +08:00

133 lines
4.1 KiB
Python

"""
ONNX 导出脚本
从 checkpoints/ 加载训练好的 PyTorch 模型,导出为 ONNX 格式到 onnx_models/。
支持逐个导出或一次导出全部。
"""
import torch
import torch.nn as nn
from config import (
CHECKPOINTS_DIR,
ONNX_DIR,
ONNX_CONFIG,
IMAGE_SIZE,
NORMAL_CHARS,
MATH_CHARS,
THREED_CHARS,
NUM_CAPTCHA_TYPES,
REGRESSION_RANGE,
)
from models.classifier import CaptchaClassifier
from models.lite_crnn import LiteCRNN
from models.threed_cnn import ThreeDCNN
from models.regression_cnn import RegressionCNN
def export_model(
    model: nn.Module,
    model_name: str,
    input_shape: tuple,
    onnx_dir: str | None = None,
):
    """Export a single PyTorch model to ``<model_name>.onnx``.

    The model is switched to eval mode and moved to the CPU in place, traced
    with one random dummy sample, and written to the output directory (which
    is created if it does not exist). The resulting path and file size are
    printed.

    Args:
        model: PyTorch model with its weights already loaded.
        model_name: One of classifier / normal / math / threed_text /
            threed_rotate / threed_slider. It names the output file and
            selects which output axis is marked as the dynamic batch axis.
        input_shape: Shape of one sample without the batch dimension,
            i.e. (C, H, W).
        onnx_dir: Output directory; falls back to ``config.ONNX_DIR`` when
            empty or None.
    """
    from pathlib import Path

    # NOTE(review): ONNX_DIR is used as a Path here (.mkdir, "/") -- confirm in config.
    target_dir = Path(onnx_dir) if onnx_dir else ONNX_DIR
    target_dir.mkdir(parents=True, exist_ok=True)
    onnx_path = target_dir / f"{model_name}.onnx"

    model.eval()
    model.cpu()
    dummy_input = torch.randn(1, *input_shape)

    # The classifier and the two regression models keep the batch on output
    # axis 0. Every other model is treated as a CTC recognizer whose output
    # is laid out as (T, B, C), so its batch axis is 1.
    batch_first_models = ("classifier", "threed_rotate", "threed_slider")
    output_batch_axis = 0 if model_name in batch_first_models else 1
    dynamic_axes = {
        "input": {0: "batch"},
        "output": {output_batch_axis: "batch"},
    }

    export_kwargs = {
        "opset_version": ONNX_CONFIG["opset_version"],
        "input_names": ["input"],
        "output_names": ["output"],
        # A fixed batch size is exported when dynamic batching is disabled.
        "dynamic_axes": dynamic_axes if ONNX_CONFIG["dynamic_batch"] else None,
    }
    torch.onnx.export(model, dummy_input, str(onnx_path), **export_kwargs)

    size_kb = onnx_path.stat().st_size / 1024
    print(f"[ONNX] 导出完成: {onnx_path} ({size_kb:.1f} KB)")
def _load_and_export(model_name: str):
    """Load one model from its checkpoint and export it to ONNX.

    Reads ``CHECKPOINTS_DIR / "<model_name>.pth"``, rebuilds the matching
    architecture, restores ``model_state_dict`` and hands the model to
    ``export_model``. A missing checkpoint or an unknown model name is
    reported on stdout and skipped instead of raising.

    Args:
        model_name: One of classifier / normal / math / threed_text /
            threed_rotate / threed_slider.
    """
    ckpt_path = CHECKPOINTS_DIR / f"{model_name}.pth"
    if not ckpt_path.exists():
        print(f"[跳过] {model_name}: checkpoint 不存在 ({ckpt_path})")
        return

    ckpt = torch.load(ckpt_path, map_location="cpu", weights_only=True)

    # Prefer 'best_acc' and fall back to 'best_tol_acc' (presumably written
    # by the regression trainers -- confirm). An explicit None check is used
    # instead of `or` so that a recorded score of 0 / 0.0 is still reported
    # rather than being mistaken for "missing".
    acc_info = ckpt.get("best_acc")
    if acc_info is None:
        acc_info = ckpt.get("best_tol_acc", "?")
    print(f"[加载] {model_name}: epoch={ckpt.get('epoch', '?')} acc={acc_info}")

    if model_name == "classifier":
        h, w = IMAGE_SIZE["classifier"]
        model = CaptchaClassifier(num_types=NUM_CAPTCHA_TYPES)
    elif model_name in ("normal", "math"):
        # Both use LiteCRNN; the charset stored in the checkpoint wins over
        # the configured default so the output layer matches the weights.
        default_chars = NORMAL_CHARS if model_name == "normal" else MATH_CHARS
        chars = ckpt.get("chars", default_chars)
        h, w = IMAGE_SIZE[model_name]
        model = LiteCRNN(chars=chars, img_h=h, img_w=w)
    elif model_name == "threed_text":
        chars = ckpt.get("chars", THREED_CHARS)
        h, w = IMAGE_SIZE["3d_text"]
        model = ThreeDCNN(chars=chars, img_h=h, img_w=w)
    elif model_name in ("threed_rotate", "threed_slider"):
        # IMAGE_SIZE is keyed by "3d_*" while checkpoints are named "threed_*".
        size_key = "3d_rotate" if model_name == "threed_rotate" else "3d_slider"
        h, w = IMAGE_SIZE[size_key]
        model = RegressionCNN(img_h=h, img_w=w)
    else:
        print(f"[错误] 未知模型: {model_name}")
        return

    # Every supported model is built with a single input channel.
    input_shape = (1, h, w)

    model.load_state_dict(ckpt["model_state_dict"])
    export_model(model, model_name, input_shape)
def export_all():
    """Export every known model in turn.

    Runs ``_load_and_export`` for classifier, normal, math, threed_text,
    threed_rotate and threed_slider, in that order, between a start banner
    and a completion message.
    """
    banner = "=" * 50
    print(banner)
    print("导出全部 ONNX 模型")
    print(banner)

    model_names = (
        "classifier",
        "normal",
        "math",
        "threed_text",
        "threed_rotate",
        "threed_slider",
    )
    for model_name in model_names:
        _load_and_export(model_name)

    print("\n全部导出完成。")
# Script entry point: export all models when the file is run directly.
if __name__ == "__main__":
    export_all()