Add slide and rotate interactive captcha solvers

New solver subsystem with independent models:
- GapDetectorCNN (1x128x256 grayscale → sigmoid) for slide gap detection
- RotationRegressor (3x128x128 RGB → sin/cos via tanh) for rotation angle prediction
- SlideSolver with 3-tier strategy: template match → edge detect → CNN fallback
- RotateSolver with ONNX sin/cos → atan2 inference
- Generators, training scripts, CLI commands, and slide track utility

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Hua
2026-03-11 18:07:06 +08:00
parent 90d6423551
commit 9b5f29083e
20 changed files with 1440 additions and 10 deletions

108
cli.py
View File

@@ -13,6 +13,11 @@ CaptchaBreaker 命令行入口
python cli.py predict image.png --type normal
python cli.py predict-dir ./test_images/
python cli.py serve --port 8080
python cli.py generate-solver slide --num 30000
python cli.py train-solver slide
python cli.py train-solver rotate
python cli.py solve slide --bg bg.png [--tpl tpl.png]
python cli.py solve rotate --image img.png
"""
import argparse
@@ -195,6 +200,90 @@ def cmd_serve(args):
uvicorn.run(app, host=args.host, port=args.port)
def cmd_generate_solver(args):
    """Generate training data for a solver model.

    Reads ``args.type`` (the solver kind, ``"slide"`` or ``"rotate"``) and
    ``args.num`` (the number of samples requested), creates the output
    directory for that solver if it does not exist, and calls the matching
    generator's ``generate_dataset(num, str(out_dir))``.

    The solver type is validated *before* any project module is imported, so
    an unknown type always produces the usage message and exit status 1
    instead of an unrelated import error. Only the generator that was actually
    requested is imported.

    Raises:
        SystemExit: with status 1 when ``args.type`` is not a known solver.
    """
    solver_type = args.type
    num = args.num

    known_types = ("slide", "rotate")
    if solver_type not in known_types:
        print(f"未知 solver 类型: {solver_type} 可选: {', '.join(known_types)}")
        sys.exit(1)

    # Import lazily and selectively: a problem in one generator module must
    # not prevent generating data for the other solver.
    if solver_type == "slide":
        from config import SLIDE_DATA_DIR as out_dir
        from generators.slide_gen import SlideDataGenerator as gen_cls
    else:
        from config import ROTATE_SOLVER_DATA_DIR as out_dir
        from generators.rotate_solver_gen import RotateSolverDataGenerator as gen_cls

    # out_dir is used as a pathlib-style object here (mkdir with parents).
    out_dir.mkdir(parents=True, exist_ok=True)
    print(f"生成 solver/{solver_type} 数据: {num} 张 → {out_dir}")
    gen = gen_cls()
    gen.generate_dataset(num, str(out_dir))
def cmd_train_solver(args):
    """Train a solver model.

    Dispatches on ``args.type``: ``"slide"`` runs ``training.train_slide.main``
    and ``"rotate"`` runs ``training.train_rotate_solver.main``. The training
    module is imported only after the type has been accepted.

    Raises:
        SystemExit: with status 1 when ``args.type`` is neither ``"slide"``
            nor ``"rotate"``.
    """
    kind = args.type

    # Guard clause: reject unknown solver kinds before importing anything.
    if kind not in ("slide", "rotate"):
        print(f"未知 solver 类型: {kind} 可选: slide, rotate")
        sys.exit(1)

    if kind == "slide":
        from training.train_slide import main as run_training
    else:
        from training.train_rotate_solver import main as run_training

    run_training()
def _require_existing_file(path, flag):
    """Exit with status 1 unless *path* was supplied and exists on disk."""
    if not path:
        # argparse leaves an omitted optional flag as None; report that
        # instead of letting Path(None) raise a TypeError.
        print(f"缺少参数: {flag}")
        sys.exit(1)
    if not Path(path).exists():
        print(f"文件不存在: {path}")
        sys.exit(1)


def cmd_solve(args):
    """Solve an interactive captcha and print the result.

    Dispatches on ``args.type``:

    * ``"slide"`` reads ``args.bg`` (background image path, required) and the
      optional ``args.tpl`` (template image path), calls
      ``SlideSolver().solve(bg_path, template_image=tpl_path)`` and prints the
      ``gap_x``, ``gap_x_percent``, ``confidence`` and ``method`` entries of
      the returned mapping.
    * ``"rotate"`` reads ``args.image`` (required), calls
      ``RotateSolver().solve(image_path)`` and prints the ``angle`` and
      ``confidence`` entries of the returned mapping.

    All input paths are validated before the solver module is imported, so a
    missing flag or file is reported immediately with a readable message.

    Raises:
        SystemExit: with status 1 when the solver type is unknown, a required
            path flag is missing, or a given path does not exist.
    """
    solver_type = args.type

    if solver_type == "slide":
        bg_path = args.bg
        tpl_path = getattr(args, "tpl", None)

        _require_existing_file(bg_path, "--bg")
        # The template is optional, but if given it must point at a real file.
        if tpl_path:
            _require_existing_file(tpl_path, "--tpl")

        from solvers.slide_solver import SlideSolver

        solver = SlideSolver()
        result = solver.solve(bg_path, template_image=tpl_path)
        print(f"背景图: {bg_path}")
        if tpl_path:
            print(f"模板图: {tpl_path}")
        print(f"缺口 x: {result['gap_x']} px")
        print(f"缺口 x%: {result['gap_x_percent']:.4f}")
        print(f"置信度: {result['confidence']:.4f}")
        print(f"方法: {result['method']}")
    elif solver_type == "rotate":
        image_path = args.image

        _require_existing_file(image_path, "--image")

        from solvers.rotate_solver import RotateSolver

        solver = RotateSolver()
        result = solver.solve(image_path)
        print(f"图片: {image_path}")
        print(f"角度: {result['angle']}°")
        print(f"置信度: {result['confidence']}")
    else:
        print(f"未知 solver 类型: {solver_type} 可选: slide, rotate")
        sys.exit(1)
def main():
parser = argparse.ArgumentParser(
prog="captcha-breaker",
@@ -247,6 +336,22 @@ def main():
p_serve.add_argument("--host", default="0.0.0.0", help="监听地址 (默认 0.0.0.0)")
p_serve.add_argument("--port", type=int, default=8080, help="监听端口 (默认 8080)")
# ---- generate-solver ----
p_gen_solver = subparsers.add_parser("generate-solver", help="生成 solver 训练数据")
p_gen_solver.add_argument("type", help="solver 类型: slide, rotate")
p_gen_solver.add_argument("--num", type=int, required=True, help="生成数量")
# ---- train-solver ----
p_train_solver = subparsers.add_parser("train-solver", help="训练 solver 模型")
p_train_solver.add_argument("type", help="solver 类型: slide, rotate")
# ---- solve ----
p_solve = subparsers.add_parser("solve", help="求解交互式验证码")
p_solve.add_argument("type", help="solver 类型: slide, rotate")
p_solve.add_argument("--bg", help="背景图路径 (slide 必需)")
p_solve.add_argument("--tpl", default=None, help="模板图路径 (slide 可选)")
p_solve.add_argument("--image", help="图片路径 (rotate 必需)")
args = parser.parse_args()
if args.command is None:
@@ -260,6 +365,9 @@ def main():
"predict": cmd_predict,
"predict-dir": cmd_predict_dir,
"serve": cmd_serve,
"generate-solver": cmd_generate_solver,
"train-solver": cmd_train_solver,
"solve": cmd_solve,
}
cmd_map[args.command](args)