Add slide and rotate interactive captcha solvers
New solver subsystem with independent models:
- GapDetectorCNN (1x128x256 grayscale → sigmoid) for slide gap detection
- RotationRegressor (3x128x128 RGB → sin/cos via tanh) for rotation angle prediction
- SlideSolver with 3-tier strategy: template match → edge detect → CNN fallback
- RotateSolver with ONNX sin/cos → atan2 inference
- Generators, training scripts, CLI commands, and slide track utility

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
17
solvers/__init__.py
Normal file
17
solvers/__init__.py
Normal file
@@ -0,0 +1,17 @@
|
||||
"""
Captcha solver package.

Provides two interactive captcha solvers:
- SlideSolver: slider captcha solving (OpenCV first, CNN as fallback)
- RotateSolver: rotation captcha solving (ONNX sin/cos regression)
"""

from solvers.base import BaseSolver
from solvers.slide_solver import SlideSolver
from solvers.rotate_solver import RotateSolver

# Names re-exported as the public API of the ``solvers`` package.
__all__ = [
    "BaseSolver",
    "SlideSolver",
    "RotateSolver",
]
|
||||
21
solvers/base.py
Normal file
21
solvers/base.py
Normal file
@@ -0,0 +1,21 @@
|
||||
"""
|
||||
求解器基类
|
||||
"""
|
||||
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class BaseSolver:
    """Common base class for captcha solvers.

    Subclasses override :meth:`solve`; the base implementation only signals
    that no solving strategy has been provided.
    """

    def solve(self, image: Image.Image, **kwargs) -> dict:
        """Solve a captcha.

        Args:
            image: The input picture.
            **kwargs: Extra solver-specific options (ignored here).

        Returns:
            A dictionary holding the solving result.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
|
||||
80
solvers/rotate_solver.py
Normal file
80
solvers/rotate_solver.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""
|
||||
旋转验证码求解器
|
||||
|
||||
ONNX 推理 → (sin, cos) → atan2 → 角度
|
||||
"""
|
||||
|
||||
import math
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from config import ONNX_DIR, SOLVER_CONFIG
|
||||
from solvers.base import BaseSolver
|
||||
|
||||
|
||||
class RotateSolver(BaseSolver):
    """Rotation captcha solver.

    Runs an ONNX model whose first output holds a ``(sin, cos)`` pair and
    converts that pair into an angle in degrees with ``atan2``.
    """

    def __init__(self, onnx_path: str | Path | None = None):
        """Store configuration; the ONNX session itself is created lazily.

        Args:
            onnx_path: Location of the ONNX model. When omitted (or falsy),
                ``ONNX_DIR / "rotation_regressor.onnx"`` is used.
        """
        self.cfg = SOLVER_CONFIG["rotate"]
        self._onnx_session = None
        self._onnx_path = Path(onnx_path) if onnx_path else ONNX_DIR / "rotation_regressor.onnx"

    def _load_onnx(self):
        """Create the ONNX Runtime session on first use.

        Raises:
            FileNotFoundError: If the model file does not exist.
        """
        if self._onnx_session is not None:
            return
        if not self._onnx_path.exists():
            raise FileNotFoundError(f"ONNX 模型不存在: {self._onnx_path}")

        # Imported here so the module can be imported without onnxruntime.
        import onnxruntime as ort

        self._onnx_session = ort.InferenceSession(
            str(self._onnx_path), providers=["CPUExecutionProvider"]
        )

    def solve(self, image: Image.Image | str | Path, **kwargs) -> dict:
        """Solve a rotation captcha.

        Args:
            image: A PIL image, or a path to an image file. It is converted
                to RGB before inference.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``{"angle": float, "confidence": float}`` where ``angle`` is in
            degrees within ``[0, 360)`` rounded to one decimal, and
            ``confidence`` is the length of the ``(sin, cos)`` vector capped
            at 1.0 and rounded to three decimals.

        Raises:
            FileNotFoundError: If the ONNX model file does not exist.
        """
        if isinstance(image, (str, Path)):
            # convert() loads the pixel data, so the file can be closed
            # deterministically as soon as the block exits.
            with Image.open(str(image)) as opened:
                image = opened.convert("RGB")
        else:
            image = image.convert("RGB")

        self._load_onnx()

        h, w = self.cfg["input_size"]

        # Preprocess: resize (PIL takes (width, height)), scale to [0, 1],
        # then normalize every channel with (x - 0.5) / 0.5.
        img = image.resize((w, h))
        arr = np.array(img, dtype=np.float32) / 255.0
        arr = (arr - 0.5) / 0.5
        # HWC -> CHW -> NCHW
        arr = arr.transpose(2, 0, 1)[np.newaxis, :, :, :]

        outputs = self._onnx_session.run(None, {"input": arr})
        sin_val = float(outputs[0][0][0])
        cos_val = float(outputs[0][0][1])

        # atan2 yields (-180, 180]; fold into [0, 360).
        angle_deg = math.degrees(math.atan2(sin_val, cos_val)) % 360.0
        angle = round(angle_deg, 1)
        # Both the float modulo (for tiny negative inputs) and the rounding
        # step (e.g. 359.96 -> 360.0) can land exactly on 360; wrap to 0.
        if angle >= 360.0:
            angle = 0.0

        # Confidence: a vector length near 1 is treated as a stable prediction.
        magnitude = math.hypot(sin_val, cos_val)
        confidence = min(magnitude, 1.0)

        return {
            "angle": angle,
            "confidence": round(confidence, 3),
        }
|
||||
179
solvers/slide_solver.py
Normal file
179
solvers/slide_solver.py
Normal file
@@ -0,0 +1,179 @@
|
||||
"""
|
||||
滑块验证码求解器
|
||||
|
||||
三种求解方法 (按优先级):
|
||||
1. 模板匹配: 背景图 + 模板图 → Canny → matchTemplate
|
||||
2. 边缘检测: 单图 Canny → findContours → 筛选方形轮廓
|
||||
3. CNN 兜底: ONNX 推理 → sigmoid → x 百分比 → 像素
|
||||
|
||||
OpenCV 延迟导入,未安装时退化到 CNN only。
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
from config import ONNX_DIR, SOLVER_CONFIG
|
||||
from solvers.base import BaseSolver
|
||||
|
||||
|
||||
class SlideSolver(BaseSolver):
    """Slider captcha solver.

    :meth:`solve` tries the following strategies in priority order:

    1. Template matching (needs a template image and OpenCV).
    2. Edge detection on the background image alone (needs OpenCV).
    3. CNN inference through ONNX Runtime as the last resort.

    OpenCV is imported lazily; when it is not installed only the CNN path
    is used.
    """

    def __init__(self, onnx_path: str | Path | None = None):
        """Store configuration and probe whether OpenCV is importable.

        Args:
            onnx_path: Location of the ONNX model. When omitted (or falsy),
                ``ONNX_DIR / "gap_detector.onnx"`` is used.
        """
        self.cfg = SOLVER_CONFIG["slide"]
        self._onnx_session = None
        self._onnx_path = Path(onnx_path) if onnx_path else ONNX_DIR / "gap_detector.onnx"

        # Availability probe only; the module is re-imported where it is used.
        try:
            import cv2  # noqa: F401
        except ImportError:
            self._cv2_available = False
        else:
            self._cv2_available = True

    def _load_onnx(self):
        """Create the ONNX Runtime session on first use.

        Raises:
            FileNotFoundError: If the model file does not exist.
        """
        if self._onnx_session is not None:
            return
        if not self._onnx_path.exists():
            raise FileNotFoundError(f"ONNX 模型不存在: {self._onnx_path}")

        # Imported here so the module can be imported without onnxruntime.
        import onnxruntime as ort

        self._onnx_session = ort.InferenceSession(
            str(self._onnx_path), providers=["CPUExecutionProvider"]
        )

    def solve(
        self,
        bg_image: Image.Image | str | Path,
        template_image: Image.Image | str | Path | None = None,
        **kwargs,
    ) -> dict:
        """Solve a slider captcha.

        Args:
            bg_image: Background image (required), as a PIL image or a path.
            template_image: Optional puzzle-piece image; when given and
                OpenCV is available, template matching is tried first.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            ``{"gap_x": int, "gap_x_percent": float, "confidence": float,
            "method": str}`` where ``method`` is one of ``"template_match"``,
            ``"edge_detect"`` or ``"cnn"``.

        Raises:
            FileNotFoundError: If the CNN fallback is reached and the ONNX
                model file does not exist.
        """
        bg = self._load_image(bg_image)

        # Strategy 1: template matching.
        if template_image is not None and self._cv2_available:
            tpl = self._load_image(template_image)
            result = self._template_match(bg, tpl)
            if result is not None:
                return result

        # Strategy 2: edge detection.
        if self._cv2_available:
            result = self._edge_detect(bg)
            if result is not None:
                return result

        # Strategy 3: CNN fallback.
        return self._cnn_predict(bg)

    def _load_image(self, img: Image.Image | str | Path) -> Image.Image:
        """Return ``img`` as an RGB PIL image, opening it first if it is a path."""
        if isinstance(img, (str, Path)):
            # convert() loads the pixel data, so the file can be closed
            # deterministically as soon as the block exits.
            with Image.open(str(img)) as opened:
                return opened.convert("RGB")
        return img.convert("RGB")

    def _template_match(self, bg: Image.Image, tpl: Image.Image) -> dict | None:
        """Locate the gap by matching Canny edges of the template in the background.

        Returns:
            A result dict with ``method == "template_match"``, or ``None``
            when the template does not fit inside the background, has no
            edges, or the best match score is below 0.3.
        """
        import cv2

        bg_gray = np.array(bg.convert("L"))
        tpl_gray = np.array(tpl.convert("L"))

        # cv2.matchTemplate requires the template to be no larger than the
        # searched image; bail out so the caller can try the next strategy
        # instead of crashing with cv2.error.
        bg_h, bg_w = bg_gray.shape
        tpl_h, tpl_w = tpl_gray.shape
        if tpl_h > bg_h or tpl_w > bg_w:
            return None

        low, high = self.cfg["canny_low"], self.cfg["canny_high"]
        bg_edges = cv2.Canny(bg_gray, low, high)
        tpl_edges = cv2.Canny(tpl_gray, low, high)

        # A template without any edge pixels cannot be matched meaningfully.
        if tpl_edges.sum() == 0:
            return None

        result = cv2.matchTemplate(bg_edges, tpl_edges, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(result)

        if max_val < 0.3:
            return None

        # max_loc is the top-left corner of the best match; report its
        # horizontal centre.
        gap_x = max_loc[0] + tpl_w // 2
        return {
            "gap_x": int(gap_x),
            "gap_x_percent": gap_x / bg_w,
            "confidence": float(max_val),
            "method": "template_match",
        }

    def _edge_detect(self, bg: Image.Image) -> dict | None:
        """Locate the gap by searching for a roughly square contour.

        Returns:
            A result dict with ``method == "edge_detect"``, or ``None`` when
            no contour passes the area and aspect-ratio filters.
        """
        import cv2

        bg_gray = np.array(bg.convert("L"))
        h, w = bg_gray.shape

        edges = cv2.Canny(bg_gray, self.cfg["canny_low"], self.cfg["canny_high"])
        # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
        # (contours, hierarchy); the contours are second-to-last in both.
        contours = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

        # Loop-invariant area bounds, as fractions of the whole image area.
        min_area = h * w * 0.005
        max_area = h * w * 0.15
        ref_area = h * w * 0.05

        best_x = None
        best_score = 0

        for cnt in contours:
            area = cv2.contourArea(cnt)
            # Area filter: keep only plausibly gap-sized contours.
            if area < min_area or area > max_area:
                continue

            x, y, cw, ch = cv2.boundingRect(cnt)
            longer = max(cw, ch)
            aspect = min(cw, ch) / longer if longer > 0 else 0
            # Keep only roughly square bounding boxes.
            if aspect < 0.5:
                continue

            # Score grows with squareness and with area relative to ref_area.
            score = aspect * (area / ref_area)
            if score > best_score:
                best_score = score
                best_x = x + cw // 2

        if best_x is None:
            return None

        return {
            "gap_x": int(best_x),
            "gap_x_percent": best_x / w,
            "confidence": min(float(best_score), 1.0),
            "method": "edge_detect",
        }

    def _cnn_predict(self, bg: Image.Image) -> dict:
        """Predict the gap position with the ONNX model (last-resort strategy).

        The first model output is read as a fraction of the image width and
        scaled back to pixels of the original background.

        Raises:
            FileNotFoundError: If the ONNX model file does not exist.
        """
        self._load_onnx()

        h, w = self.cfg["cnn_input_size"]
        orig_w = bg.width

        # Preprocess: grayscale, resize (PIL takes (width, height)), scale
        # to [0, 1], then normalize with (x - 0.5) / 0.5.
        img = bg.convert("L").resize((w, h))
        arr = np.array(img, dtype=np.float32) / 255.0
        arr = (arr - 0.5) / 0.5
        arr = arr[np.newaxis, np.newaxis, :, :]  # (1, 1, H, W)

        outputs = self._onnx_session.run(None, {"input": arr})
        percent = float(outputs[0][0][0])

        gap_x = int(percent * orig_w)
        return {
            "gap_x": gap_x,
            "gap_x_percent": percent,
            "confidence": 0.5,  # the CNN provides no confidence; fixed placeholder
            "method": "cnn",
        }
|
||||
Reference in New Issue
Block a user