# CaptchBreaker/solvers/slide_solver.py

"""
滑块验证码求解器

三种求解方法 (按优先级):
1. 模板匹配: 背景图 + 模板图 → Canny → matchTemplate
2. 边缘检测: 单图 Canny → findContours → 筛选方形轮廓
3. CNN 兜底: ONNX 推理 → sigmoid → x 百分比 → 像素

OpenCV 延迟导入，未安装时退化到 CNN only。
"""

from pathlib import Path

import numpy as np
from PIL import Image

from config import ONNX_DIR, SOLVER_CONFIG
from solvers.base import BaseSolver


class SlideSolver(BaseSolver):
    """Slide-captcha solver that locates the horizontal centre of the gap.

    :meth:`solve` tries three strategies in priority order:

    1. Template matching: background + puzzle piece -> Canny -> matchTemplate.
    2. Edge detection: background only -> Canny -> findContours -> pick a
       roughly square contour.
    3. CNN fallback: ONNX inference -> x fraction -> pixels.

    OpenCV is imported lazily; when it is not installed only the CNN
    strategy is used.
    """

    # Minimum normalized correlation for a template match to be accepted.
    _MIN_MATCH_SCORE = 0.3
    # Accepted contour area, as a fraction of the whole image area.
    _MIN_AREA_RATIO = 0.005
    _MAX_AREA_RATIO = 0.15
    # Area fraction used to normalize the contour score.
    _REF_AREA_RATIO = 0.05
    # Minimum short-side / long-side ratio for a "roughly square" contour.
    _MIN_ASPECT = 0.5

    def __init__(self, onnx_path: str | Path | None = None):
        """Create a solver.

        Args:
            onnx_path: Path of the ONNX gap-detector model. When omitted
                (or falsy), ``ONNX_DIR / "gap_detector.onnx"`` is used.
                The model is only loaded when the CNN fallback runs.
        """
        self.cfg = SOLVER_CONFIG["slide"]
        self._onnx_session = None
        self._onnx_path = Path(onnx_path) if onnx_path else ONNX_DIR / "gap_detector.onnx"

        # Probe for OpenCV once so solve() can skip the cv2-based strategies.
        try:
            import cv2  # noqa: F401
        except ImportError:
            self._cv2_available = False
        else:
            self._cv2_available = True

    def _load_onnx(self):
        """Create the ONNX Runtime session on first use.

        Raises:
            FileNotFoundError: If the model file does not exist.
        """
        if self._onnx_session is not None:
            return
        if not self._onnx_path.exists():
            raise FileNotFoundError(f"ONNX 模型不存在: {self._onnx_path}")
        import onnxruntime as ort
        self._onnx_session = ort.InferenceSession(
            str(self._onnx_path), providers=["CPUExecutionProvider"]
        )

    def solve(
        self,
        bg_image: Image.Image | str | Path,
        template_image: Image.Image | str | Path | None = None,
        **kwargs,
    ) -> dict:
        """Solve a slide captcha.

        Args:
            bg_image: Background image (required); a PIL image or a path.
            template_image: Puzzle-piece image (optional). When given and
                OpenCV is available, template matching is tried first.
            **kwargs: Accepted and ignored.

        Returns:
            ``{"gap_x": int, "gap_x_percent": float, "confidence": float,
            "method": str}`` where ``method`` is ``"template_match"``,
            ``"edge_detect"`` or ``"cnn"``.

        Raises:
            FileNotFoundError: If the CNN fallback is reached and the ONNX
                model file is missing.
        """
        bg = self._load_image(bg_image)

        if self._cv2_available:
            # Strategy 1: template matching (needs the puzzle piece).
            if template_image is not None:
                tpl = self._load_image(template_image)
                result = self._template_match(bg, tpl)
                if result is not None:
                    return result

            # Strategy 2: edge detection on the background alone.
            result = self._edge_detect(bg)
            if result is not None:
                return result

        # Strategy 3: CNN fallback.
        return self._cnn_predict(bg)

    def _load_image(self, img: Image.Image | str | Path) -> Image.Image:
        """Return *img* as an RGB PIL image, opening it first if it is a path."""
        if isinstance(img, (str, Path)):
            # convert() returns an independent, fully loaded image, so the
            # file can be closed right away; without the context manager
            # Pillow may keep the handle open (e.g. for multi-frame files).
            with Image.open(str(img)) as opened:
                return opened.convert("RGB")
        return img.convert("RGB")

    def _template_match(self, bg: Image.Image, tpl: Image.Image) -> dict | None:
        """Locate the gap by matching the piece's edges against the background.

        Returns:
            A result dict, or ``None`` when the template cannot be matched
            (larger than the background, no edges, or best score below
            ``_MIN_MATCH_SCORE``) so the caller can try the next strategy.
        """
        import cv2

        bg_gray = np.array(bg.convert("L"))
        tpl_gray = np.array(tpl.convert("L"))
        bg_h, bg_w = bg_gray.shape
        tpl_h, tpl_w = tpl_gray.shape

        # matchTemplate raises when the template exceeds the image in either
        # dimension; treat that as "no match" instead of crashing solve().
        if tpl_h > bg_h or tpl_w > bg_w:
            return None

        # Match on Canny edge maps rather than raw intensities.
        low, high = self.cfg["canny_low"], self.cfg["canny_high"]
        bg_edges = cv2.Canny(bg_gray, low, high)
        tpl_edges = cv2.Canny(tpl_gray, low, high)

        # An edge-free template carries no information to match on.
        if not tpl_edges.any():
            return None

        scores = cv2.matchTemplate(bg_edges, tpl_edges, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(scores)

        if max_val < self._MIN_MATCH_SCORE:
            return None

        # max_loc is the top-left corner of the best match; report its centre.
        gap_x = max_loc[0] + tpl_w // 2
        return {
            "gap_x": int(gap_x),
            "gap_x_percent": gap_x / bg_w,
            "confidence": float(max_val),
            "method": "template_match",
        }

    def _edge_detect(self, bg: Image.Image) -> dict | None:
        """Locate the gap by looking for a roughly square contour.

        Returns:
            A result dict for the best-scoring contour, or ``None`` when no
            contour passes the area and aspect-ratio filters.
        """
        import cv2

        bg_gray = np.array(bg.convert("L"))
        h, w = bg_gray.shape
        img_area = h * w

        edges = cv2.Canny(bg_gray, self.cfg["canny_low"], self.cfg["canny_high"])
        # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x and
        # (image, contours, hierarchy) on 3.x; [-2] is the contours in both.
        contours = cv2.findContours(
            edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )[-2]

        min_area = img_area * self._MIN_AREA_RATIO
        max_area = img_area * self._MAX_AREA_RATIO
        ref_area = img_area * self._REF_AREA_RATIO

        best_center = None
        best_score = 0

        for cnt in contours:
            area = cv2.contourArea(cnt)
            # Area filter: keep only contours of plausible gap size.
            if area < min_area or area > max_area:
                continue

            x, _y, cw, ch = cv2.boundingRect(cnt)
            long_side = max(cw, ch)
            aspect = min(cw, ch) / long_side if long_side > 0 else 0
            # Keep only roughly square bounding boxes.
            if aspect < self._MIN_ASPECT:
                continue

            # Score grows with squareness and with area relative to ref_area.
            score = aspect * (area / ref_area)
            if score > best_score:
                best_score = score
                best_center = x + cw // 2

        if best_center is None:
            return None

        return {
            "gap_x": int(best_center),
            "gap_x_percent": best_center / w,
            # The raw score can exceed 1; cap it to read as a confidence.
            "confidence": min(float(best_score), 1.0),
            "method": "edge_detect",
        }

    def _cnn_predict(self, bg: Image.Image) -> dict:
        """Predict the gap position with the ONNX model (fallback strategy).

        The first element of the model's first output is used as the gap
        position expressed as a fraction of the image width.
        NOTE(review): the value is not clamped here; presumably the exported
        model already ends in a sigmoid -- confirm.

        Raises:
            FileNotFoundError: If the ONNX model file does not exist.
        """
        self._load_onnx()

        in_h, in_w = self.cfg["cnn_input_size"]
        orig_w = bg.width

        # Preprocess: grayscale, resize (PIL takes (width, height)), scale
        # pixel values to [-1, 1].
        gray = bg.convert("L").resize((in_w, in_h))
        arr = np.array(gray, dtype=np.float32) / 255.0
        arr = (arr - 0.5) / 0.5
        arr = arr[np.newaxis, np.newaxis, :, :]  # (1, 1, H, W)

        # Ask the session for its input name instead of assuming "input" so
        # models exported under a different name still work.
        input_name = self._onnx_session.get_inputs()[0].name
        outputs = self._onnx_session.run(None, {input_name: arr})
        percent = float(outputs[0][0][0])

        return {
            "gap_x": int(percent * orig_w),
            "gap_x_percent": percent,
            "confidence": 0.5,  # fixed placeholder: the CNN yields no confidence
            "method": "cnn",
        }