Add slide and rotate interactive captcha solvers

New solver subsystem with independent models:

- GapDetectorCNN (1x128x256 grayscale → sigmoid) for slide gap detection
- RotationRegressor (3x128x128 RGB → sin/cos via tanh) for rotation angle prediction
- SlideSolver with 3-tier strategy: template match → edge detect → CNN fallback
- RotateSolver with ONNX sin/cos → atan2 inference
- Generators, training scripts, CLI commands, and slide track utility

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 18:07:06 +08:00
parent 90d6423551
commit 9b5f29083e
20 changed files with 1440 additions and 10 deletions
--- a/models/__init__.py
+++ b/models/__init__.py
@@ -1,21 +1,27 @@
 """
 模型定义包

-提供四种模型：
+提供六种模型：
 - CaptchaClassifier: 调度分类器 (轻量 CNN, < 500KB)
 - LiteCRNN: 轻量 CRNN (普通字符 + 算式, < 2MB)
 - ThreeDCNN: 3D 文字验证码专用模型 (ResNet-lite + BiLSTM, < 5MB)
 - RegressionCNN: 回归 CNN (3D 旋转 + 滑块, ~1MB)
+- GapDetectorCNN: 滑块缺口检测 CNN (~1MB)
+- RotationRegressor: 旋转角度回归 sin/cos 编码 (~2MB)
 """

 from models.classifier import CaptchaClassifier
 from models.lite_crnn import LiteCRNN
 from models.threed_cnn import ThreeDCNN
 from models.regression_cnn import RegressionCNN
+from models.gap_detector import GapDetectorCNN
+from models.rotation_regressor import RotationRegressor

 __all__ = [
    "CaptchaClassifier",
    "LiteCRNN",
    "ThreeDCNN",
    "RegressionCNN",
+    "GapDetectorCNN",
+    "RotationRegressor",
 ]
--- a/models/gap_detector.py
+++ b/models/gap_detector.py
@@ -0,0 +1,82 @@
+"""
+滑块缺口检测 CNN (GapDetectorCNN)
+
+用于检测滑块验证码中缺口的 x 坐标位置。
+输出 sigmoid 归一化到 [0,1]，推理时按图片宽度缩放回像素坐标。
+
+架构:
+    Conv(1→32) + BN + ReLU + Pool
+    Conv(32→64) + BN + ReLU + Pool
+    Conv(64→128) + BN + ReLU + Pool
+    Conv(128→128) + BN + ReLU + Pool
+    AdaptiveAvgPool2d(1) → FC(128→64) → ReLU → Dropout(0.2) → FC(64→1) → Sigmoid
+
+约 250K 参数，~1MB。
+"""
+
+import torch
+import torch.nn as nn
+
+
+class GapDetectorCNN(nn.Module):
+    """
+    Slider-gap detection CNN: predicts the gap's x position as a fraction in [0, 1].
+
+    Documented as sharing RegressionCNN's architecture (defined elsewhere; not verified
+    here) but dedicated to slider gaps. Default input size is 1x128x256 (grayscale).
+    """
+
+    def __init__(self, img_h: int = 128, img_w: int = 256):
+        super().__init__()
+        self.img_h = img_h  # kept as metadata; no layer in this class is sized from it
+        self.img_w = img_w  # kept as metadata; no layer in this class is sized from it
+
+        self.features = nn.Sequential(
+            # block 1: channels 1 → 32; the 2x2 max-pool halves H and W (H/2, W/2)
+            nn.Conv2d(1, 32, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(32),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(2, 2),
+
+            # block 2: channels 32 → 64; spatial size now H/4, W/4
+            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(2, 2),
+
+            # block 3: channels 64 → 128; spatial size now H/8, W/8
+            nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(2, 2),
+
+            # block 4: channels 128 → 128; spatial size now H/16, W/16
+            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(2, 2),
+        )
+
+        self.pool = nn.AdaptiveAvgPool2d(1)  # collapse each channel's feature map to 1x1
+
+        self.regressor = nn.Sequential(
+            nn.Linear(128, 64),
+            nn.ReLU(inplace=True),
+            nn.Dropout(0.2),
+            nn.Linear(64, 1),
+            nn.Sigmoid(),  # squash the single output into [0, 1]
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            x: (batch, 1, H, W) grayscale image batch
+
+        Returns:
+            output: (batch, 1) sigmoid output in [0, 1], the gap's x coordinate as a fraction
+        """
+        feat = self.features(x)
+        feat = self.pool(feat)          # (B, 128, 1, 1)
+        feat = feat.flatten(1)          # (B, 128)
+        out = self.regressor(feat)      # (B, 1)
+        return out
--- a/models/rotation_regressor.py
+++ b/models/rotation_regressor.py
@@ -0,0 +1,82 @@
+"""
+旋转角度回归模型 (RotationRegressor)
+
+用于预测旋转验证码的正确旋转角度。
+使用 sin/cos 编码避免 0°/360° 边界问题。
+RGB 输入，输出 (sin θ, cos θ) ∈ [-1,1]。
+
+架构:
+    Conv(3→32) + BN + ReLU + Pool
+    Conv(32→64) + BN + ReLU + Pool
+    Conv(64→128) + BN + ReLU + Pool
+    Conv(128→256) + BN + ReLU + Pool
+    AdaptiveAvgPool2d(1) → FC(256→128) → ReLU → FC(128→2) → Tanh
+
+约 400K 参数，~2MB。
+"""
+
+import torch
+import torch.nn as nn
+
+
+class RotationRegressor(nn.Module):
+    """
+    Rotation-angle regression model.
+
+    Takes a 3x128x128 RGB input and outputs the pair (sin θ, cos θ).
+    At inference time, atan2(sin, cos) converts the pair back into an angle.
+    """
+
+    def __init__(self, img_h: int = 128, img_w: int = 128):
+        super().__init__()
+        self.img_h = img_h  # kept as metadata; no layer in this class is sized from it
+        self.img_w = img_w  # kept as metadata; no layer in this class is sized from it
+
+        self.features = nn.Sequential(
+            # block 1: channels 3 → 32; the 2x2 max-pool halves H and W (H/2, W/2)
+            nn.Conv2d(3, 32, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(32),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(2, 2),
+
+            # block 2: channels 32 → 64; spatial size now H/4, W/4
+            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(64),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(2, 2),
+
+            # block 3: channels 64 → 128; spatial size now H/8, W/8
+            nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(2, 2),
+
+            # block 4: channels 128 → 256; spatial size now H/16, W/16
+            nn.Conv2d(128, 256, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(256),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(2, 2),
+        )
+
+        self.pool = nn.AdaptiveAvgPool2d(1)  # collapse each channel's feature map to 1x1
+
+        self.regressor = nn.Sequential(
+            nn.Linear(256, 128),
+            nn.ReLU(inplace=True),
+            nn.Linear(128, 2),
+            nn.Tanh(),  # bounds each output separately; the pair is not normalized to unit length
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Args:
+            x: (batch, 3, H, W) RGB image batch
+
+        Returns:
+            output: (batch, 2) → (sin θ, cos θ), each component tanh-bounded to [-1, 1]
+        """
+        feat = self.features(x)
+        feat = self.pool(feat)          # (B, 256, 1, 1)
+        feat = feat.flatten(1)          # (B, 256)
+        out = self.regressor(feat)      # (B, 2)
+        return out