Expand 3D captcha into three subtypes: 3d_text, 3d_rotate, 3d_slider
Split the single "3d" captcha type into three independent expert models:
- 3d_text: 3D perspective text OCR (renamed from old "3d", CTC-based ThreeDCNN)
- 3d_rotate: rotation angle regression (new RegressionCNN, circular loss)
- 3d_slider: slider offset regression (new RegressionCNN, SmoothL1 loss)

CAPTCHA_TYPES expanded from 3 to 5 classes. Classifier samples updated to 50000 (10000 per class). New generators, model, dataset, training utilities, and full pipeline/export/CLI support for all subtypes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
86
models/regression_cnn.py
Normal file
86
models/regression_cnn.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""
|
||||
回归 CNN 模型
|
||||
|
||||
3d_rotate 和 3d_slider 共用的回归模型。
|
||||
输出 sigmoid 归一化到 [0,1],推理时按 label_range 缩放回原始范围。
|
||||
|
||||
架构:
|
||||
Conv(1→32) + BN + ReLU + Pool
|
||||
Conv(32→64) + BN + ReLU + Pool
|
||||
Conv(64→128) + BN + ReLU + Pool
|
||||
Conv(128→128) + BN + ReLU + Pool
|
||||
AdaptiveAvgPool2d(1) → FC(128→64) → ReLU → Dropout(0.2) → FC(64→1) → Sigmoid
|
||||
|
||||
约 250K 参数,~1MB。
|
||||
"""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
|
||||
class RegressionCNN(nn.Module):
    """Lightweight regression CNN for 3d_rotate (angle) and 3d_slider (offset).

    The network maps a single-channel image to one scalar squashed by a
    sigmoid into [0, 1]. The value is a normalized prediction; callers are
    expected to rescale it by the task's label range to obtain the real
    angle/offset (that rescaling is not done inside this class).

    Layout:
        4 x [Conv3x3 (no bias) -> BatchNorm -> ReLU -> MaxPool(2, 2)]
        -> AdaptiveAvgPool2d(1) -> Linear(128, 64) -> ReLU -> Dropout(0.2)
        -> Linear(64, 1) -> Sigmoid
    """

    # Number of stride-2 max-pool stages in ``features``; each halves H and W.
    _NUM_POOL_STAGES = 4
    # Smallest side length that still leaves a 1x1 map after every pool stage.
    _MIN_INPUT_SIZE = 2 ** _NUM_POOL_STAGES

    def __init__(self, img_h: int = 80, img_w: int = 80):
        """Build the feature extractor and the regression head.

        Args:
            img_h: Input image height in pixels.
            img_w: Input image width in pixels.

        Raises:
            ValueError: If ``img_h`` or ``img_w`` is smaller than 16. Four
                2x down-samplings would shrink such an input to a zero-sized
                feature map, which otherwise only fails later inside
                ``forward`` with a less helpful pooling error.
        """
        super().__init__()

        if img_h < self._MIN_INPUT_SIZE or img_w < self._MIN_INPUT_SIZE:
            raise ValueError(
                f"img_h and img_w must each be >= {self._MIN_INPUT_SIZE} "
                f"(got img_h={img_h}, img_w={img_w})"
            )

        # Kept as metadata for callers; the layers themselves are size-agnostic
        # thanks to the adaptive average pooling below.
        self.img_h = img_h
        self.img_w = img_w

        # NOTE: the flat layer order defines the state_dict keys
        # (features.0 ... features.15); do not reorder or nest.
        self.features = nn.Sequential(
            # block 1: 1 -> 32 channels, H/2, W/2
            nn.Conv2d(1, 32, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),

            # block 2: 32 -> 64 channels, H/4, W/4
            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),

            # block 3: 64 -> 128 channels, H/8, W/8
            nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),

            # block 4: 128 -> 128 channels, H/16, W/16
            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )

        # Collapse whatever spatial extent remains to 1x1 per channel.
        self.pool = nn.AdaptiveAvgPool2d(1)

        self.regressor = nn.Sequential(
            nn.Linear(128, 64),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
            nn.Linear(64, 1),
            nn.Sigmoid(),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on a batch of grayscale images.

        Args:
            x: Tensor of shape (batch, 1, H, W).

        Returns:
            Tensor of shape (batch, 1) holding sigmoid outputs in [0, 1].
        """
        feat = self.features(x)
        feat = self.pool(feat)      # (B, 128, 1, 1)
        feat = feat.flatten(1)      # (B, 128)
        out = self.regressor(feat)  # (B, 1)
        return out
|
||||
Reference in New Issue
Block a user