Expand 3D captcha into three subtypes: 3d_text, 3d_rotate, 3d_slider

Split the single "3d" captcha type into three independent expert models: - 3d_text: 3D perspective text OCR (renamed from old "3d", CTC-based ThreeDCNN) - 3d_rotate: rotation angle regression (new RegressionCNN, circular loss) - 3d_slider: slider offset regression (new RegressionCNN, SmoothL1 loss) CAPTCHA_TYPES expanded from 3 to 5 classes. Classifier samples updated to 50000 (10000 per class). New generators, model, dataset, training utilities, and full pipeline/export/CLI support for all subtypes. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 13:55:53 +08:00
parent 760b80ee5e
commit f5be7671bc
20 changed files with 1109 additions and 142 deletions
--- a/training/train_3d_rotate.py
+++ b/training/train_3d_rotate.py
@@ -0,0 +1,38 @@
+"""
+Train the 3D rotation captcha regression model (RegressionCNN).
+
+Usage: python -m training.train_3d_rotate
+"""
+
+from config import (
+    IMAGE_SIZE,
+    SYNTHETIC_3D_ROTATE_DIR,
+    REAL_3D_ROTATE_DIR,
+)
+from generators.threed_rotate_gen import ThreeDRotateGenerator
+from models.regression_cnn import RegressionCNN
+from training.train_regression_utils import train_regression_model
+
+
+def main():
+    """Build the 3d_rotate RegressionCNN, print a banner, call the trainer."""
+    img_h, img_w = IMAGE_SIZE["3d_rotate"]
+    model = RegressionCNN(img_h=img_h, img_w=img_w)
+    print("=" * 60)
+    print("训练 3D 旋转验证码回归模型 (RegressionCNN)")
+    print(f"  输入尺寸: {img_h}×{img_w}")
+    print("  任务: 预测旋转角度 0-359°")  # plain literal: no placeholders, no f-prefix
+    print("=" * 60)
+
+    train_regression_model(
+        model_name="threed_rotate",
+        model=model,
+        synthetic_dir=SYNTHETIC_3D_ROTATE_DIR,
+        real_dir=REAL_3D_ROTATE_DIR,
+        generator_cls=ThreeDRotateGenerator,
+        config_key="3d_rotate",
+    )
+
+
+if __name__ == "__main__":  # script entry point (python -m training.train_3d_rotate)
+    main()