""" 回归 CNN 模型 3d_rotate 和 3d_slider 共用的回归模型。 输出 sigmoid 归一化到 [0,1],推理时按 label_range 缩放回原始范围。 架构: Conv(1→32) + BN + ReLU + Pool Conv(32→64) + BN + ReLU + Pool Conv(64→128) + BN + ReLU + Pool Conv(128→128) + BN + ReLU + Pool AdaptiveAvgPool2d(1) → FC(128→64) → ReLU → Dropout(0.2) → FC(64→1) → Sigmoid 约 250K 参数,~1MB。 """ import torch import torch.nn as nn class RegressionCNN(nn.Module): """ 轻量回归 CNN,用于 3d_rotate (角度) 和 3d_slider (偏移) 预测。 输出 [0, 1] 范围的 sigmoid 值,需要按 label_range 缩放到实际范围。 """ def __init__(self, img_h: int = 80, img_w: int = 80): """ Args: img_h: 输入图片高度 img_w: 输入图片宽度 """ super().__init__() self.img_h = img_h self.img_w = img_w self.features = nn.Sequential( # block 1: 1 → 32, H/2, W/2 nn.Conv2d(1, 32, kernel_size=3, padding=1, bias=False), nn.BatchNorm2d(32), nn.ReLU(inplace=True), nn.MaxPool2d(2, 2), # block 2: 32 → 64, H/4, W/4 nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False), nn.BatchNorm2d(64), nn.ReLU(inplace=True), nn.MaxPool2d(2, 2), # block 3: 64 → 128, H/8, W/8 nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False), nn.BatchNorm2d(128), nn.ReLU(inplace=True), nn.MaxPool2d(2, 2), # block 4: 128 → 128, H/16, W/16 nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False), nn.BatchNorm2d(128), nn.ReLU(inplace=True), nn.MaxPool2d(2, 2), ) self.pool = nn.AdaptiveAvgPool2d(1) self.regressor = nn.Sequential( nn.Linear(128, 64), nn.ReLU(inplace=True), nn.Dropout(0.2), nn.Linear(64, 1), nn.Sigmoid(), ) def forward(self, x: torch.Tensor) -> torch.Tensor: """ Args: x: (batch, 1, H, W) 灰度图 Returns: output: (batch, 1) sigmoid 输出 [0, 1] """ feat = self.features(x) feat = self.pool(feat) # (B, 128, 1, 1) feat = feat.flatten(1) # (B, 128) out = self.regressor(feat) # (B, 1) return out