Implementing Common Deep Learning Modules 001
Table of Contents
- 1. Learning Objective
- 2. Common Modules: Usage and Implementation
- 2.1 ResNet18 Implementation
- 2.2 SENet Module
- 2.3 CBAM Module
1. Learning Objective
In image processing, deep learning has largely converged on a set of mature building blocks; most new architectures are just different ways of stacking the same modules. So rather than merely knowing these modules exist, we should keep summarizing them and implementing them by hand.
2. Common Modules: Usage and Implementation
2.1 ResNet18 Implementation
import torch
from torch import nn


# Helper that builds a 3x3 convolution with padding 1
def conv33(input_channels, output_channels, stride=1):
    return nn.Conv2d(input_channels, output_channels, kernel_size=3,
                     stride=stride, padding=1, bias=False)


# Classic basic residual block: Conv-BN-ReLU-Conv-BN plus an optional
# downsample branch so the shortcut matches the main path
class BasicModule(nn.Module):
    def __init__(self, c1, c2, k=3, s=1, act=True, downSample=None, expansion=1):
        super(BasicModule, self).__init__()
        self.c1 = c1
        self.c2 = c2
        self.act = act
        self.k = k
        self.s = s
        self.expansion = expansion
        # First 3x3 convolution; stride s performs any downsampling
        self.conv1 = conv33(c1, c2, s)
        self.bn1 = nn.BatchNorm2d(c2)
        self.act1 = nn.ReLU() if self.act else nn.Identity()
        # Second 3x3 convolution keeps the spatial size
        self.conv2 = conv33(c2, c2)
        self.bn2 = nn.BatchNorm2d(c2)
        self.act2 = nn.ReLU() if self.act else nn.Identity()
        # Optional downsample module for the shortcut branch
        self.downSample = downSample

    def forward(self, x):
        identity = x
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        if self.downSample is not None:
            identity = self.downSample(identity)
        x = x + identity
        x = self.act2(x)
        return x


class ResNet18(nn.Module):
    def __init__(self, c1=3, c2=64, output_layers=10):
        super(ResNet18, self).__init__()
        self.outputLayers = output_layers
        self.c1 = c1
        self.c2 = c2
        # Stem: 7x7 stride-2 convolution, the first downsampling step
        self.conv1 = nn.Conv2d(c1, c2, 7, 2, 3)
        self.bn1 = nn.BatchNorm2d(c2)
        self.relu1 = nn.ReLU()
        self.maxPool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four stages of stacked basic blocks; each later stage halves
        # the spatial size and doubles the channel count
        self.layer1 = self._makeLayer(c2, c2, 2)
        self.layer2 = self._makeLayer(c2, 2 * c2, 2, 2)
        self.layer3 = self._makeLayer(2 * c2, 4 * c2, 2, 2)
        self.layer4 = self._makeLayer(4 * c2, 8 * c2, 2, 2)
        self.avgPool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(8 * c2, self.outputLayers)

    def _makeLayer(self, c1, c2, blocks, stride=1):
        downSample = None
        # Throwaway block, used only to read its expansion factor
        bm = BasicModule(c1, c2)
        # A downsample branch is needed whenever the block changes
        # the spatial size or the channel count
        if stride != 1 or c1 != c2 * bm.expansion:
            downSample = nn.Sequential(
                conv33(c1, c2 * bm.expansion, stride),
                nn.BatchNorm2d(c2 * bm.expansion)
            )
        layers = []
        layers.append(BasicModule(c1, c2, 3, stride, downSample=downSample))
        for _ in range(1, blocks):  # with blocks=2 this loops exactly once
            layers.append(BasicModule(c2, c2))
        return nn.Sequential(*layers)

    def forward(self, x):
        # Stem: first convolution, BN, activation, max pooling
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.maxPool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgPool(x)
        # Flatten everything after the batch dimension
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x


if __name__ == '__main__':
    input_tensor = torch.randn((1, 3, 640, 640))
    n, c, h, w = input_tensor.shape
    model = ResNet18(c, 2 * c, 10)
    output = model(input_tensor)
    print(output.shape)
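The `__main__` block above already checks the final output shape. To see how each stage halves the spatial resolution while doubling the channels, a minimal sketch with forward hooks can trace the per-stage shapes (the hook setup below is my addition on top of the ResNet18 class above, not part of the original code):

# Minimal sketch (my addition): trace per-stage output shapes with
# forward hooks, assuming the ResNet18 class defined above.
import torch

model = ResNet18(3, 64, 10)
for name in ['maxPool', 'layer1', 'layer2', 'layer3', 'layer4']:
    getattr(model, name).register_forward_hook(
        lambda m, inp, out, name=name: print(f'{name}: {tuple(out.shape)}'))

model(torch.randn(1, 3, 224, 224))
# Expected trace for a 224x224 input:
# maxPool: (1, 64, 56, 56)
# layer1:  (1, 64, 56, 56)
# layer2:  (1, 128, 28, 28)
# layer3:  (1, 256, 14, 14)
# layer4:  (1, 512, 7, 7)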
2.2 SENet Module
import torch
import torch.nn as nn


class SELayer(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SELayer, self).__init__()
        # Squeeze: global average pooling reduces each channel to one value
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Excitation: bottleneck MLP producing per-channel weights in (0, 1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        # Scale: reweight each input channel by its learned weight
        return x * y.expand_as(x)
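As a quick sanity check (a minimal sketch I added on top of the SELayer above, with illustrative shapes): the module is shape-preserving, so it can be dropped in after any convolution without changing the rest of the network.

# Minimal usage sketch (my addition): SELayer reweights channels but
# keeps the input shape, so it composes with any convolutional backbone.
x = torch.randn(2, 64, 32, 32)
se = SELayer(channel=64, reduction=16)
print(se(x).shape)  # torch.Size([2, 64, 32, 32])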
2.3 CBAM Module
import torch
import torch.nn as nn


class ChannelAttention(nn.Module):
    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        # Shared bottleneck MLP, implemented as 1x1 convolutions
        self.fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # The MLP is shared between the avg-pooled and max-pooled descriptors
        avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
        max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
        out = avg_out + max_out
        return self.sigmoid(out)


class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1
        # 2 input channels: channel-wise mean map and channel-wise max map
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)
        return self.sigmoid(x)


class CBAM(nn.Module):
    def __init__(self, in_planes):
        super(CBAM, self).__init__()
        self.ca = ChannelAttention(in_planes)
        self.sa = SpatialAttention()

    def forward(self, x):
        # Channel attention first, then spatial attention
        x = self.ca(x) * x
        x = self.sa(x) * x
        return x
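Like SELayer, CBAM is shape-preserving, so it can be inserted anywhere in a convolutional network; a common placement (not shown in the original code) is on the residual branch of a block, right before the shortcut addition. A minimal usage sketch (my addition, with illustrative shapes):

# Minimal usage sketch (my addition): CBAM applies channel attention
# then spatial attention, leaving the input shape unchanged.
x = torch.randn(2, 64, 32, 32)
cbam = CBAM(in_planes=64)
print(cbam(x).shape)  # torch.Size([2, 64, 32, 32])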