Pipeline:

准备数据
调整输入
搭建网络
设定损失
设定优化器
开始迭代训练
执行前向传播 model.forward(inputs)
计算损失
损失反向传播 loss.backward()
执行优化
保存模型
验证集上测试效果, 获得指标 F_measure, mae
存储最好的模型
直到迭代结束

Tips

在GPU和CPU进行张量的转换的时候，如果张量过大，会有 pin memory 不足的现象：

建议在 GPU 和 CPU 之间频繁传递数据的部分，使用下面的方法：

ori_img = ori_img.cpu().clone().numpy().squeeze()

使用 .cpu().clone() 代替 .cpu() ，克隆之后原来的 Pinned Tensor 就会释放。 Pinned Memory 就会空出来。

Pyramid Pooling Module（PPM）

Paper(PSPNet): https://arxiv.org/pdf/1612.01105.pdf

不改变特征图尺寸的情况下，扩大感受野

采用四种尺寸的池化层，将特征图尺寸池化到[1×1, 2×2, 3×3, 6×6]
通过1×1卷积将每个池化特征的通道数减少到原来的1/4（channel/4）
将上面的特征利用bilinear 插值的方法，恢复特征图尺寸到原特征图尺寸
将池化后的特征和原来的特征图 concat 到一起，此时channel为原来的两倍
通过1×1卷积将通道恢复到源特征图的通道数

class PyramidPooling(nn.Module):
    """Pyramid pooling module (PSPNet-style).

    The input is average-pooled to 1x1, 2x2, 3x3 and 6x6 grids. Each pooled
    map is reduced to ``in_channels // 4`` channels by its own 1x1 block,
    bilinearly upsampled back to the input's spatial size, concatenated with
    the input along the channel axis, and fused by a final 1x1 block.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the final fusing block.
        **kwargs: Forwarded to the four branch ``_ConvBNReLU`` blocks only
            (the fusing block is built without them).

    NOTE(review): ``_ConvBNReLU`` is defined outside this snippet; it is
    presumably conv + batch-norm + ReLU -- confirm against its definition.
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super(PyramidPooling, self).__init__()
        # N = 4 branches, each reduced to 1/N of the input channels
        # (same ratio as the paper).
        inter_channels = in_channels // 4
        self.conv1 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs)
        self.conv2 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs)
        self.conv3 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs)
        self.conv4 = _ConvBNReLU(in_channels, inter_channels, 1, **kwargs)
        # The concatenation holds the input plus the four branches. Compute
        # its width from the real branch width so the fusing block still
        # matches when in_channels is not a multiple of 4 (it equals
        # in_channels * 2 whenever it is).
        self.out = _ConvBNReLU(in_channels + 4 * inter_channels, out_channels, 1)

    def pool(self, x, size):
        """Adaptive average pooling of ``x`` to a ``size`` x ``size`` grid."""
        # Functional form: no need to build a new nn.Module on every call.
        return F.adaptive_avg_pool2d(x, size)

    def upsample(self, x, size):
        """Bilinearly resize ``x`` to the spatial ``size`` (H, W)."""
        return F.interpolate(x, size, mode='bilinear', align_corners=True)

    def forward(self, x):
        """Return the fused feature map; spatial size equals that of ``x``."""
        size = x.size()[2:]
        branches = (
            (self.conv1, 1),
            (self.conv2, 2),
            (self.conv3, 3),
            (self.conv4, 6),
        )
        feats = [
            self.upsample(conv(self.pool(x, bins)), size)
            for conv, bins in branches
        ]
        # Concatenate the input with the four pooled branches on channels.
        x = torch.cat([x] + feats, dim=1)
        return self.out(x)

Feature Pyramid Networks(FPN)

特征金字塔:多尺度的信息融合，结合不同层次的语义信息，对特征进行融合

共有四个部分：

自下而上：c2-c5表示ResNet的卷积组，卷积组包含了多个Bottleneck结构
自上而下：首先对C5 进行1*1的卷积降低通道数得到P5，依次上采样
横向连接：将上采样后的高层语义信息与浅层的位置细节进行融合。将上一层上采样后的，通道数相同的特征直接进行相加
卷积融合：用3*3的卷积对相加后的特征进行融合，目的在于消除上采样过程的混叠效应（aliasing effect），生成最终的特征图

```python

import torch.nn as nn import torch.nn.functional as F import math

# ResNet的基本Bottleneck类

class Bottleneck(nn.Module):
    """ResNet bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions plus a skip.

    Args:
        inplanes: Number of input channels.
        planes: Width of the first two convolutions; the block outputs
            ``planes * expansion`` channels.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input to build the skip
            branch. Required whenever the input shape differs from the
            output shape (channel count or stride); ``None`` means the
            input is added unchanged.
    """

    expansion = 4  # output channels = planes * expansion

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.bottleneck = nn.Sequential(
            nn.Conv2d(inplanes, planes, 1, bias=False),
            nn.BatchNorm2d(planes),
            nn.ReLU(inplace=True),
            nn.Conv2d(planes, planes, 3, stride, 1, bias=False),
            nn.BatchNorm2d(planes),
            nn.ReLU(inplace=True),
            nn.Conv2d(planes, self.expansion * planes, 1, bias=False),
            nn.BatchNorm2d(self.expansion * planes),
        )
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bottleneck(x) + skip(x))``."""
        identity = x
        out = self.bottleneck(x)
        # Only project the skip branch when a projection was supplied.
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        return self.relu(out)

# FPN的类，初始化需要一个list，代表ResNet的每一个阶段的Bottleneck的数量

class FPN(nn.Module):
    """Feature Pyramid Network on top of a ResNet bottleneck backbone.

    Args:
        layers: Sequence of four ints giving the number of ``Bottleneck``
            blocks in each backbone stage (C2..C5).

    ``forward`` takes a 3-channel image batch and returns ``(p2, p3, p4, p5)``,
    each with 256 channels.
    """

    def __init__(self, layers):
        super(FPN, self).__init__()
        self.inplanes = 64

        # C1 stem: the first convolution and pooling layers of ResNet.
        self.conv1 = nn.Conv2d(3, 64, 7, 2, 3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(3, 2, 1)
        # Bottom-up pathway: C2, C3, C4, C5.
        self.layer1 = self._make_layer(64, layers[0])
        self.layer2 = self._make_layer(128, layers[1], 2)
        self.layer3 = self._make_layer(256, layers[2], 2)
        self.layer4 = self._make_layer(512, layers[3], 2)
        # Reduce C5's channels to obtain P5.
        self.toplayer = nn.Conv2d(2048, 256, 1, 1, 0)
        # 3x3 convolutions that smooth the merged maps.
        self.smooth1 = nn.Conv2d(256, 256, 3, 1, 1)
        self.smooth2 = nn.Conv2d(256, 256, 3, 1, 1)
        self.smooth3 = nn.Conv2d(256, 256, 3, 1, 1)
        # Lateral 1x1 convolutions bringing C4/C3/C2 to 256 channels.
        self.latlayer1 = nn.Conv2d(1024, 256, 1, 1, 0)
        self.latlayer2 = nn.Conv2d(512, 256, 1, 1, 0)
        self.latlayer3 = nn.Conv2d(256, 256, 1, 1, 0)

    def _make_layer(self, planes, blocks, stride=1):
        """Build one backbone stage of ``blocks`` Bottleneck blocks.

        Only the first block uses ``stride`` and, when the shape changes, a
        1x1 conv + BN projection on its skip branch. ``self.inplanes`` is
        updated to the stage's output channel count.
        """
        downsample = None
        if stride != 1 or self.inplanes != Bottleneck.expansion * planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, Bottleneck.expansion * planes, 1, stride, bias=False),
                nn.BatchNorm2d(Bottleneck.expansion * planes),
            )
        stage = [Bottleneck(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * Bottleneck.expansion
        stage.extend(Bottleneck(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def _upsample_add(self, x, y):
        """Top-down merge: resize ``x`` to ``y``'s spatial size and add them."""
        _, _, H, W = y.shape
        # F.upsample is deprecated; align_corners=False is what it defaulted
        # to for bilinear mode, so results are unchanged (minus the warning).
        return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=False) + y

    def forward(self, x):
        # Bottom-up pathway.
        c1 = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        c2 = self.layer1(c1)
        c3 = self.layer2(c2)
        c4 = self.layer3(c3)
        c5 = self.layer4(c4)
        # Top-down pathway with lateral connections.
        p5 = self.toplayer(c5)
        p4 = self._upsample_add(p5, self.latlayer1(c4))
        p3 = self._upsample_add(p4, self.latlayer2(c3))
        p2 = self._upsample_add(p3, self.latlayer3(c2))
        # Smooth the merged maps with 3x3 convolutions.
        p4 = self.smooth1(p4)
        p3 = self.smooth2(p3)
        p2 = self.smooth3(p2)
        return p2, p3, p4, p5

net = FPN(layers=[3, 4, 6, 3])  # each entry is the number of Bottleneck blocks repeated in that stage

```

Pytorch代码与技巧

Pytorch常用代码片段