EfficientNetV2: Smaller Models and Faster Training


https://arxiv.org/abs/2104.00298

---------------------------------------------------------

2021-05-07

更快的训练速度  更好的参数效率

图像分辨率大,训练速度慢(batch-size小)

  训练中逐步增加图像分辨率,加快速度    自适应调整正则化

深度卷积在浅层训练速度慢    浅层阶段改用 Fused-MBConv(用普通 3x3 卷积替换 expand 1x1 + depthwise 3x3)

           

def make_divisible(channel, divisor, value=None):
    """Round `channel` to the nearest multiple of `divisor`.

    The result never falls below `value` (defaults to `divisor`) and is
    kept within 90% of the requested channel count, so rounding never
    removes more than 10% of the capacity.
    """
    floor = divisor if value is None else value
    rounded = max(floor, int(channel + divisor / 2) // divisor * divisor)
    # Rounding down by more than 10% would hurt capacity: bump up one step.
    if rounded < 0.9 * channel:
        rounded += divisor
    return rounded

class SELayer(nn.Module):
    """Squeeze-and-Excitation: recalibrate channels by a learned,
    globally-pooled gating vector in (0, 1).

    Args:
        channel: number of input/output channels of the gated tensor.
        value: base width used to size the bottleneck (`value // reduction`,
            rounded to a multiple of 8).
        reduction: bottleneck reduction factor (default 4).
    """

    def __init__(self, channel, value, reduction=4):
        super(SELayer, self).__init__()
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, make_divisible(value // reduction, 8)),
            nn.SiLU(),
            nn.Linear(make_divisible(value // reduction, 8), channel),
            nn.Sigmoid()
        )

    def forward(self, x):
        n, c, h, w = x.size()
        # BUG FIX: nn.Linear acts on the last dimension, so the pooled
        # (n, c, 1, 1) tensor must be flattened to (n, c) before the FC
        # stack, and the gate reshaped to (n, c, 1, 1) so it broadcasts
        # over the spatial dimensions of x.
        y = self.avgpool(x).view(n, c)
        y = self.fc(y).view(n, c, 1, 1)
        return x * y


class MBConv(nn.Module):
    """EfficientNetV2 building block: Fused-MBConv or MBConv.

    Args:
        inp: input channels.
        oup: output channels.
        stride: stride of the 3x3 conv (1 or 2).
        ratio: channel expansion ratio.
        fuse: stage flag from the config table. Truthy (deep stages)
            selects the regular MBConv (1x1 expand -> 3x3 depthwise ->
            SE -> 1x1 project). Falsy (shallow stages) selects
            Fused-MBConv, where a single ordinary 3x3 conv replaces the
            expand+depthwise pair and SE is dropped — per the paper,
            depthwise convs train slowly in shallow layers.

    BUG FIX: the two branches were swapped relative to the config flags
    ([0,0,0,1,1,1] across stages), which gave depthwise+SE blocks to the
    shallow stages and fused full convs (incorrectly with SE) to the deep
    stages — inverted from the paper's layout.
    """

    def __init__(self, inp, oup, stride, ratio, fuse):
        super(MBConv, self).__init__()
        hidden = round(inp * ratio)
        # Residual shortcut only when the block keeps resolution and width.
        self.identity = stride == 1 and inp == oup
        if fuse:
            # Regular MBConv with Squeeze-and-Excitation (deep stages).
            self.conv = nn.Sequential(
                nn.Conv2d(inp, hidden, 1, bias=False),
                nn.BatchNorm2d(hidden),
                nn.SiLU(),
                nn.Conv2d(hidden, hidden, 3, stride, 1, groups=hidden, bias=False),
                nn.BatchNorm2d(hidden),
                nn.SiLU(),
                SELayer(hidden, inp),
                nn.Conv2d(hidden, oup, 1, bias=False),
                nn.BatchNorm2d(oup)
            )
        else:
            # Fused-MBConv: one full 3x3 conv, no SE (shallow stages).
            self.conv = nn.Sequential(
                nn.Conv2d(inp, hidden, 3, stride, 1, bias=False),
                nn.BatchNorm2d(hidden),
                nn.SiLU(),
                nn.Conv2d(hidden, oup, 1, bias=False),
                nn.BatchNorm2d(oup)
            )

    def forward(self, x):
        if self.identity:
            return x + self.conv(x)
        return self.conv(x)


class EfficientNetV2(nn.Module):
    """EfficientNetV2 classifier: stem conv, six stacked MBConv stages,
    a 1x1 head conv, global average pooling and a linear classifier.

    Args:
        classes: number of output classes (default 1000).
        width: channel-width multiplier (default 1).
    """

    def __init__(self, classes=1000, width=1):
        super(EfficientNetV2, self).__init__()
        # Per stage: expand ratio, channels, repeats, first-block stride, fuse flag.
        self.cfgs = [
            [1, 24, 2, 1, 0],
            [4, 48, 4, 2, 0],
            [4, 64, 4, 2, 0],
            [4, 128, 6, 2, 1],
            [6, 160, 9, 1, 1],
            [6, 272, 15, 2, 1],
        ]

        channels = make_divisible(24 * width, 8)
        stem = nn.Sequential(
            nn.Conv2d(3, channels, 3, 2, 1, bias=False),
            nn.BatchNorm2d(channels),
            nn.SiLU()
        )

        blocks = [stem]
        for ratio, c, repeats, stride, fuse in self.cfgs:
            out_ch = make_divisible(c * width, 8)
            for idx in range(repeats):
                # Only the first block of a stage downsamples.
                blocks.append(MBConv(channels, out_ch, stride if idx == 0 else 1, ratio, fuse))
                channels = out_ch
        self.feat = nn.Sequential(*blocks)

        head = 1792 if width <= 1.0 else make_divisible(1792 * width, 8)
        self.conv = nn.Sequential(
            nn.Conv2d(channels, head, 1, bias=False),
            nn.BatchNorm2d(head),
            nn.SiLU()
        )
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.cls = nn.Linear(head, classes)

        self.init_weight()

    def init_weight(self):
        """He-style init for convs, unit/zero for BN, small-variance linear."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.001)
                m.bias.data.zero_()

    def forward(self, x):
        """Return class logits of shape (batch, classes)."""
        y = self.conv(self.feat(x))
        y = self.avgpool(y)
        return self.cls(y.flatten(1))

相关