EfficientNetV2: Smaller Models and Faster Training
https://arxiv.org/abs/2104.00298
---------------------------------------------------------
2021-05-07
Faster training speed and better parameter efficiency.
Large input resolutions slow training (they force a small batch size).
Progressively increase image resolution during training to speed it up, adaptively adjusting regularization.
Depthwise convolutions are slow to train in the shallow (early) layers.
def make_divisible(channel, divisor, value=None):
    """Round *channel* to the nearest multiple of *divisor* (MobileNet rule).

    The result is clamped below by *value* (defaults to *divisor*) and is
    bumped up by one *divisor* if rounding would drop more than 10% below
    the requested channel count.
    """
    if value is None:
        value = divisor
    hidden = max(value, int(channel + divisor / 2) // divisor * divisor)
    # Never round down by more than 10% of the original channel count.
    if hidden < 0.9 * channel:
        hidden += divisor
    return hidden


class SELayer(nn.Module):
    """Squeeze-and-Excitation channel attention.

    Args:
        channel: channels of the input feature map (the expanded width).
        value: base channel count used to size the bottleneck — the SE
            reduction is computed from the block's *input* channels, not
            the expanded ones.
        reduction: bottleneck reduction ratio.
    """

    def __init__(self, channel, value, reduction=4):
        super(SELayer, self).__init__()
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, make_divisible(value // reduction, 8)),
            nn.SiLU(),
            nn.Linear(make_divisible(value // reduction, 8), channel),
            nn.Sigmoid(),
        )

    def forward(self, x):
        n, c, h, w = x.size()
        # BUGFIX: the pooled map must be flattened to (n, c) before the
        # Linear layers (nn.Linear expects the channel dim last), and the
        # gate reshaped to (n, c, 1, 1) so it broadcasts over the spatial
        # dims.  The original fed a (n, c, 1, 1) tensor into nn.Linear
        # (shape mismatch) and multiplied x by a (n, c) tensor (wrong
        # broadcast alignment).
        y = self.avgpool(x).view(n, c)
        y = self.fc(y).view(n, c, 1, 1)
        return x * y


class MBConv(nn.Module):
    """MBConv / Fused-MBConv block from EfficientNetV2.

    Args:
        inp: input channels.
        oup: output channels.
        stride: stride of the spatial convolution (1 or 2).
        ratio: expansion ratio for the hidden width.
        fuse: if truthy, build the Fused-MBConv variant — a single 3x3
            conv replaces the 1x1-expand + 3x3-depthwise pair and there
            is no SE module (paper Table 4); otherwise build the regular
            MBConv with a depthwise 3x3 conv and SE.
    """

    def __init__(self, inp, oup, stride, ratio, fuse):
        super(MBConv, self).__init__()
        hidden = round(inp * ratio)
        # Residual connection only when the shape is preserved.
        self.identity = stride == 1 and inp == oup
        if fuse:
            # Fused-MBConv: 3x3 conv (expand + spatial in one) -> 1x1 project.
            # BUGFIX: the fused variant carries no SELayer in
            # EfficientNetV2; the original inserted one here.
            self.conv = nn.Sequential(
                nn.Conv2d(inp, hidden, 3, stride, 1, bias=False),
                nn.BatchNorm2d(hidden),
                nn.SiLU(),
                nn.Conv2d(hidden, oup, 1, bias=False),
                nn.BatchNorm2d(oup),
            )
        else:
            # Regular MBConv: 1x1 expand -> 3x3 depthwise -> SE -> 1x1 project.
            self.conv = nn.Sequential(
                nn.Conv2d(inp, hidden, 1, bias=False),
                nn.BatchNorm2d(hidden),
                nn.SiLU(),
                nn.Conv2d(hidden, hidden, 3, stride, 1, groups=hidden, bias=False),
                nn.BatchNorm2d(hidden),
                nn.SiLU(),
                SELayer(hidden, inp),
                nn.Conv2d(hidden, oup, 1, bias=False),
                nn.BatchNorm2d(oup),
            )

    def forward(self, x):
        if self.identity:
            return x + self.conv(x)
        return self.conv(x)


class EfficientNetV2(nn.Module):
    """EfficientNetV2 backbone + classifier head.

    Args:
        classes: number of output classes.
        width: channel-width multiplier.
    """

    def __init__(self, classes=1000, width=1):
        super(EfficientNetV2, self).__init__()
        # Stage table: expand ratio, channels, repeats, stride, fused flag.
        # BUGFIX: the paper places Fused-MBConv in the *shallow* stages
        # (depthwise convs are slow to train there — see header note) and
        # regular MBConv+SE in the deep ones; the original table had the
        # flag column inverted (it was copied from a config where the last
        # column meant use_se, not fuse).
        self.cfgs = [
            # r, c, n, s, fuse
            [1, 24, 2, 1, 1],
            [4, 48, 4, 2, 1],
            [4, 64, 4, 2, 1],
            [4, 128, 6, 2, 0],
            [6, 160, 9, 1, 0],
            [6, 272, 15, 2, 0],
        ]
        inp = make_divisible(24 * width, 8)
        # Stem: stride-2 3x3 conv.
        layers = [
            nn.Sequential(
                nn.Conv2d(3, inp, 3, 2, 1, bias=False),
                nn.BatchNorm2d(inp),
                nn.SiLU(),
            )
        ]
        for r, c, n, s, fuse in self.cfgs:
            oup = make_divisible(c * width, 8)
            for i in range(n):
                # Only the first block of a stage downsamples.
                layers.append(MBConv(inp, oup, s if i == 0 else 1, r, fuse))
                inp = oup
        self.feat = nn.Sequential(*layers)
        # Head: 1x1 conv to 1792 channels (scaled only when widening).
        oup = make_divisible(1792 * width, 8) if width > 1.0 else 1792
        self.conv = nn.Sequential(
            nn.Conv2d(inp, oup, 1, bias=False),
            nn.BatchNorm2d(oup),
            nn.SiLU(),
        )
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.cls = nn.Linear(oup, classes)
        self.init_weight()

    def init_weight(self):
        """He-style init for convs, unit BN, small-normal Linear."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.001)
                m.bias.data.zero_()

    def forward(self, x):
        """Return class logits of shape (batch, classes)."""
        feat = self.feat(x)
        conv = self.conv(feat)
        avgpool = self.avgpool(conv)
        cls = self.cls(avgpool.view(avgpool.size(0), -1))
        return cls