Activate or Not: Learning Customized Activation
https://arxiv.org/abs/2009.04759
-----------------------------------------------------------------------------
2021-04-28
swish:x*sigmoid(β*x)
maxout:max(f1(x),f2(x))
acon:(f1(x)-f2(x))*sigmoid(β(f1(x)-f2(x)))+f2(x)
acon: beta is a fixed (learnable but non-input-adaptive) parameter
meta-acon: beta is predicted adaptively from the input, deciding whether to activate neurons at layer / channel / pixel granularity
class AconC(nn.Module):
    """ACON-C activation: dpx * sigmoid(beta * dpx) + p2 * x, with dpx = (p1 - p2) * x.

    p1 and p2 are learnable per-channel scales; beta is a learnable per-channel
    switching factor (fixed, i.e. not input-adaptive).
    Paper: "Activate or Not: Learning Customized Activation",
    https://arxiv.org/abs/2009.04759
    """

    def __init__(self, channel):
        super(AconC, self).__init__()
        # Per-channel parameters, shaped (1, C, 1, 1) to broadcast over (N, C, H, W).
        self.p1 = nn.Parameter(torch.randn(1, channel, 1, 1))
        self.p2 = nn.Parameter(torch.randn(1, channel, 1, 1))
        self.beta = nn.Parameter(torch.ones(1, channel, 1, 1))

    def forward(self, x):
        # Factor (p1*x - p2*x) as (p1 - p2)*x and compute it once
        # (the original evaluated it twice per forward pass).
        dpx = (self.p1 - self.p2) * x
        return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x


class MetaAconC(nn.Module):
    """Meta-ACON: ACON-C whose beta is generated adaptively from the input.

    beta is predicted per (sample, channel) by a small bottleneck network
    (1x1 conv -> BN -> 1x1 conv -> BN -> sigmoid) applied to the globally
    average-pooled input.
    """

    def __init__(self, channel, rate=16):
        super(MetaAconC, self).__init__()
        self.p1 = nn.Parameter(torch.randn(1, channel, 1, 1))
        self.p2 = nn.Parameter(torch.randn(1, channel, 1, 1))
        hidden = max(rate, channel // rate)  # bottleneck width, floored at `rate`
        self.beta = nn.Sequential(
            nn.Conv2d(channel, hidden, 1, bias=True),
            nn.BatchNorm2d(hidden),
            nn.Conv2d(hidden, channel, 1, bias=True),
            nn.BatchNorm2d(channel),
        )

    def forward(self, x):
        # Global average pool to (N, C, 1, 1). Use the documented `keepdim`
        # kwarg: the original `keepdims` is an undocumented numpy-compat alias
        # that raises TypeError on older PyTorch releases.
        pooled = x.mean(dim=2, keepdim=True).mean(dim=3, keepdim=True)
        beta = torch.sigmoid(self.beta(pooled))
        dpx = (self.p1 - self.p2) * x
        return dpx * torch.sigmoid(beta * dpx) + self.p2 * x