# The code below does not follow the paper exactly: identical (shared) MLP stages are merged, which reduces the number of layers.
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
class STN3d(nn.Module):  # spatial transformer network (T-Net) that predicts a 3x3 input transform
    def __init__(self, channel):
        super(STN3d, self).__init__()
        # Conv1d with kernel size 1 acts as a per-point shared MLP
        self.conv1 = torch.nn.Conv1d(channel, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 9)  # 9 values = the flattened 3x3 transform
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)
    def forward(self, x):
        batchsize = x.size()[0]  # the first dimension is the batch size
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))  # torch.max returns two tensors: the max values along the given dim, and their indices
        x = torch.max(x, 2, keepdim=True)[0]  # x is [B, D, N]; taking the max over the N points implements the max pooling
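        # (Illustrative aside, not part of the original code:)
        #   torch.max(torch.tensor([[[1., 3., 2.]]]), 2, keepdim=True)
        #   # -> (values=tensor([[[3.]]]), indices=tensor([[[1]]])); the [0] above keeps only the values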
        x = x.view(-1, 1024)  # flatten to [B, 1024]
        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)
        # repeat() tiles a tensor along the given dims, producing batchsize copies of the
        # flattened 3x3 identity; this is the paper's initialization of the transform to the identity
        iden = torch.from_numpy(np.array([1, 0, 0, 0, 1, 0, 0, 0, 1]).astype(np.float32)).view(1, 9).repeat(
            batchsize, 1)
        iden = iden.to(x.device)  # keep the identity on the same device as x
        x = x + iden
        x = x.view(-1, 3, 3)  # reshape into batchsize 3x3 matrices
        return x
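# Quick shape check for STN3d (an illustrative aside; batch size and point count are assumptions):
#   stn = STN3d(channel=3)
#   stn.eval()                                # BatchNorm uses running stats, so any batch size works
#   print(stn(torch.rand(8, 3, 1024)).shape)  # torch.Size([8, 3, 3]): one transform per cloud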
class STNkd(nn.Module):  # same architecture as STN3d, but predicts a k x k feature-space transform
    def __init__(self, k=64):
        super(STNkd, self).__init__()
        self.conv1 = torch.nn.Conv1d(k, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k * k)
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)
        self.k = k
    def forward(self, x):
        batchsize = x.size()[0]
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)
        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)
        # initialize the predicted transform to the flattened k x k identity
        iden = torch.from_numpy(np.eye(self.k).flatten().astype(np.float32)).view(1, self.k * self.k).repeat(
            batchsize, 1)
        iden = iden.to(x.device)
        x = x + iden
        x = x.view(-1, self.k, self.k)
        return x
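# STNkd is the feature-space analogue of STN3d. Illustrative shape check (sizes are assumptions):
#   fstn = STNkd(k=64)
#   fstn.eval()
#   print(fstn(torch.rand(8, 64, 1024)).shape)  # torch.Size([8, 64, 64])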
class PointNetEncoder(nn.Module):
    def __init__(self, global_feat=True, feature_transform=False, channel=3):
        super(PointNetEncoder, self).__init__()
        self.stn = STN3d(channel)
        self.conv1 = torch.nn.Conv1d(channel, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.global_feat = global_feat
        self.feature_transform = feature_transform
        if self.feature_transform:
            self.fstn = STNkd(k=64)
    def forward(self, x):
        B, D, N = x.size()  # e.g. B, D, N = 24, 3, 1024
        trans = self.stn(x)
        x = x.transpose(2, 1)  # transpose swaps exactly two dimensions; permute can instead reorder all dimensions at once
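        # (Illustrative aside:) torch.rand(2, 3, 5).transpose(2, 1).shape -> torch.Size([2, 5, 3]),
        # i.e. [B, D, N] becomes [B, N, D]; equivalent here to x.permute(0, 2, 1)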
        if D > 3:  # when each point carries more than 3 channels (e.g. xyz plus normals), only the first 3 are transformed
            feature = x[:, :, 3:]
            x = x[:, :, :3]
        x = torch.bmm(x, trans)  # torch.bmm is batched matrix multiplication; both inputs must be 3-D tensors
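        # (Illustrative aside:) torch.bmm(torch.rand(4, 1024, 3), torch.rand(4, 3, 3)).shape
        # -> torch.Size([4, 1024, 3]): each cloud's N x 3 points are multiplied by its own 3x3 transform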
        if D > 3:
            x = torch.cat([x, feature], dim=2)  # re-attach the untransformed extra channels
        x = x.transpose(2, 1)
        x = F.relu(self.bn1(self.conv1(x)))
        if self.feature_transform:
            trans_feat = self.fstn(x)
            x = x.transpose(2, 1)
            x = torch.bmm(x, trans_feat)  # apply the predicted 64x64 transform to the per-point features
            x = x.transpose(2, 1)
        else:
            trans_feat = None
        pointfeat = x  # keep the 64-d per-point features for the segmentation head
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.bn3(self.conv3(x))
        x = torch.max(x, 2, keepdim=True)[0]  # max pool over the N points -> global feature
        x = x.view(-1, 1024)
        if self.global_feat:
            return x, trans, trans_feat
        else:
            x = x.view(-1, 1024, 1).repeat(1, 1, N)  # broadcast the global feature to every point
            return torch.cat([x, pointfeat], 1), trans, trans_feat
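# Illustrative shape check for the encoder (a sketch; the sizes and variable names are assumptions):
#   enc = PointNetEncoder(global_feat=True, feature_transform=False, channel=3)
#   enc.eval()
#   g, trans, trans_feat = enc(torch.rand(8, 3, 1024))
#   # g.shape -> torch.Size([8, 1024]); trans.shape -> torch.Size([8, 3, 3]); trans_feat is None
# With global_feat=False the output is instead per-point: [B, 1024 + 64, N] = [8, 1088, 1024].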
def feature_transform_reguliarzer(trans):  # encourages trans_feat to stay close to an orthogonal matrix
    d = trans.size()[1]
    I = torch.eye(d, device=trans.device)[None, :, :]
    # torch.norm computes a matrix or vector norm; here it is the Frobenius norm of
    # A A^T - I per sample, and the mean averages that loss over the batch
    loss = torch.mean(torch.norm(torch.bmm(trans, trans.transpose(2, 1)) - I, dim=(1, 2)))
    return loss
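# A minimal smoke test (a hedged sketch: the sizes and seed are illustrative assumptions;
# the 0.001 regularization weight is the one used in the PointNet paper):
if __name__ == "__main__":
    torch.manual_seed(0)
    encoder = PointNetEncoder(global_feat=False, feature_transform=True, channel=3)
    encoder.eval()  # eval mode so BatchNorm uses running statistics
    pts = torch.rand(4, 3, 256)  # [B, D, N]: 4 clouds of 256 xyz points
    feat, trans, trans_feat = encoder(pts)
    print(feat.shape)        # torch.Size([4, 1088, 256]): per-point + broadcast global features
    print(trans.shape)       # torch.Size([4, 3, 3])
    print(trans_feat.shape)  # torch.Size([4, 64, 64])
    reg = feature_transform_reguliarzer(trans_feat)  # scalar; small when trans_feat is near-orthogonal
    # in training this would be added to the task loss, e.g. total = task_loss + 0.001 * reg
    print(reg)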