# Source listing metadata: 575 lines, 17 KiB, Python
import torch
|
|
import torch.nn as nn
|
|
|
|
|
|
class ResidualBlock(nn.Module):
    """Residual block base class. Implementation of a residual block.

    The block computes ``l2(activation(l1(x))) + l3(x)``, where ``l1``,
    ``l2`` and ``l3`` are linear layers. ``l3`` maps the input to
    ``output_dim`` so that the skip branch can be summed with the main branch.

    Reference: https://arxiv.org/pdf/1512.03385.pdf : Equation #2
    """

    def __init__(
        self,
        input_dim,
        output_dim,
        hidden_dim,
        spectral_norm=False,
        activation=None,
    ):
        """Residual block constructor

        :param input_dim: dimension of the input
        :type input_dim: int
        :param output_dim: dimension of the output
        :type output_dim: int
        :param hidden_dim: hidden dimension for mapping the input (first block)
        :type hidden_dim: int
        :param spectral_norm: apply spectral normalization to every linear
            layer, defaults to False
        :type spectral_norm: bool, optional
        :param activation: activation applied after the first linear layer,
            given either as a module instance or as a module class; defaults
            to None, in which case a new ``torch.nn.ReLU`` is created
        :type activation: torch.nn.Module or type, optional
        """
        super().__init__()

        self._spectral_norm = spectral_norm
        self._input_dim = input_dim
        self._output_dim = output_dim
        self._hidden_dim = hidden_dim

        # forward() needs an activation; build it per instance instead of
        # sharing one module object through a default argument.
        if activation is None:
            activation = nn.ReLU()
        elif isinstance(activation, type):
            # accept a class such as ``nn.Tanh`` as well as an instance
            activation = activation()
        self.activation = activation

        self.l1 = self._spect_norm(nn.Linear(input_dim, hidden_dim))
        self.l2 = self._spect_norm(nn.Linear(hidden_dim, output_dim))
        # projection of the input used as the skip connection
        self.l3 = self._spect_norm(nn.Linear(input_dim, output_dim))

    def forward(self, x):
        """Forward pass of the residual block

        :param x: input tensor whose last dimension is ``input_dim``
        :type x: torch.tensor
        :return: sum of the transformed branch and the projected input,
            with last dimension ``output_dim``
        :rtype: torch.tensor
        """
        y = self.activation(self.l1(x))
        y = self.l2(y)
        x = self.l3(x)
        return y + x

    def _spect_norm(self, x):
        """Wrap ``x`` with spectral normalization if it is enabled

        :param x: layer to wrap
        :type x: torch.nn.Module
        :return: the wrapped layer, or ``x`` itself when spectral
            normalization is disabled
        :rtype: torch.nn.Module
        """
        return nn.utils.spectral_norm(x) if self._spectral_norm else x

    @property
    def spectral_norm(self):
        """Whether spectral normalization is applied to the linear layers"""
        return self._spectral_norm

    @property
    def input_dim(self):
        """Dimension of the input"""
        return self._input_dim

    @property
    def output_dim(self):
        """Dimension of the output"""
        return self._output_dim

    @property
    def hidden_dim(self):
        """Hidden dimension of the first linear layer"""
        return self._hidden_dim
|
|
|
|
|
|
class MLP(torch.nn.Module):
    """Multi-layer Perceptron base class"""

    def __init__(
        self,
        input_dim,
        output_dim,
        inner_size=20,
        n_layers=2,
        func=nn.Tanh,
        layers=None,
        batch_norm=False,
        spectral_norm=False,
    ):
        """Deep neural network model

        :param input_dim: input channel for the network
        :type input_dim: int
        :param output_dim: output channel for the network
        :type output_dim: int
        :param inner_size: inner size of each hidden layer, defaults to 20
        :type inner_size: int, optional
        :param n_layers: number of hidden layers in the network (ignored
            when ``layers`` is given), defaults to 2
        :type n_layers: int, optional
        :param func: function(s) to pass to the network, defaults to nn.Tanh.
            A single class is reused after every hidden layer; a list must
            hold exactly one entry per hidden layer, where ``None`` means
            "no activation (and no batch normalization) after that layer"
        :type func: (list of) torch.nn function(s), optional
        :param layers: list of hidden layer sizes for the network,
            defaults to None
        :type layers: list[int], optional
        :param batch_norm: apply a batch normalization layer before each
            activation, defaults to False
        :type batch_norm: bool, optional
        :param spectral_norm: apply spectral normalization to every linear
            layer, defaults to False
        :type spectral_norm: bool, optional
        :raises ValueError: if ``func`` is a list whose length differs from
            the number of hidden layers
        """
        super().__init__()

        self._input_dim = input_dim
        self._output_dim = output_dim
        self._inner_size = inner_size
        self._n_layers = n_layers
        self._bnorm = batch_norm
        self._spectnorm = spectral_norm

        # ``list(...)`` copies the caller's sequence and also accepts tuples.
        hidden = [inner_size] * n_layers if layers is None else list(layers)
        sizes = [input_dim, *hidden, output_dim]

        # one linear layer per consecutive pair of sizes
        self._layers = [
            self.spect_norm(nn.Linear(n_in, n_out))
            for n_in, n_out in zip(sizes[:-1], sizes[1:])
        ]
        n_hidden = len(self._layers) - 1

        if isinstance(func, (list, tuple)):
            # A mismatching list would otherwise be truncated silently by
            # ``zip`` below, dropping hidden layers from the model.
            if len(func) != n_hidden:
                raise ValueError(
                    f"expected {n_hidden} activation function(s), one per "
                    f"hidden layer, got {len(func)}"
                )
            self._functions = func if isinstance(func, list) else list(func)
        else:
            self._functions = [func for _ in range(n_hidden)]

        # batch-norm modules are only built when they are actually used
        self._batchnorm = (
            [nn.LazyBatchNorm1d() for _ in range(n_hidden)]
            if batch_norm
            else []
        )

        unique_list = []
        for idx, (layer, activation) in enumerate(
            zip(self._layers[:-1], self._functions)
        ):
            unique_list.append(layer)
            if activation is not None:
                if batch_norm:
                    unique_list.append(self._batchnorm[idx])
                unique_list.append(activation())

        # the output layer is never followed by an activation
        unique_list.append(self._layers[-1])

        self.model = nn.Sequential(*unique_list)

    def spect_norm(self, x):
        """Wrap ``x`` with spectral normalization if it is enabled

        :param x: layer to wrap
        :type x: torch.nn.Module
        :return: the wrapped layer, or ``x`` itself when spectral
            normalization is disabled
        :rtype: torch.nn.Module
        """
        return nn.utils.spectral_norm(x) if self._spectnorm else x

    def forward(self, x):
        """Forward method for NeuralNet class

        :param x: network input data
        :type x: torch.tensor
        :return: network output
        :rtype: torch.tensor
        """
        return self.model(x)

    @property
    def input_dim(self):
        """Input channel of the network"""
        return self._input_dim

    @property
    def output_dim(self):
        """Output channel of the network"""
        return self._output_dim

    @property
    def inner_size(self):
        """Inner size passed to the constructor"""
        return self._inner_size

    @property
    def n_layers(self):
        """Number of hidden layers passed to the constructor"""
        return self._n_layers

    @property
    def functions(self):
        """Activation classes, one per hidden layer"""
        return self._functions

    @property
    def layers(self):
        """Linear layers of the network, output layer included"""
        return self._layers
|
|
|
|
|
|
class TNet(nn.Module):
    """T-Net base class. Implementation of T-Network with dense layers.

    Encodes every point, pools over the points and decodes the pooled
    feature into one square matrix per batch element.

    Reference: Charles R. Qi et al. https://arxiv.org/pdf/1612.00593.pdf
    """

    def __init__(self, input_dim):
        """T-Net block constructor

        :param input_dim: input dimension of point cloud
        :type input_dim: int
        """
        super().__init__()

        activation = nn.Tanh
        # options shared by the encoder and the decoder
        common = {"func": activation, "batch_norm": True}

        # encoder: input_dim -> 64 -> 128 -> 1024
        self._mlp1 = MLP(
            input_dim=input_dim,
            output_dim=1024,
            layers=[64, 128],
            **common,
        )

        # decoder: 1024 -> 512 -> 256 -> input_dim * input_dim
        self._mlp2 = MLP(
            input_dim=1024,
            output_dim=input_dim * input_dim,
            layers=[512, 256],
            **common,
        )

        self._function = activation()
        # NOTE(review): batch norm over dimension 1 of a
        # [batch, N, features] tensor acts on the point axis N;
        # confirm this is intended.
        self._bn1 = nn.LazyBatchNorm1d()

    def forward(self, X):
        """Forward pass for T-Net

        :param X: input tensor, shape [batch, N, $input_{dim}$]
            with batch the batch size, N number of points and $input_{dim}$
            the input dimension of the point cloud.
        :type X: torch.tensor
        :return: output affine matrix transformation, shape
            [batch, $input_{dim}$, $input_{dim}$] with batch
            the batch size and $input_{dim}$ the input dimension
            of the point cloud.
        :rtype: torch.tensor
        """
        n_batch = X.shape[0]
        dim = X.shape[2]

        # encode with the first MLP, then normalize and activate
        encoded = self._function(self._bn1(self._mlp1(X)))

        # symmetric aggregation: maximum over dimension 1
        pooled = torch.max(encoded, dim=1).values

        # decode with the second MLP and fold into square matrices
        flat = self._mlp2(pooled)
        return flat.reshape(n_batch, dim, dim)
|
|
|
|
|
|
class PointNet(nn.Module):
    """Point-Net base class. Implementation of Point Network for segmentation.

    Dense-layer variant: per-point encoding, max pooling into a global
    feature, concatenation of global and local features, per-point decoding.

    Reference: Charles R. Qi et al. https://arxiv.org/pdf/1612.00593.pdf
    """

    def __init__(self, input_dim, output_dim, tnet=False):
        """Point-Net block constructor

        :param input_dim: input dimension of point cloud
        :type input_dim: int
        :param output_dim: output dimension of point cloud
        :type output_dim: int
        :param tnet: apply T-Net transformation, defaults to False
        :type tnet: bool, optional
        """
        super().__init__()

        activation = nn.Tanh
        self._use_tnet = tnet
        # options shared by all four MLPs
        common = {"func": activation, "batch_norm": True}

        # first encoder: input_dim -> 64 -> 64
        self._mlp1 = MLP(
            input_dim=input_dim,
            output_dim=64,
            inner_size=64,
            n_layers=1,
            **common,
        )

        # second encoder: 64 -> 128 -> 1024
        self._mlp2 = MLP(
            input_dim=64,
            output_dim=1024,
            inner_size=128,
            n_layers=1,
            **common,
        )

        # first decoder: 1088 (= 1024 global + 64 local) -> 512 -> 256 -> 128
        self._mlp3 = MLP(
            input_dim=1088,
            output_dim=128,
            layers=[512, 256],
            **common,
        )

        # output head: no hidden layer, hence a single linear map
        self._mlp4 = MLP(
            input_dim=128,
            output_dim=output_dim,
            n_layers=0,
            **common,
        )

        if self._use_tnet:
            self._tnet_transform = TNet(input_dim=input_dim)
            self._tnet_feature = TNet(input_dim=64)

        self._function = activation()
        # NOTE(review): these batch norms receive [batch, N, features]
        # tensors, so dimension 1 (the point axis N) is what gets
        # normalized; confirm this is intended.
        self._bn1 = nn.LazyBatchNorm1d()
        self._bn2 = nn.LazyBatchNorm1d()
        self._bn3 = nn.LazyBatchNorm1d()

    def concat(self, embedding, input_):
        """Returns concatenation of global and local features for Point-Net

        :param embedding: global features of Point-Net, shape
            [batch, $global_{dim}$] with batch the batch size.
        :type embedding: torch.tensor
        :param input_: local features of Point-Net, shape
            [batch, N, $local_{dim}$] with batch the batch size and
            N number of points.
        :type input_: torch.tensor
        :return: concatenation vector, shape
            [batch, N, $global_{dim} + local_{dim}$]; the global features
            come first along the last dimension.
        :rtype: torch.tensor
        """
        n_points = input_.shape[1]
        # copy the global feature once per point: [batch, N, global_dim]
        tiled = embedding.unsqueeze(1).repeat(1, n_points, 1)
        return torch.cat([tiled, input_], dim=2)

    def forward(self, X):
        """Forward pass for Point-Net

        :param X: input tensor, shape [batch, N, $input_{dim}$]
            with batch the batch size, N number of points and $input_{dim}$
            the input dimension of the point cloud.
        :type X: torch.tensor
        :return: segmentation vector, shape [batch, N, $output_{dim}$]
            with batch the batch size, N number of points and $output_{dim}$
            the output dimension of the point cloud.
        :rtype: torch.tensor
        """
        act = self._function

        # optional input transform
        if self._use_tnet:
            X = torch.matmul(X, self._tnet_transform(X))

        # first encoding stage
        local = act(self._bn1(self._mlp1(X)))

        # optional feature transform
        if self._use_tnet:
            local = torch.matmul(local, self._tnet_feature(local))

        # ``local`` is kept for the later concatenation;
        # second encoding stage
        encoded = act(self._bn2(self._mlp2(local)))

        # symmetric aggregation: maximum over dimension 1
        global_feature = torch.max(encoded, dim=1).values

        # attach the global feature to every local feature
        merged = self.concat(global_feature, local)

        # decoding with the third and fourth MLP
        decoded = act(self._bn3(self._mlp3(merged)))
        return self._mlp4(decoded)
|
|
|
|
|
|
class ConvTNet(nn.Module):
    """T-Net base class. Implementation of T-Network with convolutional layers.

    Encodes every point with kernel-size-1 convolutions, pools over the
    points and decodes the pooled feature into one square matrix per batch
    element.

    Reference: Ali Kashefi et al. https://arxiv.org/abs/2208.13434
    """

    def __init__(self, input_dim):
        """T-Net block constructor

        :param input_dim: input dimension of point cloud
        :type input_dim: int
        """
        super().__init__()

        activation = nn.Tanh
        self._function = activation()

        # encoder: (conv, batch norm, activation) for 64, 128, 1024 channels;
        # the same activation module is reused at every stage
        stages = []
        n_in = input_dim
        for n_out in (64, 128, 1024):
            stages.append(nn.Conv1d(n_in, n_out, 1))
            stages.append(nn.BatchNorm1d(n_out))
            stages.append(self._function)
            n_in = n_out
        self._block1 = nn.Sequential(*stages)

        # decoder: 1024 -> 512 -> 256 -> input_dim * input_dim
        self._block2 = MLP(
            input_dim=1024,
            output_dim=input_dim * input_dim,
            layers=[512, 256],
            func=activation,
            batch_norm=True,
        )

    def forward(self, X):
        """Forward pass for T-Net

        :param X: input tensor, shape [batch, $input_{dim}$, N]
            with batch the batch size, N number of points and $input_{dim}$
            the input dimension of the point cloud.
        :type X: torch.tensor
        :return: output affine matrix transformation, shape
            [batch, $input_{dim}$, $input_{dim}$] with batch
            the batch size and $input_{dim}$ the input dimension
            of the point cloud.
        :rtype: torch.tensor
        """
        n_batch = X.size(0)
        dim = X.size(1)

        # encode with the convolutional block
        features = self._block1(X)

        # symmetric aggregation: maximum over the last dimension
        pooled = torch.max(features, dim=-1).values

        # decode with the MLP and fold into square matrices
        return self._block2(pooled).reshape(n_batch, dim, dim)
|
|
|
|
|
|
class ConvPointNet(nn.Module):
    """Point-Net base class. Implementation of Point Network for segmentation.

    Convolutional variant: all per-point maps are kernel-size-1 ``Conv1d``
    layers acting on channel-first tensors.

    Reference: Ali Kashefi et al. https://arxiv.org/abs/2208.13434
    """

    def __init__(self, input_dim, output_dim, tnet=False):
        """Point-Net block constructor

        :param input_dim: input dimension of point cloud
        :type input_dim: int
        :param output_dim: output dimension of point cloud
        :type output_dim: int
        :param tnet: apply T-Net transformation, defaults to False
        :type tnet: bool, optional
        """
        super().__init__()

        self._function = nn.Tanh()
        self._use_tnet = tnet

        def conv_stack(*channels):
            # one (conv, batch norm, activation) triple per consecutive
            # channel pair; the activation module is shared
            modules = []
            for n_in, n_out in zip(channels[:-1], channels[1:]):
                modules.append(nn.Conv1d(n_in, n_out, 1))
                modules.append(nn.BatchNorm1d(n_out))
                modules.append(self._function)
            return nn.Sequential(*modules)

        self._block1 = conv_stack(input_dim, 64, 64)
        self._block2 = conv_stack(64, 64, 128, 1024)
        # 1088 = 1024 global channels + 64 local channels
        self._block3 = conv_stack(1088, 512, 256, 128)
        self._block4 = nn.Conv1d(128, output_dim, 1)

        if self._use_tnet:
            self._tnet_transform = ConvTNet(input_dim=input_dim)
            self._tnet_feature = ConvTNet(input_dim=64)

    def concat(self, embedding, input_):
        """Returns concatenation of global and local features for Point-Net

        :param embedding: global features of Point-Net, shape
            [batch, $global_{dim}$] with batch the batch size.
        :type embedding: torch.tensor
        :param input_: local features of Point-Net, shape
            [batch, $local_{dim}$, N] with batch the batch size and
            N number of points.
        :type input_: torch.tensor
        :return: concatenation vector, shape
            [batch, $global_{dim} + local_{dim}$, N]; the global features
            come first along dimension 1.
        :rtype: torch.tensor
        """
        n_points = input_.shape[-1]
        # broadcast the global feature over the points: [batch, global_dim, N]
        tiled = embedding.unsqueeze(2).expand(-1, -1, n_points)
        return torch.cat([tiled, input_], dim=1)

    def forward(self, X):
        """Forward pass for Point-Net

        :param X: input tensor, shape [batch, N, $input_{dim}$]
            with batch the batch size, N number of points and $input_{dim}$
            the input dimension of the point cloud.
        :type X: torch.tensor
        :return: segmentation vector, shape [batch, N, $output_{dim}$]
            with batch the batch size, N number of points and $output_{dim}$
            the output dimension of the point cloud.
        :rtype: torch.tensor
        """
        # move to the channel-first layout expected by Conv1d
        X = X.permute(0, 2, 1)

        # optional input transform, applied in point-first layout
        if self._use_tnet:
            matrix = self._tnet_transform(X)
            X = torch.matmul(X.transpose(2, 1), matrix).transpose(2, 1)

        # first encoding block
        local = self._block1(X)

        # optional feature transform, applied in point-first layout
        if self._use_tnet:
            matrix = self._tnet_feature(local)
            local = torch.matmul(local.transpose(2, 1), matrix).transpose(2, 1)

        # ``local`` is kept for the later concatenation;
        # second encoding block followed by the maximum over the points
        pooled = torch.max(self._block2(local), dim=-1).values

        # attach the global feature to every local feature
        merged = self.concat(pooled, local)

        # decoding with the third and fourth block
        decoded = self._block4(self._block3(merged))

        # back to the point-first layout of the input
        return decoded.permute(0, 2, 1)
|