import torch
import torch.nn as nn


class ResidualBlock(nn.Module):
    """Residual block base class. Implementation of a residual block.

    Reference: https://arxiv.org/pdf/1512.03385.pdf : Equation #2
    """

    def __init__(self, input_dim, output_dim, hidden_dim, spectral_norm=False):
        """Residual block constructor

        :param input_dim: dimension of the input
        :type input_dim: int
        :param output_dim: dimension of the output
        :type output_dim: int
        :param hidden_dim: hidden dimension for mapping the input (first block)
        :type hidden_dim: int
        :param spectral_norm: apply spectral normalization, defaults to False
        :type spectral_norm: bool, optional
        """
        super().__init__()
        self._spectral_norm = spectral_norm
        self._input_dim = input_dim
        self._output_dim = output_dim
        self._hidden_dim = hidden_dim
        self.l1 = self._spect_norm(nn.Linear(input_dim, hidden_dim))
        self.l2 = self._spect_norm(nn.Linear(hidden_dim, output_dim))
        self.l3 = self._spect_norm(nn.Linear(input_dim, output_dim))
        # activation applied after the first linear layer; the forward pass
        # references self.activation, so a ReLU is assumed here as the default
        self.activation = nn.ReLU()

    def forward(self, x):
        y = self.activation(self.l1(x))
        y = self.l2(y)
        x = self.l3(x)
        return y + x

    def _spect_norm(self, x):
        return nn.utils.spectral_norm(x) if self._spectral_norm else x

    @property
    def spectral_norm(self):
        return self._spectral_norm

    @property
    def input_dim(self):
        return self._input_dim

    @property
    def output_dim(self):
        return self._output_dim

    @property
    def hidden_dim(self):
        return self._hidden_dim
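

# A minimal shape-check sketch for ResidualBlock (assumed usage, not part of the
# original module): the block maps [batch, input_dim] to [batch, output_dim]
# through a hidden layer plus a learned linear skip connection.
def _example_residual_block():
    block = ResidualBlock(input_dim=3, output_dim=5, hidden_dim=8)
    x = torch.rand(16, 3)
    y = block(x)
    assert y.shape == (16, 5)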


class MLP(torch.nn.Module):
    """Multi-layer Perceptron base class"""

    def __init__(
        self,
        input_dim,
        output_dim,
        inner_size=20,
        n_layers=2,
        func=nn.Tanh,
        layers=None,
        batch_norm=False,
        spectral_norm=False,
    ):
        """Deep neural network model

        :param input_dim: input channel for the network
        :type input_dim: int
        :param output_dim: output channel for the network
        :type output_dim: int
        :param inner_size: inner size of each hidden layer, defaults to 20
        :type inner_size: int, optional
        :param n_layers: number of layers in the network, defaults to 2
        :type n_layers: int, optional
        :param func: function(s) to pass to the network, defaults to nn.Tanh
        :type func: (list of) torch.nn function(s), optional
        :param layers: list of layers for the network, defaults to None
        :type layers: list[int], optional
        :param batch_norm: apply batch normalization layer, defaults to False
        :type batch_norm: bool, optional
        :param spectral_norm: apply spectral normalization layer, defaults to False
        :type spectral_norm: bool, optional
        """
        super().__init__()
        self._input_dim = input_dim
        self._output_dim = output_dim
        self._inner_size = inner_size
        self._n_layers = n_layers
        self._layers = layers
        self._bnorm = batch_norm
        self._spectnorm = spectral_norm

        if layers is None:
            layers = [inner_size] * n_layers
        tmp_layers = layers.copy()
        tmp_layers.insert(0, self._input_dim)
        tmp_layers.append(self._output_dim)

        self._layers = []
        self._batchnorm = []
        for i in range(len(tmp_layers) - 1):
            self._layers.append(
                self.spect_norm(nn.Linear(tmp_layers[i], tmp_layers[i + 1]))
            )
            self._batchnorm.append(nn.LazyBatchNorm1d())

        if isinstance(func, list):
            self._functions = func
        else:
            self._functions = [func for _ in range(len(self._layers) - 1)]

        unique_list = []
        for layer, func, bnorm in zip(
            self._layers[:-1], self._functions, self._batchnorm
        ):
            unique_list.append(layer)
            if func is not None:
                if batch_norm:
                    unique_list.append(bnorm)
                unique_list.append(func())
        unique_list.append(self._layers[-1])
        self.model = nn.Sequential(*unique_list)

    def spect_norm(self, x):
        return nn.utils.spectral_norm(x) if self._spectnorm else x

    def forward(self, x):
        """Forward method for NeuralNet class

        :param x: network input data
        :type x: torch.tensor
        :return: network output
        :rtype: torch.tensor
        """
        return self.model(x)

    @property
    def input_dim(self):
        return self._input_dim

    @property
    def output_dim(self):
        return self._output_dim

    @property
    def inner_size(self):
        return self._inner_size

    @property
    def n_layers(self):
        return self._n_layers

    @property
    def functions(self):
        return self._functions

    @property
    def layers(self):
        return self._layers


class TNet(nn.Module):
    """T-Net base class. Implementation of T-Network.

    Reference: Charles R. Qi et al. https://arxiv.org/pdf/1612.00593.pdf
    """

    def __init__(self, input_dim):
        """T-Net block constructor

        :param input_dim: input dimension of point cloud
        :type input_dim: int
        """
        super().__init__()
        function = nn.Tanh
        self._mlp1 = MLP(
            input_dim=input_dim,
            output_dim=1024,
            layers=[64, 128],
            func=function,
            batch_norm=True,
        )
        self._mlp2 = MLP(
            input_dim=1024,
            output_dim=input_dim * input_dim,
            layers=[512, 256],
            func=function,
            batch_norm=True,
        )
        self._function = function()
        self._bn1 = nn.LazyBatchNorm1d()

    def forward(self, X):
        r"""Forward pass for T-Net

        :param X: input tensor, shape [batch, N, $input_{dim}$] with batch the
            batch size, N number of points and $input_{dim}$ the input
            dimension of the point cloud.
        :type X: torch.tensor
        :return: output affine matrix transformation, shape
            [batch, $input_{dim} \times input_{dim}$] with batch the batch size
            and $input_{dim}$ the input dimension of the point cloud.
        :rtype: torch.tensor
        """
        batch, input_dim = X.shape[0], X.shape[2]
        # encoding using first MLP
        X = self._mlp1(X)
        X = self._function(self._bn1(X))
        # applying symmetric function to aggregate information (using max as default)
        X, _ = torch.max(X, dim=1)
        # decoding using second MLP
        X = self._mlp2(X)
        return X.reshape(batch, input_dim, input_dim)
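

# A minimal shape-check sketch for MLP and TNet (assumed usage, not part of the
# original module): MLP maps the last dimension from input_dim to output_dim,
# while TNet turns a point cloud [batch, N, input_dim] into a learned
# [batch, input_dim, input_dim] affine transformation.
def _example_mlp_and_tnet():
    mlp = MLP(input_dim=3, output_dim=4, inner_size=16, n_layers=2)
    x = torch.rand(8, 3)
    assert mlp(x).shape == (8, 4)

    tnet = TNet(input_dim=3)
    cloud = torch.rand(8, 100, 3)  # [batch, N, input_dim]
    assert tnet(cloud).shape == (8, 3, 3)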


class PointNet(nn.Module):
    """Point-Net base class. Implementation of Point Network for segmentation.

    Reference: Charles R. Qi et al. https://arxiv.org/pdf/1612.00593.pdf
    """

    def __init__(self, input_dim, output_dim, tnet=False):
        """Point-Net block constructor

        :param input_dim: input dimension of point cloud
        :type input_dim: int
        :param output_dim: output dimension of point cloud
        :type output_dim: int
        :param tnet: apply T-Net transformation, defaults to False
        :type tnet: bool, optional
        """
        super().__init__()
        function = nn.Tanh
        self._use_tnet = tnet
        self._mlp1 = MLP(
            input_dim=input_dim,
            output_dim=64,
            inner_size=64,
            n_layers=1,
            func=function,
            batch_norm=True,
        )
        self._mlp2 = MLP(
            input_dim=64,
            output_dim=1024,
            inner_size=128,
            n_layers=1,
            func=function,
            batch_norm=True,
        )
        self._mlp3 = MLP(
            input_dim=1088,
            output_dim=128,
            layers=[512, 256],
            func=function,
            batch_norm=True,
        )
        self._mlp4 = MLP(
            input_dim=128,
            output_dim=output_dim,
            n_layers=0,
            func=function,
            batch_norm=True,
        )
        if self._use_tnet:
            self._tnet_transform = TNet(input_dim=input_dim)
            self._tnet_feature = TNet(input_dim=64)
        self._function = function()
        self._bn1 = nn.LazyBatchNorm1d()
        self._bn2 = nn.LazyBatchNorm1d()
        self._bn3 = nn.LazyBatchNorm1d()

    def concat(self, embedding, input_):
        """Returns concatenation of global and local features for Point-Net

        :param embedding: global features of Point-Net, shape
            [batch, $input_{dim}$] with batch the batch size and $input_{dim}$
            the input dimension of the point cloud.
        :type embedding: torch.tensor
        :param input_: local features of Point-Net, shape
            [batch, N, $input_{dim}$] with batch the batch size, N number of
            points and $input_{dim}$ the input dimension of the point cloud.
        :type input_: torch.tensor
        :return: concatenation vector, shape [batch, N, $input_{dim}$] with
            batch the batch size, N number of points and $input_{dim}$
        :rtype: torch.tensor
        """
        n_points = input_.shape[1]
        embedding = embedding.repeat(n_points, 1, 1).permute(1, 0, 2)
        return torch.cat([embedding, input_], dim=2)

    def forward(self, X):
        """Forward pass for Point-Net

        :param X: input tensor, shape [batch, N, $input_{dim}$] with batch the
            batch size, N number of points and $input_{dim}$ the input
            dimension of the point cloud.
        :type X: torch.tensor
        :return: segmentation vector, shape [batch, N, $output_{dim}$] with
            batch the batch size, N number of points and $output_{dim}$ the
            output dimension of the point cloud.
        :rtype: torch.tensor
        """
        # applying input transform T-Net if needed
        if self._use_tnet:
            transform = self._tnet_transform(X)
            X = torch.matmul(X, transform)
        # encoding using first MLP
        X = self._mlp1(X)
        X = self._function(self._bn1(X))
        # applying feature transform T-Net if needed
        if self._use_tnet:
            transform = self._tnet_feature(X)
            X = torch.matmul(X, transform)
        # saving latent representation for later concatenation
        latent = X
        # encoding using second MLP
        X = self._mlp2(X)
        X = self._function(self._bn2(X))
        # applying symmetric function to aggregate information (using max as default)
        X, _ = torch.max(X, dim=1)
        # concatenating with latent vector
        X = self.concat(X, latent)
        # decoding using third MLP
        X = self._mlp3(X)
        X = self._function(self._bn3(X))
        # decoding using fourth MLP
        X = self._mlp4(X)
        return X


class ConvTNet(nn.Module):
    """T-Net base class. Implementation of T-Network with convolutional layers.

    Reference: Ali Kashefi et al. https://arxiv.org/abs/2208.13434
    """

    def __init__(self, input_dim):
        """T-Net block constructor

        :param input_dim: input dimension of point cloud
        :type input_dim: int
        """
        super().__init__()
        function = nn.Tanh
        self._function = function()
        self._block1 = nn.Sequential(
            nn.Conv1d(input_dim, 64, 1),
            nn.BatchNorm1d(64),
            self._function,
            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            self._function,
            nn.Conv1d(128, 1024, 1),
            nn.BatchNorm1d(1024),
            self._function,
        )
        self._block2 = MLP(
            input_dim=1024,
            output_dim=input_dim * input_dim,
            layers=[512, 256],
            func=function,
            batch_norm=True,
        )

    def forward(self, X):
        r"""Forward pass for T-Net

        :param X: input tensor, shape [batch, $input_{dim}$, N] with batch the
            batch size, N number of points and $input_{dim}$ the input
            dimension of the point cloud.
        :type X: torch.tensor
        :return: output affine matrix transformation, shape
            [batch, $input_{dim} \times input_{dim}$] with batch the batch size
            and $input_{dim}$ the input dimension of the point cloud.
        :rtype: torch.tensor
        """
        batch, input_dim = X.shape[0], X.shape[1]
        # encoding using the convolutional block
        X = self._block1(X)
        # applying symmetric function to aggregate information (using max as default)
        X, _ = torch.max(X, dim=-1)
        # decoding using the MLP block
        X = self._block2(X)
        return X.reshape(batch, input_dim, input_dim)
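

# A minimal shape-check sketch for PointNet (assumed usage, not part of the
# original module): per-point features and a max-pooled global feature are
# concatenated before the segmentation head, so the output keeps one vector
# per input point.
def _example_pointnet():
    net = PointNet(input_dim=3, output_dim=2, tnet=True)
    cloud = torch.rand(8, 100, 3)  # [batch, N, input_dim]
    out = net(cloud)
    assert out.shape == (8, 100, 2)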


class ConvPointNet(nn.Module):
    """Point-Net base class. Implementation of Point Network for segmentation
    with convolutional layers.

    Reference: Ali Kashefi et al. https://arxiv.org/abs/2208.13434
    """

    def __init__(self, input_dim, output_dim, tnet=False):
        """Point-Net block constructor

        :param input_dim: input dimension of point cloud
        :type input_dim: int
        :param output_dim: output dimension of point cloud
        :type output_dim: int
        :param tnet: apply T-Net transformation, defaults to False
        :type tnet: bool, optional
        """
        super().__init__()
        self._function = nn.Tanh()
        self._use_tnet = tnet
        self._block1 = nn.Sequential(
            nn.Conv1d(input_dim, 64, 1),
            nn.BatchNorm1d(64),
            self._function,
            nn.Conv1d(64, 64, 1),
            nn.BatchNorm1d(64),
            self._function,
        )
        self._block2 = nn.Sequential(
            nn.Conv1d(64, 64, 1),
            nn.BatchNorm1d(64),
            self._function,
            nn.Conv1d(64, 128, 1),
            nn.BatchNorm1d(128),
            self._function,
            nn.Conv1d(128, 1024, 1),
            nn.BatchNorm1d(1024),
            self._function,
        )
        self._block3 = nn.Sequential(
            nn.Conv1d(1088, 512, 1),
            nn.BatchNorm1d(512),
            self._function,
            nn.Conv1d(512, 256, 1),
            nn.BatchNorm1d(256),
            self._function,
            nn.Conv1d(256, 128, 1),
            nn.BatchNorm1d(128),
            self._function,
        )
        self._block4 = nn.Conv1d(128, output_dim, 1)
        if self._use_tnet:
            self._tnet_transform = ConvTNet(input_dim=input_dim)
            self._tnet_feature = ConvTNet(input_dim=64)

    def concat(self, embedding, input_):
        """Returns concatenation of global and local features for Point-Net

        :param embedding: global features of Point-Net, shape
            [batch, $input_{dim}$] with batch the batch size and $input_{dim}$
            the input dimension of the point cloud.
        :type embedding: torch.tensor
        :param input_: local features of Point-Net, shape
            [batch, $input_{dim}$, N] with batch the batch size, N number of
            points and $input_{dim}$ the input dimension of the point cloud.
        :type input_: torch.tensor
        :return: concatenation vector, shape [batch, $input_{dim}$, N] with
            batch the batch size, N number of points and $input_{dim}$
        :rtype: torch.tensor
        """
        n_points = input_.shape[-1]
        embedding = embedding.unsqueeze(2).repeat(1, 1, n_points)
        return torch.cat([embedding, input_], dim=1)

    def forward(self, X):
        """Forward pass for Point-Net

        :param X: input tensor, shape [batch, N, $input_{dim}$] with batch the
            batch size, N number of points and $input_{dim}$ the input
            dimension of the point cloud.
        :type X: torch.tensor
        :return: segmentation vector, shape [batch, N, $output_{dim}$] with
            batch the batch size, N number of points and $output_{dim}$ the
            output dimension of the point cloud.
        :rtype: torch.tensor
        """
        # permuting indices to channels-first layout [batch, input_dim, N]
        X = X.permute(0, 2, 1)
        # applying input transform T-Net if needed
        if self._use_tnet:
            transform = self._tnet_transform(X)
            X = X.transpose(2, 1)
            X = torch.matmul(X, transform)
            X = X.transpose(2, 1)
        # encoding using first convolutional block
        X = self._block1(X)
        # applying feature transform T-Net if needed
        if self._use_tnet:
            transform = self._tnet_feature(X)
            X = X.transpose(2, 1)
            X = torch.matmul(X, transform)
            X = X.transpose(2, 1)
        # saving latent representation for later concatenation
        latent = X
        # encoding using second convolutional block
        X = self._block2(X)
        # applying symmetric function to aggregate information (using max as default)
        X, _ = torch.max(X, dim=-1)
        # concatenating with latent vector
        X = self.concat(X, latent)
        # decoding using third convolutional block
        X = self._block3(X)
        # decoding using final convolutional layer
        X = self._block4(X)
        # permuting indices back to [batch, N, output_dim]
        X = X.permute(0, 2, 1)
        return X
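

# A minimal shape-check sketch for ConvPointNet (assumed usage, not part of the
# original module): the convolutional variant takes the same [batch, N,
# input_dim] point cloud as PointNet and returns one output vector per point.
def _example_conv_pointnet():
    net = ConvPointNet(input_dim=3, output_dim=2, tnet=True)
    cloud = torch.rand(8, 100, 3)  # [batch, N, input_dim]
    out = net(cloud)
    assert out.shape == (8, 100, 2)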