Spaces:
Sleeping
Sleeping
from typing import Callable, List, Optional, Tuple, Union

import torch
from torch import Tensor

from .vision_transformer_utils import _log_api_usage_once
# Module-level alias for ``torch.nn.functional.interpolate``.
interpolate = torch.nn.functional.interpolate
# torch.nn does not provide this layer, so it is defined here.
class FrozenBatchNorm2d(torch.nn.Module):
    """
    BatchNorm2d variant whose batch statistics and affine parameters are fixed.

    ``weight``, ``bias``, ``running_mean`` and ``running_var`` are all registered
    as buffers (not parameters) and the forward pass only reads them.

    Args:
        num_features (int): Number of features ``C`` from an expected input of size ``(N, C, H, W)``
        eps (float): a value added to the denominator for numerical stability. Default: 1e-5
    """

    def __init__(self, num_features: int, eps: float = 1e-5):
        super().__init__()
        _log_api_usage_once(self)
        self.eps = eps
        # Buffers are registered in the order weight, bias, running_mean, running_var;
        # weight / running_var start at one, bias / running_mean at zero.
        for buffer_name, make_tensor in (
            ("weight", torch.ones),
            ("bias", torch.zeros),
            ("running_mean", torch.zeros),
            ("running_var", torch.ones),
        ):
            self.register_buffer(buffer_name, make_tensor(num_features))

    def _load_from_state_dict(
        self,
        state_dict: dict,
        prefix: str,
        local_metadata: dict,
        strict: bool,
        missing_keys: List[str],
        unexpected_keys: List[str],
        error_msgs: List[str],
    ):
        """
        Load this module's buffers from ``state_dict``.

        This module has no ``num_batches_tracked`` buffer, so that entry (if present
        under ``prefix``) is removed from ``state_dict`` in place before delegating
        to the parent implementation with the remaining arguments unchanged.
        """
        state_dict.pop(prefix + "num_batches_tracked", None)
        super()._load_from_state_dict(
            state_dict,
            prefix,
            local_metadata,
            strict,
            missing_keys,
            unexpected_keys,
            error_msgs,
        )

    def forward(self, x: Tensor) -> Tensor:
        """Return ``x * scale + shift`` using the stored statistics and affine buffers."""
        # All reshapes are done up front to keep the arithmetic fuser-friendly;
        # each buffer becomes (1, C, 1, 1) so it broadcasts over x.
        mean = self.running_mean.reshape(1, -1, 1, 1)
        var = self.running_var.reshape(1, -1, 1, 1)
        gamma = self.weight.reshape(1, -1, 1, 1)
        beta = self.bias.reshape(1, -1, 1, 1)
        # Fold normalization and affine transform into one multiply-add.
        scale = gamma * (var + self.eps).rsqrt()
        shift = beta - mean * scale
        return x * scale + shift

    def __repr__(self) -> str:
        num_features = self.weight.shape[0]
        return f"{self.__class__.__name__}({num_features}, eps={self.eps})"
class ConvNormActivation(torch.nn.Sequential):
    """
    Configurable block used for Convolution-Normalization-Activation blocks.

    The block is a ``torch.nn.Sequential`` holding, in order: a ``torch.nn.Conv2d``,
    the normalization layer (if any) and the activation layer (if any).

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
        kernel_size (int or tuple, optional): Size of the convolving kernel. Default: 3
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None,
            in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation``
            (element-wise when ``kernel_size`` or ``dilation`` is a tuple)
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the
            convolution layer. If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm2d``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation function which will be stacked on
            top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this
            layer won't be used. Default: ``torch.nn.ReLU``
        dilation (int or tuple): Spacing between kernel elements. Default: 1
        inplace (bool, optional): Parameter for the activation layer, which can optionally do the operation
            in-place. If ``None``, the activation layer is constructed without an ``inplace`` argument.
            Default: ``True``
        bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included
            if ``norm_layer is None``.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, ...]] = 3,
        stride: Union[int, Tuple[int, ...]] = 1,
        padding: Optional[Union[int, Tuple[int, ...], str]] = None,
        groups: int = 1,
        norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        dilation: Union[int, Tuple[int, ...]] = 1,
        inplace: Optional[bool] = True,
        bias: Optional[bool] = None,
    ) -> None:
        if padding is None:
            padding = self._default_padding(kernel_size, dilation)
        if bias is None:
            # Bias is only enabled by default when no norm layer follows the convolution.
            bias = norm_layer is None

        layers = [
            torch.nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                dilation=dilation,
                groups=groups,
                bias=bias,
            )
        ]
        if norm_layer is not None:
            layers.append(norm_layer(out_channels))
        if activation_layer is not None:
            # Only forward ``inplace`` when the caller did not explicitly disable it with None,
            # so activation constructors without that argument can still be used.
            params = {} if inplace is None else {"inplace": inplace}
            layers.append(activation_layer(**params))

        super().__init__(*layers)
        _log_api_usage_once(self)
        self.out_channels = out_channels

    @staticmethod
    def _default_padding(kernel_size, dilation):
        """
        Compute ``(kernel_size - 1) // 2 * dilation``, per dimension if needed.

        Returns an int when both arguments are ints; otherwise a tuple with one
        entry per dimension, broadcasting whichever argument is an int.

        Raises:
            ValueError: if both arguments are sequences of different lengths.
        """
        kernel_is_int = isinstance(kernel_size, int)
        dilation_is_int = isinstance(dilation, int)
        if kernel_is_int and dilation_is_int:
            return (kernel_size - 1) // 2 * dilation

        # At least one argument is a sequence; its length fixes the number of dimensions.
        ndim = len(dilation) if kernel_is_int else len(kernel_size)
        kernel = (kernel_size,) * ndim if kernel_is_int else tuple(kernel_size)
        dil = (dilation,) * ndim if dilation_is_int else tuple(dilation)
        if len(kernel) != len(dil):
            raise ValueError(
                f"kernel_size and dilation must have the same length, got {len(kernel)} and {len(dil)}"
            )
        return tuple((k - 1) // 2 * d for k, d in zip(kernel, dil))
class SqueezeExcitation(torch.nn.Module):
    """
    Squeeze-and-Excitation block from https://arxiv.org/abs/1709.01507 (see Fig. 1).

    Parameters ``activation`` and ``scale_activation`` correspond to ``delta`` and ``sigma`` in eq. 3.

    Args:
        input_channels (int): Number of channels in the input image
        squeeze_channels (int): Number of squeeze channels
        activation (Callable[..., torch.nn.Module], optional): ``delta`` activation. Default: ``torch.nn.ReLU``
        scale_activation (Callable[..., torch.nn.Module]): ``sigma`` activation. Default: ``torch.nn.Sigmoid``
    """

    def __init__(
        self,
        input_channels: int,
        squeeze_channels: int,
        activation: Callable[..., torch.nn.Module] = torch.nn.ReLU,
        scale_activation: Callable[..., torch.nn.Module] = torch.nn.Sigmoid,
    ) -> None:
        super().__init__()
        _log_api_usage_once(self)
        # Pool to a 1x1 map, then two 1x1 convolutions: channels -> squeeze -> channels.
        self.avgpool = torch.nn.AdaptiveAvgPool2d(1)
        self.fc1 = torch.nn.Conv2d(input_channels, squeeze_channels, 1)
        self.fc2 = torch.nn.Conv2d(squeeze_channels, input_channels, 1)
        # Both activation factories are called with no arguments.
        self.activation = activation()
        self.scale_activation = scale_activation()

    def _scale(self, input: Tensor) -> Tensor:
        """Compute the scale tensor: avgpool -> fc1 -> activation -> fc2 -> scale_activation."""
        squeezed = self.fc1(self.avgpool(input))
        excited = self.fc2(self.activation(squeezed))
        return self.scale_activation(excited)

    def forward(self, input: Tensor) -> Tensor:
        """Multiply ``input`` by the scale tensor computed from it."""
        scale = self._scale(input)
        return scale * input