# Source code for elektronn3.modules.l1batchnorm

# ELEKTRONN3 - Neural Network Toolkit
# Copyright (c) 2017 - now
# Max Planck Institute of Neurobiology, Munich, Germany
# Authors: Martin Drawitsch

import torch
from torch import nn

# TODO: ScriptModule
# @torch._jit_internal.weak_module
# class L1BatchNorm(torch.jit.ScriptModule):
class L1BatchNorm(nn.Module):
    """L1-Norm-based Batch Normalization module.

    Use with caution. This code is not extensively tested.

    Instead of the standard deviation, each channel is divided by its mean
    absolute deviation scaled by ``sqrt(pi / 2)``. For normally distributed
    data this scaled value equals the standard deviation.

    In training mode the statistics are computed from the current batch and
    the running buffers are updated. In evaluation mode the running buffers
    are used instead.

    NOTE(review): the reference links of the original docstring are missing
    from this copy of the source. They presumably pointed to the L1 batch
    normalization papers (Wu et al. 2018, Hoffer et al. 2018) -- TODO confirm.

    Args:
        num_features: Number of channels ``C`` of the expected input
            (dimension 1 of the input tensor).
        momentum: Fraction of the *previous* running statistics that is kept
            on each update; the new batch statistics are weighted with
            ``1 - momentum``. (This is the opposite weighting of the
            ``momentum`` argument of ``torch.nn.BatchNorm*``.)
    """

    __constants__ = ['l2factor', 'eps', 'momentum']

    def __init__(self, num_features: int, momentum: float = 0.9):
        super().__init__()
        self.register_buffer('running_mean', torch.zeros(num_features))
        # Despite its name, this buffer holds the running average of the
        # scaled L1 deviation (eps included), not a variance.
        # NOTE(review): it starts at zero, so evaluating the module before a
        # single training step divides by zero.
        self.register_buffer('running_var', torch.zeros(num_features))
        self.momentum = momentum
        self.gamma = nn.Parameter(torch.ones(1, num_features))
        self.beta = nn.Parameter(torch.zeros(1, num_features))
        self.eps = 1e-5
        self.l2factor = (3.1416 / 2) ** 0.5

    # @torch._jit_internal.weak_script
    # @torch.jit.script_method
    def forward(self, x):
        """Normalize ``x`` per channel and apply the affine transform.

        Args:
            x: Tensor with at least 2 dimensions whose dimension 1 has
                ``num_features`` entries.

        Returns:
            Tensor of the same shape as ``x``.

        Raises:
            ValueError: If ``x`` has fewer than 2 dimensions.
        """
        ndim = x.dim()
        if ndim < 2:
            raise ValueError(
                f'L1BatchNorm expects input with at least 2 dimensions, got {ndim}'
            )
        # Reduce over every dimension except the channel dimension (1).
        reduce_dims = (0,) + tuple(range(2, ndim))
        # Shape that broadcasts per-channel values over all dims of x.
        b_sh = (1, x.shape[1]) + (1,) * (ndim - 2)

        if self.training:
            mean = x.mean(dim=reduce_dims, keepdim=True)
            meandiff = x - mean
            absdiff = meandiff.abs()
            l1mean = absdiff.mean(dim=reduce_dims, keepdim=True)
            l1scaled = l1mean * self.l2factor + self.eps
            with torch.no_grad():  # Update running stats
                mom = self.momentum
                self.running_mean.mul_(mom).add_(mean.flatten() * (1 - mom))
                self.running_var.mul_(mom).add_(l1scaled.flatten() * (1 - mom))
        else:
            mean = self.running_mean.view(b_sh)
            l1scaled = self.running_var.view(b_sh)
            meandiff = x - mean

        gamma = self.gamma.view(b_sh)
        beta = self.beta.view(b_sh)
        return gamma * meandiff / l1scaled + beta
# @torch._jit_internal.weak_script
def l1_group_norm(x, num_groups, weight, bias, eps):
    """Apply L1-norm-based group normalization to ``x``.

    The channel dimension (1) is split into ``num_groups`` groups. Within
    each sample and group, the mean is subtracted and the result is divided
    by the mean absolute deviation scaled by ``sqrt(pi / 2)`` plus ``eps``.

    Args:
        x: Tensor with at least 2 dimensions; dimension 1 is the channel
            dimension and must be divisible by ``num_groups``.
        num_groups: Number of groups the channels are split into.
        weight: Per-channel scale with ``C`` elements, or ``None`` to skip
            scaling.
        bias: Per-channel offset with ``C`` elements, or ``None`` to skip
            the offset.
        eps: Value added to the denominator for numerical stability.

    Returns:
        Tensor of the same shape as ``x``.
    """
    l2factor = 1.2533  # == (pi / 2) ** 0.5
    ndim = x.dim()
    sh = x.shape
    n, c = sh[:2]

    # Split C dimension into groups: (n, g, c // g, *spatial)
    grouped = x.reshape(n, num_groups, c // num_groups, *sh[2:])
    # Reduce over grouped channels and all spatial dimensions. ``grouped``
    # has ndim + 1 dimensions, so these are dims 2 .. ndim.
    reduce_dims = tuple(range(2, ndim + 1))

    mean = grouped.mean(dim=reduce_dims, keepdim=True)
    meandiff = grouped - mean
    l1mean = meandiff.abs().mean(dim=reduce_dims, keepdim=True)
    l1scaled = l1mean * l2factor + eps
    normalized = (meandiff / l1scaled).reshape(sh)

    # Shape broadcastable over all dims of x
    broadcast_sh = (1, c) + (1,) * (ndim - 2)
    # weight/bias are None when the calling module was built without affine
    # parameters; in that case the normalized tensor is returned as is.
    if weight is not None:
        normalized = weight.view(broadcast_sh) * normalized
    if bias is not None:
        normalized = normalized + bias.view(broadcast_sh)
    return normalized
# @torch._jit_internal.weak_module
class L1GroupNorm(nn.GroupNorm):
    r"""Applies L1 Group Normalization over a mini-batch of inputs.

    This works in the same way as `torch.nn.GroupNorm`, but uses the
    scaled L1 norm instead of the L2 norm for better numerical stability,
    performance and half precision support.

    NOTE(review): the original docstring cited the papers that proposed L1
    *batch* normalization here, but the links are missing from this copy of
    the source -- TODO restore.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        num_groups (int): number of groups to separate the channels into
        num_channels (int): number of channels expected in input
        eps: a value added to the denominator for numerical stability.
            Default: 1e-5
        affine: a boolean value that when set to ``True``, this module
            has learnable per-channel affine parameters initialized to ones
            (for weights) and zeros (for biases). Default: ``True``.

    Shape:
        - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}`
        - Output: :math:`(N, C, *)` (same shape as input)
    """

    __constants__ = ['num_groups', 'num_channels', 'eps', 'affine', 'weight', 'bias']

    def __init__(self, num_groups, num_channels, eps=1e-5, affine=True):
        super().__init__(num_groups, num_channels, eps, affine)
        print('Warning: L1 Group norm is experimental and may have issues.')

    # The former ``@torch._jit_internal.weak_script_method`` decorator was
    # dropped: it is a private helper that current PyTorch releases no longer
    # provide, so referencing it raised AttributeError when the class body
    # was executed.
    def forward(self, input):
        """Normalize ``input`` with :func:`l1_group_norm` using this module's
        ``num_groups``, ``weight``, ``bias`` and ``eps``."""
        return l1_group_norm(input, self.num_groups, self.weight, self.bias, self.eps)