Source code for elektronn3.models.fcn_2d

# ELEKTRONN3 - Neural Network Toolkit
#
# Copyright (c) 2017 - now
# Max Planck Institute of Neurobiology, Munich, Germany

"""
Adapted from https://github.com/pochih/FCN-pytorch/blob/master/python/fcn.py
LICENSE https://github.com/meetshah1995/pytorch-semseg/blob/master/LICENSE

"""

from __future__ import print_function

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torchvision.models.vgg import VGG


def resize_conv_ala_distill(in_feat, out_feat, kernel_size, stride, padding, output_padding, dilation):
    """Build a resize-then-convolve block (upsample, pad, convolve).

    The returned ``nn.Sequential`` chains nearest-neighbour upsampling,
    reflection padding and a stride-1 ``nn.Conv2d``. Background:

    - https://distill.pub/2016/deconv-checkerboard/
    - https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/190

    TODO: needs refinement to work with arbitrary kernel size, stride and
    padding etc.

    Args:
        in_feat: Number of input channels of the convolution.
        out_feat: Number of output channels of the convolution.
        kernel_size: Kernel size of the convolution.
        stride: Used as the ``scale_factor`` of the upsampling layer; the
            convolution itself always runs with stride 1.
        padding: Amount of reflection padding applied before the convolution.
        output_padding: Accepted but not used by this function.
        dilation: Dilation of the convolution.

    Returns:
        An ``nn.Sequential`` of the three layers described above.
    """
    upsample = nn.UpsamplingNearest2d(scale_factor=stride)
    reflect_pad = nn.ReflectionPad2d(padding)
    conv = nn.Conv2d(in_feat, out_feat, kernel_size=kernel_size, stride=1, dilation=dilation)
    return nn.Sequential(upsample, reflect_pad, conv)
class FCN32s(nn.Module):
    """FCN decoder that upsamples only the deepest backbone feature map.

    Args:
        base_net: Callable applied to the input; it must return a mapping
            with key ``'x5'`` holding a 512-channel feature map.
        n_class: Number of output channels of the final 1x1 classifier.
    """

    def __init__(self, base_net, n_class):
        super().__init__()
        self.n_class = n_class
        self.base_net = base_net
        self.relu = nn.ReLU(inplace=True)
        # Channel widths along the decoder; stage i maps widths[i-1] -> widths[i].
        # Modules are registered as deconv1, bn1, deconv2, bn2, ... in that order.
        widths = (512, 512, 256, 128, 64, 32)
        for stage, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
            deconv = nn.ConvTranspose2d(
                c_in, c_out, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1
            )
            setattr(self, 'deconv%d' % stage, deconv)
            setattr(self, 'bn%d' % stage, nn.BatchNorm2d(c_out))
        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        """Return class scores; original annotation: size=(N, n_class, x.H/1, x.W/1)."""
        features = self.base_net(x)
        score = features['x5']  # size=(N, 512, x.H/32, x.W/32)
        stages = (
            (self.deconv1, self.bn1),  # -> (N, 512, x.H/16, x.W/16)
            (self.deconv2, self.bn2),  # -> (N, 256, x.H/8, x.W/8)
            (self.deconv3, self.bn3),  # -> (N, 128, x.H/4, x.W/4)
            (self.deconv4, self.bn4),  # -> (N, 64, x.H/2, x.W/2)
            (self.deconv5, self.bn5),  # -> (N, 32, x.H, x.W)
        )
        for deconv, bn in stages:
            score = bn(self.relu(deconv(score)))
        return self.classifier(score)
class FCN16s(nn.Module):
    """FCN decoder that fuses the ``'x4'`` backbone feature map as a skip.

    Args:
        base_net: Callable applied to the input; it must return a mapping
            with keys ``'x5'`` and ``'x4'`` (both 512-channel feature maps).
        n_class: Number of output channels of the final 1x1 classifier.
    """

    def __init__(self, base_net, n_class):
        super().__init__()
        self.n_class = n_class
        self.base_net = base_net
        self.relu = nn.ReLU(inplace=True)
        # Channel widths along the decoder; stage i maps widths[i-1] -> widths[i].
        # Modules are registered as deconv1, bn1, deconv2, bn2, ... in that order.
        widths = (512, 512, 256, 128, 64, 32)
        for stage, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
            deconv = nn.ConvTranspose2d(
                c_in, c_out, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1
            )
            setattr(self, 'deconv%d' % stage, deconv)
            setattr(self, 'bn%d' % stage, nn.BatchNorm2d(c_out))
        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        """Return class scores; original annotation: size=(N, n_class, x.H/1, x.W/1)."""
        features = self.base_net(x)
        x5 = features['x5']  # size=(N, 512, x.H/32, x.W/32)
        x4 = features['x4']  # size=(N, 512, x.H/16, x.W/16)

        # First stage: the skip connection is added *before* batch norm.
        score = self.relu(self.deconv1(x5))  # size=(N, 512, x.H/16, x.W/16)
        score = self.bn1(score + x4)

        plain_stages = (
            (self.deconv2, self.bn2),  # -> (N, 256, x.H/8, x.W/8)
            (self.deconv3, self.bn3),  # -> (N, 128, x.H/4, x.W/4)
            (self.deconv4, self.bn4),  # -> (N, 64, x.H/2, x.W/2)
            (self.deconv5, self.bn5),  # -> (N, 32, x.H, x.W)
        )
        for deconv, bn in plain_stages:
            score = bn(self.relu(deconv(score)))
        return self.classifier(score)
class FCN8s(nn.Module):
    """FCN decoder that fuses the ``'x4'`` and ``'x3'`` backbone feature maps.

    Args:
        base_net: Callable applied to the input; it must return a mapping
            with keys ``'x5'``, ``'x4'`` (512 channels each) and ``'x3'``
            (256 channels).
        n_class: Number of output channels of the final 1x1 classifier.
    """

    def __init__(self, base_net, n_class):
        super().__init__()
        self.n_class = n_class
        self.base_net = base_net
        self.relu = nn.ReLU(inplace=True)
        # Channel widths along the decoder; stage i maps widths[i-1] -> widths[i].
        # Modules are registered as deconv1, bn1, deconv2, bn2, ... in that order.
        widths = (512, 512, 256, 128, 64, 32)
        for stage, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
            deconv = nn.ConvTranspose2d(
                c_in, c_out, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1
            )
            setattr(self, 'deconv%d' % stage, deconv)
            setattr(self, 'bn%d' % stage, nn.BatchNorm2d(c_out))
        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        """Return class scores; original annotation: size=(N, n_class, x.H/1, x.W/1)."""
        features = self.base_net(x)
        x5 = features['x5']  # size=(N, 512, x.H/32, x.W/32)
        x4 = features['x4']  # size=(N, 512, x.H/16, x.W/16)
        x3 = features['x3']  # size=(N, 256, x.H/8, x.W/8)

        # Stages with a skip connection: the skip is added *before* batch norm.
        skip_stages = (
            (self.deconv1, self.bn1, x4),  # -> (N, 512, x.H/16, x.W/16)
            (self.deconv2, self.bn2, x3),  # -> (N, 256, x.H/8, x.W/8)
        )
        score = x5
        for deconv, bn, skip in skip_stages:
            score = self.relu(deconv(score))
            score = bn(score + skip)

        plain_stages = (
            (self.deconv3, self.bn3),  # -> (N, 128, x.H/4, x.W/4)
            (self.deconv4, self.bn4),  # -> (N, 64, x.H/2, x.W/2)
            (self.deconv5, self.bn5),  # -> (N, 32, x.H, x.W)
        )
        for deconv, bn in plain_stages:
            score = bn(self.relu(deconv(score)))
        return self.classifier(score)
class FCNs(nn.Module):
    """FCN decoder that fuses every intermediate backbone feature map.

    Args:
        base_net: Callable applied to the input; it must return a mapping
            with keys ``'x1'`` .. ``'x5'`` whose channel counts are
            64, 128, 256, 512 and 512 respectively.
        n_class: Number of output channels of the final 1x1 classifier.
    """

    def __init__(self, base_net, n_class):
        super().__init__()
        self.n_class = n_class
        self.base_net = base_net
        self.relu = nn.ReLU(inplace=True)
        # Channel widths along the decoder; stage i maps widths[i-1] -> widths[i].
        # Modules are registered as deconv1, bn1, deconv2, bn2, ... in that order.
        # NOTE: the original code kept commented-out variants of each stage
        # built with resize_conv_ala_distill(...) using the same arguments.
        widths = (512, 512, 256, 128, 64, 32)
        for stage, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
            deconv = nn.ConvTranspose2d(
                c_in, c_out, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1
            )
            setattr(self, 'deconv%d' % stage, deconv)
            setattr(self, 'bn%d' % stage, nn.BatchNorm2d(c_out))
        self.classifier = nn.Conv2d(32, n_class, kernel_size=1)

    def forward(self, x):
        """Return class scores; original annotation: size=(N, n_class, x.H/1, x.W/1)."""
        features = self.base_net(x)
        x5 = features['x5']  # size=(N, 512, x.H/32, x.W/32)
        x4 = features['x4']  # size=(N, 512, x.H/16, x.W/16)
        x3 = features['x3']  # size=(N, 256, x.H/8, x.W/8)
        x2 = features['x2']  # size=(N, 128, x.H/4, x.W/4)
        x1 = features['x1']  # size=(N, 64, x.H/2, x.W/2)

        # Stages with a skip connection: here the skip is added *after* batch norm.
        skip_stages = (
            (self.deconv1, self.bn1, x4),  # -> (N, 512, x.H/16, x.W/16)
            (self.deconv2, self.bn2, x3),  # -> (N, 256, x.H/8, x.W/8)
            (self.deconv3, self.bn3, x2),  # -> (N, 128, x.H/4, x.W/4)
            (self.deconv4, self.bn4, x1),  # -> (N, 64, x.H/2, x.W/2)
        )
        score = x5
        for deconv, bn, skip in skip_stages:
            score = bn(self.relu(deconv(score)))
            score = score + skip

        score = self.bn5(self.relu(self.deconv5(score)))  # size=(N, 32, x.H, x.W)
        return self.classifier(score)
class VGGNet(VGG):
    """VGG feature extractor that returns the output of every pooling stage.

    Args:
        model: Key into the module-level ``cfg`` (and ``ranges``) tables,
            e.g. ``'vgg16'``.
        requires_grad: If ``False``, ``requires_grad`` is switched off on all
            parameters.
        remove_fc: If ``True``, the fully-connected ``classifier`` created by
            the ``VGG`` base class is deleted.
        show_params: If ``True``, print the name and size of every parameter.
        in_channels: Number of input channels of the first convolution.
        batch_norm: If ``True``, a ``BatchNorm2d`` follows every convolution.
    """

    def __init__(self, model='vgg16', requires_grad=True, remove_fc=True, show_params=False, in_channels=3, batch_norm=False):
        super().__init__(make_layers(cfg[model], batch_norm, in_channels))
        if batch_norm:
            # The ``ranges`` table indexes the plain conv/ReLU layout. With
            # batch norm every convolution contributes one extra layer, so
            # the table would cut the stages at the wrong positions; derive
            # the boundaries from the actual pooling layers instead.
            self.ranges = self._ranges_from_pools(self.features)
        else:
            self.ranges = ranges[model]

        if not requires_grad:
            for param in super().parameters():
                param.requires_grad = False

        if remove_fc:  # delete redundant fully-connected layer params, can save memory
            del self.classifier

        if show_params:
            for name, param in self.named_parameters():
                print(name, param.size())

    @staticmethod
    def _ranges_from_pools(features):
        """Return ``(start, stop)`` index pairs, one per stage ending in a ``MaxPool2d``."""
        bounds = []
        start = 0
        for idx, layer in enumerate(features):
            if isinstance(layer, nn.MaxPool2d):
                bounds.append((start, idx + 1))
                start = idx + 1
        return tuple(bounds)

    def forward(self, x):
        """Run the feature layers and collect each stage output.

        Returns:
            dict mapping ``'x1'``, ``'x2'``, ... to the tensor produced at the
            end of the corresponding entry of ``self.ranges``.
        """
        output = {}
        # get the output of each maxpooling layer (5 maxpool in VGG net)
        for stage, (start, stop) in enumerate(self.ranges, start=1):
            for layer in range(start, stop):
                x = self.features[layer](x)
            output["x%d" % stage] = x
        return output
# (start, stop) index pairs into the ``features`` layer sequence, one pair per
# stage; VGGNet.forward runs layers start..stop-1 and records the result.
# These indices correspond to the layer layout without batch norm.
ranges = {
    'vgg11': ((0, 3), (3, 6), (6, 11), (11, 16), (16, 21)),
    'vgg13': ((0, 5), (5, 10), (10, 15), (15, 20), (20, 25)),
    'vgg16': ((0, 5), (5, 10), (10, 17), (17, 24), (24, 31)),
    'vgg19': ((0, 5), (5, 10), (10, 19), (19, 28), (28, 37)),
}

# cropped version from https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
# An integer is the output width of a 3x3 convolution; 'M' is a 2x2 max-pool.
cfg = {
    'vgg11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'vgg16': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
    'vgg19': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
}
def make_layers(cfg, batch_norm=False, in_channels=3):
    """Assemble a VGG-style feature stack from a layer specification.

    Args:
        cfg: Iterable of layer specs. ``'M'`` adds a 2x2 max-pool with
            stride 2; any other value is used as the output channel count of
            a 3x3 convolution (padding 1) followed by an in-place ReLU.
        batch_norm: If ``True``, insert ``BatchNorm2d`` between each
            convolution and its ReLU.
        in_channels: Number of input channels of the first convolution.

    Returns:
        An ``nn.Sequential`` containing the layers in order.
    """
    modules = []
    channels = in_channels
    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        modules.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(spec))
        modules.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        channels = spec
    return nn.Sequential(*modules)