DilatedNet

Implementations

Overview

This code provides a family of models that combine dilated convolutions with residual networks. Our models achieve better accuracy with fewer parameters than ResNet on both image classification and semantic segmentation.
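
The key trick, and the reason these networks keep high-resolution feature maps, is that a 3x3 convolution with dilation d and padding d enlarges the receptive field without any downsampling. A minimal sketch of the difference (not part of the repository code below):

import torch
import torch.nn as nn

x = torch.randn(1, 64, 56, 56)

# A strided conv halves the spatial resolution: 56x56 -> 28x28.
strided = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1)
print(strided(x).shape)  # torch.Size([1, 64, 28, 28])

# A dilated conv with padding == dilation keeps 56x56 while covering
# a 5x5 input neighborhood per output pixel.
dilated = nn.Conv2d(64, 64, kernel_size=3, dilation=2, padding=2)
print(dilated(x).shape)  # torch.Size([1, 64, 56, 56])

The full DRN definition from the repository follows.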

import math

import torch.nn as nn
import torch.utils.model_zoo as model_zoo

BatchNorm = nn.BatchNorm2d


webroot = 'https://tigress-web.princeton.edu/~fy/drn/models/'

model_urls = {
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'drn-c-26': webroot + 'drn_c_26-ddedf421.pth',
    'drn-c-42': webroot + 'drn_c_42-9d336e8c.pth',
    'drn-c-58': webroot + 'drn_c_58-0a53a92c.pth',
    'drn-d-22': webroot + 'drn_d_22-4bd2f8ea.pth',
    'drn-d-38': webroot + 'drn_d_38-eebb45f0.pth',
    'drn-d-54': webroot + 'drn_d_54-0e0534ff.pth',
    'drn-d-105': webroot + 'drn_d_105-12b40979.pth'
}
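# Note: no URLs are listed for drn-d-24, drn-d-40, drn-d-56 or drn-d-107, so
# the corresponding constructors below raise a KeyError when pretrained=True.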


def conv3x3(in_planes, out_planes, stride=1, padding=1, dilation=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=padding, bias=False, dilation=dilation)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 dilation=(1, 1), residual=True):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride,
                             padding=dilation[0], dilation=dilation[0])
        self.bn1 = BatchNorm(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes,
                             padding=dilation[1], dilation=dilation[1])
        self.bn2 = BatchNorm(planes)
        self.downsample = downsample
        self.stride = stride
        self.residual = residual

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)
        if self.residual:
            out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 dilation=(1, 1), residual=True):
        super(Bottleneck, self).__init__()
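        # Unlike BasicBlock, the residual flag is accepted but unused here
        # (forward() always adds the shortcut), and only dilation[1] is used,
        # since the 1x1 convs are never dilated.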
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = BatchNorm(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=dilation[1], bias=False,
                               dilation=dilation[1])
        self.bn2 = BatchNorm(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = BatchNorm(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class DRN(nn.Module):

    def __init__(self, block, layers, num_classes=1000,
                 channels=(16, 32, 64, 128, 256, 512, 512, 512),
                 out_map=False, out_middle=False, pool_size=28, arch='D'):
        super(DRN, self).__init__()
        self.inplanes = channels[0]
        self.out_map = out_map
        self.out_dim = channels[-1]
        self.out_middle = out_middle
        self.arch = arch

        if arch == 'C':
            self.conv1 = nn.Conv2d(3, channels[0], kernel_size=7, stride=1,
                                   padding=3, bias=False)
            self.bn1 = BatchNorm(channels[0])
            self.relu = nn.ReLU(inplace=True)

            self.layer1 = self._make_layer(
                BasicBlock, channels[0], layers[0], stride=1)
            self.layer2 = self._make_layer(
                BasicBlock, channels[1], layers[1], stride=2)
        elif arch == 'D':
            self.layer0 = nn.Sequential(
                nn.Conv2d(3, channels[0], kernel_size=7, stride=1, padding=3,
                          bias=False),
                BatchNorm(channels[0]),
                nn.ReLU(inplace=True)
            )

            self.layer1 = self._make_conv_layers(
                channels[0], layers[0], stride=1)
            self.layer2 = self._make_conv_layers(
                channels[1], layers[1], stride=2)

        self.layer3 = self._make_layer(block, channels[2], layers[2], stride=2)
        self.layer4 = self._make_layer(block, channels[3], layers[3], stride=2)
        self.layer5 = self._make_layer(block, channels[4], layers[4],
                                       dilation=2, new_level=False)
        self.layer6 = None if layers[5] == 0 else \
            self._make_layer(block, channels[5], layers[5], dilation=4,
                             new_level=False)

        if arch == 'C':
            self.layer7 = None if layers[6] == 0 else \
                self._make_layer(BasicBlock, channels[6], layers[6], dilation=2,
                                 new_level=False, residual=False)
            self.layer8 = None if layers[7] == 0 else \
                self._make_layer(BasicBlock, channels[7], layers[7], dilation=1,
                                 new_level=False, residual=False)
        elif arch == 'D':
            self.layer7 = None if layers[6] == 0 else \
                self._make_conv_layers(channels[6], layers[6], dilation=2)
            self.layer8 = None if layers[7] == 0 else \
                self._make_conv_layers(channels[7], layers[7], dilation=1)

        if num_classes > 0:
            self.avgpool = nn.AvgPool2d(pool_size)
            self.fc = nn.Conv2d(self.out_dim, num_classes, kernel_size=1,
                                stride=1, padding=0, bias=True)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, BatchNorm):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1,
                    new_level=True, residual=True):
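        # With the default new_level=True, the first conv of the stage's first
        # block uses dilation // 2 so the rate ramps up gradually; the dilated
        # stages in __init__ pass new_level=False to apply the full rate from
        # the start.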
        assert dilation == 1 or dilation % 2 == 0
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                BatchNorm(planes * block.expansion),
            )

        layers = list()
        layers.append(block(
            self.inplanes, planes, stride, downsample,
            dilation=(1, 1) if dilation == 1 else (
                dilation // 2 if new_level else dilation, dilation),
            residual=residual))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, residual=residual,
                                dilation=(dilation, dilation)))

        return nn.Sequential(*layers)

    def _make_conv_layers(self, channels, convs, stride=1, dilation=1):
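        # Plain conv-BN-ReLU stacks with no residual connections, used for the
        # first and last stages of the 'D' architecture.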
        modules = []
        for i in range(convs):
            modules.extend([
                nn.Conv2d(self.inplanes, channels, kernel_size=3,
                          stride=stride if i == 0 else 1,
                          padding=dilation, bias=False, dilation=dilation),
                BatchNorm(channels),
                nn.ReLU(inplace=True)])
            self.inplanes = channels
        return nn.Sequential(*modules)

    def forward(self, x):
        y = list()

        if self.arch == 'C':
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.relu(x)
        elif self.arch == 'D':
            x = self.layer0(x)

        x = self.layer1(x)
        y.append(x)
        x = self.layer2(x)
        y.append(x)

        x = self.layer3(x)
        y.append(x)

        x = self.layer4(x)
        y.append(x)

        x = self.layer5(x)
        y.append(x)

        if self.layer6 is not None:
            x = self.layer6(x)
            y.append(x)

        if self.layer7 is not None:
            x = self.layer7(x)
            y.append(x)

        if self.layer8 is not None:
            x = self.layer8(x)
            y.append(x)

        if self.out_map:
            x = self.fc(x)
        else:
            x = self.avgpool(x)
            x = self.fc(x)
            x = x.view(x.size(0), -1)

        if self.out_middle:
            return x, y
        else:
            return x


class DRN_A(nn.Module):

    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 64
        super(DRN_A, self).__init__()
        self.out_dim = 512 * block.expansion
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=1,
                                       dilation=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1,
                                       dilation=4)
        self.avgpool = nn.AvgPool2d(28, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, BatchNorm):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, dilation=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes,
                                dilation=(dilation, dilation)))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x


def drn_a_50(pretrained=False, **kwargs):
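    # DRN-A-50 keeps ResNet-50's layer structure and replaces the strides of
    # the last two stages with dilation; since dilation adds no weights, the
    # torchvision ResNet-50 checkpoint loads directly.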
    model = DRN_A(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
    return model


def drn_c_26(pretrained=False, **kwargs):
    model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-c-26']))
    return model


def drn_c_42(pretrained=False, **kwargs):
    model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-c-42']))
    return model


def drn_c_58(pretrained=False, **kwargs):
    model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='C', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-c-58']))
    return model


def drn_d_22(pretrained=False, **kwargs):
    model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-22']))
    return model


def drn_d_24(pretrained=False, **kwargs):
    model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 2, 2], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-24']))
    return model


def drn_d_38(pretrained=False, **kwargs):
    model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-38']))
    return model


def drn_d_40(pretrained=False, **kwargs):
    model = DRN(BasicBlock, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-40']))
    return model


def drn_d_54(pretrained=False, **kwargs):
    model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 1, 1], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-54']))
    return model


def drn_d_56(pretrained=False, **kwargs):
    model = DRN(Bottleneck, [1, 1, 3, 4, 6, 3, 2, 2], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-56']))
    return model


def drn_d_105(pretrained=False, **kwargs):
    model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 1, 1], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-105']))
    return model


def drn_d_107(pretrained=False, **kwargs):
    model = DRN(Bottleneck, [1, 1, 3, 4, 23, 3, 2, 2], arch='D', **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['drn-d-107']))
    return model
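
A quick smoke test (hypothetical usage, not part of the file above): pool_size=28 matches a 224x224 input for these stride-8 networks, and out_map=True skips the global pooling to return a dense score map for segmentation-style outputs.

import torch

# Image classification on a 224x224 input.
model = drn_d_22(pretrained=False, num_classes=1000).eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))
print(logits.shape)  # torch.Size([1, 1000])

# Dense prediction: keep the 28x28 spatial map of class scores.
seg = drn_d_22(pretrained=False, num_classes=19, out_map=True).eval()
with torch.no_grad():
    scores = seg(torch.randn(1, 3, 224, 224))
print(scores.shape)  # torch.Size([1, 19, 28, 28])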

GitHub: https://github.com/fyu/drn#semantic-image-segmentataion

Multi-Scale Context Aggregation by Dilated Convolutions

The properties of dilated convolutions are discussed in our ICLR 2016 conference paper. This repository contains the network definitions and the trained models. You can use this code together with vanilla Caffe to segment images using the pre-trained models. If you want to train the models yourself, please check out the training document.

If you are looking for dilation models with state-of-the-art performance and Python implementation, please check out Dilated Residual Networks.
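
The context module stacks 3x3 convolutions whose dilation doubles layer by layer, so the receptive field grows exponentially while the parameter count grows only linearly with depth. A minimal sketch of that arithmetic (the schedule mirrors build_context in the code below):

def receptive_field(dilations, kernel_size=3):
    """Receptive field of a stack of stride-1 convs with the given dilations."""
    rf = 1
    for d in dilations:
        rf += (kernel_size - 1) * d  # each 3x3 layer widens the field by 2 * d
    return rf

# Basic context module: two dilation-1 layers, then 2, 4, 8, 16, and a final
# dilation-1 layer before the 1x1 classifier.
print(receptive_field([1, 1, 2, 4, 8, 16, 1]))  # 67 -> a 67x67 receptive field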

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import print_function, division
from caffe import layers as L
from caffe import params as P


__author__ = 'Fisher Yu'
__copyright__ = 'Copyright (c) 2016, Fisher Yu'
__email__ = 'i@yf.io'
__license__ = 'MIT'


def make_image_label_data(image_list_path, label_list_path, batch_size,
                          mirror, crop_size, mean_pixel,
                          label_stride=8, margin=186):
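    # The frontend uses unpadded convolutions, so its output map is smaller
    # than the crop: trim `margin` pixels per side from the labels, then
    # sample them at the network's output stride.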
    label_dim = (crop_size - margin * 2) // label_stride
    data, label = L.ImageLabelData(
        transform_param=dict(mirror=mirror, mean_value=mean_pixel,
                             crop_size=crop_size),
        image_label_data_param=dict(
            image_list_path=image_list_path, label_list_path=label_list_path,
            shuffle=True, batch_size=batch_size,
            padding=P.ImageLabelData.REFLECT,
            label_slice=dict(dim=[label_dim, label_dim],
                             stride=[label_stride, label_stride],
                             offset=[margin, margin])),
        ntop=2)
    return data, label


def make_bin_label_data(bin_list_path, label_list_path, batch_size,
                        label_shape, label_stride):
    data, label = L.BinLabelData(
        bin_label_data_param=dict(
            bin_list_path=bin_list_path, label_list_path=label_list_path,
            shuffle=True, batch_size=batch_size,
            label_slice=dict(stride=[label_stride, label_stride],
                             dim=label_shape)),
        ntop=2)
    return data, label


def make_input_data(input_size, channels=3):
    return L.Input(input_param=dict(shape=dict(
        dim=[1, channels, input_size[0], input_size[1]])))


def make_softmax_loss(bottom, label):
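    # Pixels labeled 255 are void: they are excluded from the loss, and VALID
    # normalizes by the number of non-ignored pixels.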
    return L.SoftmaxWithLoss(bottom, label,
                             loss_param=dict(ignore_label=255,
                                             normalization=P.Loss.VALID))


def make_accuracy(bottom, label):
    return L.Accuracy(bottom, label, accuracy_param=dict(ignore_label=255))


def make_prob(bottom):
    return L.Softmax(bottom)


def make_upsample(bottom, num_classes):
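    # A frozen (lr_mult=0) per-class deconvolution initialized with bilinear
    # weights: kernel 16, stride 8, pad 4 performs fixed 8x bilinear
    # upsampling of the score maps.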
    return L.Deconvolution(
        bottom,
        param=[dict(lr_mult=0, decay_mult=0)],
        convolution_param=dict(
            bias_term=False, num_output=num_classes, kernel_size=16, stride=8,
            group=num_classes, pad=4, weight_filler=dict(type="bilinear")))


def build_frontend_vgg(net, bottom, num_classes):
    prev_layer = bottom
    num_convolutions = [2, 2, 3, 3, 3]
    dilations = [0, 0, 0, 0, 2, 4]
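    # A dilation of 0 marks an ordinary convolution; 2x2 max pooling follows a
    # stage only when the next entry is 0, so the frontend stops at stride 8
    # and the fifth stage and fc6 use dilations 2 and 4 instead.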
    for l in range(5):
        num_outputs = min(64 * 2 ** l, 512)
        for i in range(0, num_convolutions[l]):
            conv_name = 'conv{0}_{1}'.format(l+1, i+1)
            relu_name = 'relu{0}_{1}'.format(l+1, i+1)
            if dilations[l] == 0:
                setattr(net, conv_name,
                        L.Convolution(
                            prev_layer,
                            param=[dict(lr_mult=1, decay_mult=1),
                                   dict(lr_mult=2, decay_mult=0)],
                            convolution_param=dict(num_output=num_outputs,
                                                   kernel_size=3)))
            else:
                setattr(net, conv_name,
                        L.Convolution(
                            prev_layer,
                            param=[dict(lr_mult=1, decay_mult=1),
                                   dict(lr_mult=2, decay_mult=0)],
                            convolution_param=dict(num_output=num_outputs,
                                                   kernel_size=3,
                                                   dilation=dilations[l])))
            setattr(net, relu_name,
                    L.ReLU(getattr(net, conv_name), in_place=True))
            prev_layer = getattr(net, relu_name)
        if dilations[l+1] == 0:
            pool_name = 'pool{0}'.format(l+1)
            setattr(net, pool_name, L.Pooling(
                prev_layer, pool=P.Pooling.MAX, kernel_size=2, stride=2))
            prev_layer = getattr(net, pool_name)

    net.fc6 = L.Convolution(
        prev_layer,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        convolution_param=dict(num_output=4096, kernel_size=7,
                               dilation=dilations[5]))
    net.relu6 = L.ReLU(net.fc6, in_place=True)
    net.drop6 = L.Dropout(net.relu6, in_place=True, dropout_ratio=0.5)
    net.fc7 = L.Convolution(
        net.drop6,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        convolution_param=dict(num_output=4096, kernel_size=1))
    net.relu7 = L.ReLU(net.fc7, in_place=True)
    net.drop7 = L.Dropout(net.relu7, in_place=True, dropout_ratio=0.5)
    net.final = L.Convolution(
        net.drop7,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        convolution_param=dict(
            num_output=num_classes, kernel_size=1,
            weight_filler=dict(type='gaussian', std=0.001),
            bias_filler=dict(type='constant', value=0)))
    return net.final, 'final'


def build_context(net, bottom, num_classes, layers=8):
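    # Context module from the paper: 3x3 convolutions with exponentially
    # increasing dilation, identity-initialized so that training starts from
    # a pass-through of the frontend's predictions.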
    prev_layer = bottom
    multiplier = 1
    for i in range(1, 3):
        conv_name = 'ctx_conv1_{}'.format(i)
        relu_name = 'ctx_relu1_{}'.format(i)
        setattr(net, conv_name,
                L.Convolution(
                    *([] if prev_layer is None else [prev_layer]),
                    param=[dict(lr_mult=1, decay_mult=1),
                           dict(lr_mult=2, decay_mult=0)],
                    convolution_param=dict(
                        num_output=num_classes * multiplier, kernel_size=3,
                        pad=1,
                        weight_filler=dict(type='identity',
                                           num_groups=num_classes, std=0.01),
                        bias_filler=dict(type='constant', value=0))))
        setattr(net, relu_name,
                L.ReLU(getattr(net, conv_name), in_place=True))
        prev_layer = getattr(net, relu_name)

    for i in range(2, layers - 2):
        dilation = 2 ** (i - 1)
        multiplier = 1
        conv_name = 'ctx_conv{}_1'.format(i)
        relu_name = 'ctx_relu{}_1'.format(i)
        setattr(net, conv_name,
                L.Convolution(
                    prev_layer,
                    param=[dict(lr_mult=1, decay_mult=1),
                           dict(lr_mult=2, decay_mult=0)],
                    convolution_param=dict(
                        num_output=num_classes * multiplier, kernel_size=3,
                        dilation=dilation, pad=dilation,
                        weight_filler=dict(type='identity',
                                           num_groups=num_classes,
                                           std=0.01 / multiplier),
                        bias_filler=dict(type='constant', value=0))))
        setattr(net, relu_name,
                L.ReLU(getattr(net, conv_name), in_place=True))
        prev_layer = getattr(net, relu_name)

    net.ctx_fc1 = L.Convolution(
        prev_layer,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        convolution_param=dict(
            num_output=num_classes * multiplier, kernel_size=3, pad=1,
            weight_filler=dict(type='identity',
                               num_groups=num_classes,
                               std=0.01 / multiplier),
            bias_filler=dict(type='constant', value=0)))
    net.ctx_fc1_relu = L.ReLU(net.ctx_fc1, in_place=True)
    net.ctx_final = L.Convolution(
        net.ctx_fc1_relu,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        convolution_param=dict(
            num_output=num_classes, kernel_size=1,
            weight_filler=dict(type='identity',
                               num_groups=num_classes,
                               std=0.01 / multiplier),
            bias_filler=dict(type='constant', value=0)))
    return net.ctx_final, 'ctx_final'
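
A hypothetical sketch of how these builders compose into a deploy prototxt (the 900x900 input, 21 classes, and the file name are assumptions, not values from the repository):

import caffe

net = caffe.NetSpec()
net.data = make_input_data((900, 900))
# VGG frontend with dilated fc6, followed by the context module.
final, _ = build_frontend_vgg(net, net.data, num_classes=21)
ctx_final, _ = build_context(net, final, num_classes=21)
# Fixed bilinear 8x upsampling back toward input resolution, then softmax.
net.upsample = make_upsample(ctx_final, num_classes=21)
net.prob = make_prob(net.upsample)

with open('deploy.prototxt', 'w') as f:
    f.write(str(net.to_proto()))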

GitHub: https://github.com/fyu/dilation/blob/master/network.py

This repository collects multiple implementations of DilatedNet in different languages:

GitHub: https://github.com/mrgloom/awesome-semantic-segmentation
