Skip to content

Commit

Permalink
Merge pull request #2 from braincreators/oct-resnet152
Browse files Browse the repository at this point in the history
Benchmark code
  • Loading branch information
Miguel Varela Ramos authored Jun 17, 2019
2 parents 0f23a4c + aea4634 commit 733b73c
Show file tree
Hide file tree
Showing 15 changed files with 1,244 additions and 2 deletions.
28 changes: 28 additions & 0 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Image Classification Benchmarks

## Setup

- Install `octconv`:

- (Option 1) From pip:

pip install octconv

- (Option 2) Locally:

pip install -e ..

- Install the remaining requirements:

pip install -r requirements.txt


## Training

### Single GPU

python train.py -c configs/cifar10/oct-resnet20.yml --device cuda:0

### Multi-GPU

NGPUS=4; python -m torch.distributed.launch --nproc_per_node ${NGPUS} train.py -c configs/cifar10/oct-resnet20.yml
Empty file added benchmarks/__init__.py
Empty file.
66 changes: 66 additions & 0 deletions benchmarks/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import time

import torch
import torch.nn as nn
from torchvision.models.resnet import resnet50

from benchmarks.models.resnets import oct_resnet50
from octconv import OctConv2d


@torch.no_grad()
def benchmark_conv():
    """Time one forward pass of ``nn.Conv2d`` against ``OctConv2d``.

    Both layers take a random ``(1, 3, 224, 224)`` input, produce 64 output
    channels with a 3x3 kernel, and are moved to the GPU when CUDA is
    available.  The elapsed time of each layer and the
    ``Conv2D / OctConv2D`` ratio (as a percentage) are printed to stdout.
    """
    use_cuda = torch.cuda.is_available()

    def timed_forward(module, inputs):
        # One untimed pass so that one-off start-up cost (CUDA context
        # creation, kernel selection, allocator growth) is not billed to
        # whichever layer happens to run first.
        module(inputs)
        if use_cuda:
            torch.cuda.synchronize()

        start = time.perf_counter()
        module(inputs)
        if use_cuda:
            # CUDA kernels are launched asynchronously; without this the
            # clock would stop before the GPU has finished the work.
            torch.cuda.synchronize()
        return time.perf_counter() - start

    x = torch.rand(1, 3, 224, 224)

    conv1 = nn.Conv2d(3, 64, 3)
    conv2 = OctConv2d(3, 64, 3, alpha=(0., 0.5))

    if use_cuda:
        x = x.cuda()
        conv1 = conv1.cuda()
        conv2 = conv2.cuda()

    conv_time = timed_forward(conv1, x)
    octconv_time = timed_forward(conv2, x)

    print("Conv2D:", conv_time)
    print("OctConv2D:", octconv_time)
    print("ratio:", conv_time / octconv_time * 100)


@torch.no_grad()
def benchmark_resnet50():
    """Time one forward pass of torchvision's ResNet-50 against OctResNet-50.

    Both models are built with their default arguments, fed the same random
    ``(1, 3, 224, 224)`` input, and moved to the GPU when CUDA is available.
    The elapsed time of each model and the ``ResNet50 / OctResNet50`` ratio
    (as a percentage) are printed to stdout.
    """
    use_cuda = torch.cuda.is_available()

    def timed_forward(model, inputs):
        # One untimed pass so that one-off start-up cost (CUDA context
        # creation, kernel selection, allocator growth) is not billed to
        # whichever model happens to run first.
        model(inputs)
        if use_cuda:
            torch.cuda.synchronize()

        start = time.perf_counter()
        model(inputs)
        if use_cuda:
            # CUDA kernels are launched asynchronously; without this the
            # clock would stop before the GPU has finished the work.
            torch.cuda.synchronize()
        return time.perf_counter() - start

    x = torch.rand(1, 3, 224, 224)

    model1 = resnet50()
    model2 = oct_resnet50()

    if use_cuda:
        x = x.cuda()
        model1 = model1.cuda()
        model2 = model2.cuda()

    conv_time = timed_forward(model1, x)
    octconv_time = timed_forward(model2, x)

    print("ResNet50:", conv_time)
    print("OctResNet50:", octconv_time)
    print("ratio:", conv_time / octconv_time * 100)


if __name__ == '__main__':
    # Single-layer comparison first, then the full-network comparison,
    # with a row of asterisks separating the two reports.
    divider = "*" * 30
    benchmark_conv()
    print(divider)
    benchmark_resnet50()
14 changes: 14 additions & 0 deletions benchmarks/configs/cifar10/resnet20_small.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
root: ./data
dataset: cifar10
download: True
arch: resnet20_small
alpha: 0.5
workers: 4
epochs: 90
batch-size: 128
lr: 0.1
lr-steps: [40, 80]
lr-warmup-epochs: 5
momentum: 0.9
weight-decay: 1e-4
print-freq: 10
14 changes: 14 additions & 0 deletions benchmarks/configs/imagenet/oct-resnet50.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
root: ./data
dataset: imagenet
download: False
arch: oct_resnet50
alpha: 0.5
workers: 4
epochs: 120
batch-size: 256
lr: 0.1
lr-steps: [40, 80]
lr-warmup-epochs: 5
momentum: 0.9
weight-decay: 1e-4
print-freq: 10
14 changes: 14 additions & 0 deletions benchmarks/configs/imagenet/resnet50.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
root: ./data
dataset: imagenet
download: False
arch: resnet50
alpha: 0.5
workers: 4
epochs: 120
batch-size: 256
lr: 0.1
lr-steps: [40, 80]
lr-warmup-epochs: 5
momentum: 0.9
weight-decay: 1e-4
print-freq: 10
Empty file added benchmarks/models/__init__.py
Empty file.
67 changes: 67 additions & 0 deletions benchmarks/models/layers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import torch.nn as nn
from octconv import OctConv2d


class OctConvBn(nn.Module):
    """``OctConv2d`` followed by a separate normalisation layer per branch.

    Args:
        in_channels, out_channels, kernel_size, alpha, stride, padding, bias:
            forwarded unchanged to ``OctConv2d``.
        norm_layer: callable that builds a normalisation module from a
            channel count; defaults to ``nn.BatchNorm2d``.

    ``forward`` always returns a ``(high, low)`` pair; a branch that the
    convolution does not produce is returned as ``None``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, alpha=0.5, stride=1, padding=0,
                 bias=False, norm_layer=None):

        super(OctConvBn, self).__init__()

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        self.conv = OctConv2d(in_channels, out_channels, kernel_size=kernel_size,
                              alpha=alpha, stride=stride, padding=padding, bias=bias)

        alpha_out = self.conv.alpha_out

        # A branch only gets a norm layer when the convolution emits it:
        # alpha_out == 1 means no high branch, alpha_out == 0 no low branch.
        self.bn_h = None if alpha_out == 1 else norm_layer(self.conv.out_channels['high'])
        self.bn_l = None if alpha_out == 0 else norm_layer(self.conv.out_channels['low'])

    def forward(self, x):
        out = self.conv(x)

        # The convolution may hand back a bare tensor instead of a pair.
        x_h, x_l = out if isinstance(out, tuple) else (out, None)

        # Guard the high branch the same way as the low one: ``bn_h`` is
        # None when alpha_out == 1, so calling it unconditionally would fail.
        # NOTE(review): assumes OctConv2d reports a missing high branch as
        # ``(None, low)`` - confirm against the octconv package.
        x_h = self.bn_h(x_h) if x_h is not None else None
        x_l = self.bn_l(x_l) if x_l is not None else None

        return x_h, x_l


class OctConvBnAct(nn.Module):
    """``OctConv2d`` followed by per-branch normalisation and an activation.

    Args:
        in_channels, out_channels, kernel_size, alpha, stride, padding, bias:
            forwarded unchanged to ``OctConv2d``.
        norm_layer: callable that builds a normalisation module from a
            channel count; defaults to ``nn.BatchNorm2d``.
        activation_layer: an activation module *instance* applied to both
            branches; defaults to ``nn.ReLU(inplace=True)``.

    ``forward`` always returns a ``(high, low)`` pair; a branch that the
    convolution does not produce is returned as ``None``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, alpha=0.5, stride=1, padding=0,
                 bias=False, norm_layer=None, activation_layer=None):

        super(OctConvBnAct, self).__init__()

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        if activation_layer is None:
            activation_layer = nn.ReLU(inplace=True)

        self.conv = OctConv2d(in_channels, out_channels, kernel_size=kernel_size,
                              alpha=alpha, stride=stride, padding=padding, bias=bias)

        alpha_out = self.conv.alpha_out

        # A branch only gets a norm layer when the convolution emits it:
        # alpha_out == 1 means no high branch, alpha_out == 0 no low branch.
        self.bn_h = None if alpha_out == 1 else norm_layer(self.conv.out_channels['high'])
        self.bn_l = None if alpha_out == 0 else norm_layer(self.conv.out_channels['low'])

        self.act = activation_layer

    def forward(self, x):
        out = self.conv(x)

        # The convolution may hand back a bare tensor instead of a pair.
        x_h, x_l = out if isinstance(out, tuple) else (out, None)

        # Guard the high branch the same way as the low one: ``bn_h`` is
        # None when alpha_out == 1, so calling it unconditionally would fail.
        # NOTE(review): assumes OctConv2d reports a missing high branch as
        # ``(None, low)`` - confirm against the octconv package.
        x_h = self.act(self.bn_h(x_h)) if x_h is not None else None
        x_l = self.act(self.bn_l(x_l)) if x_l is not None else None

        return x_h, x_l


# Placeholder entry point: running this module as a script does nothing.
if __name__ == '__main__':
    pass
157 changes: 157 additions & 0 deletions benchmarks/models/resnets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import torch.nn as nn

from benchmarks.models.layers import OctConvBn, OctConvBnAct


class Bottleneck(nn.Module):
    """Residual bottleneck block (1x1 -> 3x3 -> 1x1) built from octave convolutions.

    Args:
        inplanes: number of input channels of the block.
        planes: base channel count; the block outputs ``planes * expansion``.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the block input to produce the
            shortcut; when ``None`` the input itself is the shortcut.
        groups, base_width: scale the inner width as
            ``int(planes * (base_width / 64.)) * groups``.
        alpha: octave ratio handed to the inner convolutions.
        norm_layer: normalisation factory; defaults to ``nn.BatchNorm2d``.
        first_block: the first convolution is built with ``alpha=(0., alpha)``.
        last_block: the last convolution is built with ``alpha=(alpha, 0.)``.

    ``forward`` accepts a tensor or a ``(high, low)`` pair and returns a
    ``(high, low)`` pair in which ``low`` may be ``None``.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, alpha=0.5, norm_layer=None,
                 first_block=False, last_block=False):

        super(Bottleneck, self).__init__()

        assert not (first_block and last_block), "mutually exclusive options"

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        width = int(planes * (base_width / 64.)) * groups

        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = OctConvBnAct(inplanes, width, kernel_size=1, norm_layer=norm_layer,
                                  alpha=alpha if not first_block else (0., alpha))
        self.conv2 = OctConvBnAct(width, width, kernel_size=3, stride=stride, padding=1,
                                  norm_layer=norm_layer, alpha=alpha)
        self.conv3 = OctConvBn(width, planes * self.expansion, kernel_size=1, norm_layer=norm_layer,
                               alpha=alpha if not last_block else (alpha, 0.))

        self.relu = nn.ReLU(inplace=True)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # isinstance (not an exact type check) so tuple subclasses such as
        # namedtuples are split into branches too, matching the checks below.
        is_pair = isinstance(x, tuple)
        identity_h = x[0] if is_pair else x
        identity_l = x[1] if is_pair else None

        x_h, x_l = self.conv1(x)
        x_h, x_l = self.conv2((x_h, x_l))
        out = self.conv3((x_h, x_l))

        x_h, x_l = out if isinstance(out, tuple) else (out, None)

        if self.downsample is not None:
            identity = self.downsample(x)
            identity_h, identity_l = identity if isinstance(identity, tuple) else (identity, None)

        x_h += identity_h
        # The low-frequency residual exists only when both the main path and
        # the shortcut carry a low branch; otherwise the block emits no low
        # output rather than failing on ``None + tensor``.
        if x_l is not None and identity_l is not None:
            x_l = x_l + identity_l
        else:
            x_l = None

        x_h = self.relu(x_h)
        x_l = self.relu(x_l) if x_l is not None else None

        return x_h, x_l


class OctResNet(nn.Module):
    """ResNet-style classifier whose residual stages use octave convolutions.

    The stem (7x7 stride-2 convolution, normalisation, ReLU, 3x3 stride-2
    max-pool) is followed by four residual stages, global average pooling of
    the high-frequency output and a linear classifier.

    Args:
        block: residual block class; must expose an ``expansion`` attribute.
        layers: four integers, the number of blocks in each stage.
        num_classes: size of the classifier output.
        groups, width_per_group: forwarded to every block as ``groups`` and
            ``base_width``.
        norm_layer: normalisation factory; defaults to ``nn.BatchNorm2d``.
        alpha: octave ratio used throughout the residual stages.
    """

    def __init__(self, block, layers, num_classes=1000, groups=1, width_per_group=64, norm_layer=None, alpha=0.5):
        super(OctResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        self.alpha = alpha
        self.inplanes = 64
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer, first_layer=True)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, norm_layer=norm_layer)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, norm_layer=norm_layer, last_layer=True)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1, norm_layer=None, first_layer=False, last_layer=False):
        """Build one residual stage of ``blocks`` blocks as an ``nn.Sequential``.

        Only the first block receives ``stride`` and the optional downsample
        shortcut.  ``first_layer`` / ``last_layer`` select the asymmetric
        alpha tuples used where the stage enters or leaves the two-branch
        representation.  ``self.inplanes`` is updated to the stage's output
        channel count as a side effect.
        """
        assert not (first_layer and last_layer), "mutually exclusive options"

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        out_planes = planes * block.expansion

        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            if last_layer:
                shortcut_alpha = (self.alpha, 0.)
            elif first_layer:
                shortcut_alpha = (0., self.alpha)
            else:
                shortcut_alpha = self.alpha
            # Pass norm_layer on so a custom normalisation also applies to
            # the shortcut path instead of silently reverting to BatchNorm2d.
            downsample = nn.Sequential(
                OctConvBn(self.inplanes, out_planes, kernel_size=1, stride=stride,
                          alpha=shortcut_alpha, norm_layer=norm_layer)
            )

        layers = [block(self.inplanes, planes, stride=stride, downsample=downsample,
                        groups=self.groups, base_width=self.base_width,
                        alpha=self.alpha, norm_layer=norm_layer,
                        first_block=first_layer, last_block=last_layer)]

        self.inplanes = out_planes
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, norm_layer=norm_layer,
                                alpha=self.alpha if not last_layer else 0.,
                                last_block=last_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x_h, x_l = self.layer1(x)
        x_h, x_l = self.layer2((x_h, x_l))
        x_h, x_l = self.layer3((x_h, x_l))
        x_h, x_l = self.layer4((x_h, x_l))

        # Only the high-frequency output of the last stage feeds the classifier.
        x = self.avgpool(x_h)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x


def _oct_resnet(inplanes, planes, **kwargs):
    """Instantiate an ``OctResNet``.

    Both positional arguments are handed to ``OctResNet`` in order, so
    despite their names ``inplanes`` fills its ``block`` parameter and
    ``planes`` its ``layers`` parameter.  ``kwargs`` are passed through.
    """
    return OctResNet(inplanes, planes, **kwargs)


def oct_resnet50(**kwargs):
    """Constructs an OctResNet-50 model; ``kwargs`` go to ``_oct_resnet``."""
    blocks_per_stage = [3, 4, 6, 3]
    return _oct_resnet(Bottleneck, blocks_per_stage, **kwargs)


def oct_resnet101(**kwargs):
    """Constructs an OctResNet-101 model; ``kwargs`` go to ``_oct_resnet``."""
    blocks_per_stage = [3, 4, 23, 3]
    return _oct_resnet(Bottleneck, blocks_per_stage, **kwargs)


def oct_resnet152(**kwargs):
    """Constructs an OctResNet-152 model; ``kwargs`` go to ``_oct_resnet``."""
    blocks_per_stage = [3, 8, 36, 3]
    return _oct_resnet(Bottleneck, blocks_per_stage, **kwargs)
Loading

0 comments on commit 733b73c

Please sign in to comment.