-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from braincreators/oct-resnet152
Benchmark code
- Loading branch information
Showing
15 changed files
with
1,244 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Image Classification Benchmarks | ||
|
||
## Setup | ||
|
||
- Install `octconv`: | ||
|
||
- (Option 1) From pip: | ||
|
||
pip install octconv | ||
|
||
- (Option 2) Locally: | ||
|
||
pip install -e .. | ||
|
||
- Install the remaining requirements: | |
|
||
pip install -r requirements.txt | ||
|
||
|
||
## Training | ||
|
||
### Single GPU | ||
|
||
python train.py -c configs/cifar10/oct-resnet20.yml --device cuda:0 | ||
|
||
### Multi-GPU | ||
|
||
NGPUS=4; python -m torch.distributed.launch --nproc_per_node ${NGPUS} train.py -c configs/cifar10/oct-resnet20.yml |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import time | ||
|
||
import torch | ||
import torch.nn as nn | ||
from torchvision.models.resnet import resnet50 | ||
|
||
from benchmarks.models.resnets import oct_resnet50 | ||
from octconv import OctConv2d | ||
|
||
|
||
@torch.no_grad()
def benchmark_conv():
    """Time a single forward pass of ``nn.Conv2d`` against ``OctConv2d``.

    Both layers map a random ``(1, 3, 224, 224)`` input from 3 to 64 channels
    with a 3x3 kernel. The elapsed time of each layer and the ratio
    ``conv_time / octconv_time * 100`` are printed to stdout.

    When CUDA is available the input and both layers are moved to the GPU.
    CUDA kernels are launched asynchronously, so the device is synchronised
    before each clock reading; otherwise the timings would only cover the
    kernel launches. One untimed warm-up pass per layer keeps one-off
    initialisation cost out of the first measurement.
    """
    x = torch.rand(1, 3, 224, 224)

    conv1 = nn.Conv2d(3, 64, 3)
    conv2 = OctConv2d(3, 64, 3, alpha=(0., 0.5))

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        x = x.cuda()
        conv1 = conv1.cuda()
        conv2 = conv2.cuda()

    # Untimed warm-up so lazy initialisation is not charged to either layer.
    conv1(x)
    conv2(x)
    if use_cuda:
        torch.cuda.synchronize()

    t0 = time.perf_counter()
    conv1(x)
    if use_cuda:
        # Wait for the queued kernels to finish before reading the clock.
        torch.cuda.synchronize()
    t1 = time.perf_counter()
    conv2(x)
    if use_cuda:
        torch.cuda.synchronize()
    t2 = time.perf_counter()

    conv_time = t1 - t0
    octconv_time = t2 - t1

    print("Conv2D:", conv_time)
    print("OctConv2D:", octconv_time)
    print("ratio:", conv_time / octconv_time * 100)
|
||
|
||
@torch.no_grad()
def benchmark_resnet50():
    """Time a single forward pass of ``resnet50`` against ``oct_resnet50``.

    Both models are built with their default arguments and fed one random
    ``(1, 3, 224, 224)`` input. The elapsed time of each model and the ratio
    ``conv_time / octconv_time * 100`` are printed to stdout.

    When CUDA is available the input and both models are moved to the GPU.
    CUDA kernels are launched asynchronously, so the device is synchronised
    before each clock reading; otherwise the timings would only cover the
    kernel launches. One untimed warm-up pass per model keeps one-off
    initialisation cost out of the first measurement.
    """
    x = torch.rand(1, 3, 224, 224)

    model1 = resnet50()
    model2 = oct_resnet50()

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        x = x.cuda()
        model1 = model1.cuda()
        model2 = model2.cuda()

    # Untimed warm-up so lazy initialisation is not charged to either model.
    model1(x)
    model2(x)
    if use_cuda:
        torch.cuda.synchronize()

    t0 = time.perf_counter()
    model1(x)
    if use_cuda:
        # Wait for the queued kernels to finish before reading the clock.
        torch.cuda.synchronize()
    t1 = time.perf_counter()
    model2(x)
    if use_cuda:
        torch.cuda.synchronize()
    t2 = time.perf_counter()

    conv_time = t1 - t0
    octconv_time = t2 - t1

    print("ResNet50:", conv_time)
    print("OctResNet50:", octconv_time)
    print("ratio:", conv_time / octconv_time * 100)
|
||
|
||
if __name__ == '__main__':
    # Script entry point: report the single-layer comparison first, then the
    # full-network comparison, separated by a row of asterisks.
    separator = "*" * 30

    benchmark_conv()
    print(separator)
    benchmark_resnet50()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
root: ./data | ||
dataset: cifar10 | ||
download: True | ||
arch: resnet20_small | ||
alpha: 0.5 | ||
workers: 4 | ||
epochs: 90 | ||
batch-size: 128 | ||
lr: 0.1 | ||
lr-steps: [40, 80] | ||
lr-warmup-epochs: 5 | ||
momentum: 0.9 | ||
weight-decay: 1e-4 | ||
print-freq: 10 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
root: ./data | ||
dataset: imagenet | ||
download: False | ||
arch: oct_resnet50 | ||
alpha: 0.5 | ||
workers: 4 | ||
epochs: 120 | ||
batch-size: 256 | ||
lr: 0.1 | ||
lr-steps: [40, 80] | ||
lr-warmup-epochs: 5 | ||
momentum: 0.9 | ||
weight-decay: 1e-4 | ||
print-freq: 10 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
root: ./data | ||
dataset: imagenet | ||
download: False | ||
arch: resnet50 | ||
alpha: 0.5 | ||
workers: 4 | ||
epochs: 120 | ||
batch-size: 256 | ||
lr: 0.1 | ||
lr-steps: [40, 80] | ||
lr-warmup-epochs: 5 | ||
momentum: 0.9 | ||
weight-decay: 1e-4 | ||
print-freq: 10 |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import torch.nn as nn | ||
from octconv import OctConv2d | ||
|
||
|
||
class OctConvBn(nn.Module):
    """Octave convolution followed by a separate normalization per output branch.

    Args:
        in_channels: Forwarded to ``OctConv2d``.
        out_channels: Forwarded to ``OctConv2d``.
        kernel_size: Forwarded to ``OctConv2d``.
        alpha: Forwarded to ``OctConv2d``; other code in this project passes
            either a single value or an ``(alpha_in, alpha_out)`` tuple.
        stride: Forwarded to ``OctConv2d``.
        padding: Forwarded to ``OctConv2d``.
        bias: Forwarded to ``OctConv2d``.
        norm_layer: Callable taking a channel count and returning a
            normalization module. Defaults to ``nn.BatchNorm2d``.

    Attributes:
        bn_h: Normalization for the high-frequency output, or ``None`` when
            the convolution reports ``alpha_out == 1``.
        bn_l: Normalization for the low-frequency output, or ``None`` when
            the convolution reports ``alpha_out == 0``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, alpha=0.5, stride=1, padding=0,
                 bias=False, norm_layer=None):

        super(OctConvBn, self).__init__()

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        self.conv = OctConv2d(in_channels, out_channels, kernel_size=kernel_size,
                              alpha=alpha, stride=stride, padding=padding, bias=bias)

        alpha_out = self.conv.alpha_out

        self.bn_h = None if alpha_out == 1 else norm_layer(self.conv.out_channels['high'])
        self.bn_l = None if alpha_out == 0 else norm_layer(self.conv.out_channels['low'])

    def forward(self, x):
        """Apply the convolution, then normalize each branch that exists.

        Args:
            x: Input passed unchanged to the wrapped convolution.

        Returns:
            A ``(x_h, x_l)`` tuple. A branch is ``None`` when the convolution
            did not produce it.
        """
        out = self.conv(x)

        # The convolution may return a bare tensor instead of a pair.
        x_h, x_l = out if isinstance(out, tuple) else (out, None)

        # ``bn_h`` / ``bn_l`` are None for a disabled branch, so each is only
        # called when both the tensor and its normalization module exist.
        if x_h is not None and self.bn_h is not None:
            x_h = self.bn_h(x_h)
        if x_l is not None and self.bn_l is not None:
            x_l = self.bn_l(x_l)

        return x_h, x_l
|
||
|
||
class OctConvBnAct(nn.Module):
    """Octave convolution followed by per-branch normalization and a shared activation.

    Args:
        in_channels: Forwarded to ``OctConv2d``.
        out_channels: Forwarded to ``OctConv2d``.
        kernel_size: Forwarded to ``OctConv2d``.
        alpha: Forwarded to ``OctConv2d``; other code in this project passes
            either a single value or an ``(alpha_in, alpha_out)`` tuple.
        stride: Forwarded to ``OctConv2d``.
        padding: Forwarded to ``OctConv2d``.
        bias: Forwarded to ``OctConv2d``.
        norm_layer: Callable taking a channel count and returning a
            normalization module. Defaults to ``nn.BatchNorm2d``.
        activation_layer: Activation module *instance* applied to both
            branches. Defaults to ``nn.ReLU(inplace=True)``.

    Attributes:
        bn_h: Normalization for the high-frequency output, or ``None`` when
            the convolution reports ``alpha_out == 1``.
        bn_l: Normalization for the low-frequency output, or ``None`` when
            the convolution reports ``alpha_out == 0``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, alpha=0.5, stride=1, padding=0,
                 bias=False, norm_layer=None, activation_layer=None):

        super(OctConvBnAct, self).__init__()

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        if activation_layer is None:
            activation_layer = nn.ReLU(inplace=True)

        self.conv = OctConv2d(in_channels, out_channels, kernel_size=kernel_size,
                              alpha=alpha, stride=stride, padding=padding, bias=bias)

        alpha_out = self.conv.alpha_out

        self.bn_h = None if alpha_out == 1 else norm_layer(self.conv.out_channels['high'])
        self.bn_l = None if alpha_out == 0 else norm_layer(self.conv.out_channels['low'])

        self.act = activation_layer

    def forward(self, x):
        """Apply the convolution, then normalize and activate each existing branch.

        Args:
            x: Input passed unchanged to the wrapped convolution.

        Returns:
            A ``(x_h, x_l)`` tuple. A branch is ``None`` when the convolution
            did not produce it.
        """
        out = self.conv(x)

        # The convolution may return a bare tensor instead of a pair.
        x_h, x_l = out if isinstance(out, tuple) else (out, None)

        # ``bn_h`` / ``bn_l`` are None for a disabled branch, so the
        # normalization is skipped (not called) when its module is missing.
        if x_h is not None:
            if self.bn_h is not None:
                x_h = self.bn_h(x_h)
            x_h = self.act(x_h)
        if x_l is not None:
            if self.bn_l is not None:
                x_l = self.bn_l(x_l)
            x_l = self.act(x_l)

        return x_h, x_l
|
||
|
||
if __name__ == '__main__':
    # Nothing to execute: running this module directly is a no-op.
    pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
import torch.nn as nn | ||
|
||
from benchmarks.models.layers import OctConvBn, OctConvBnAct | ||
|
||
|
||
class Bottleneck(nn.Module):
    """Residual bottleneck block (1x1 -> 3x3 -> 1x1) built from octave-convolution layers.

    Args:
        inplanes: Input channel count given to the first 1x1 layer.
        planes: Base width; the block outputs ``planes * expansion`` channels.
        stride: Stride of the 3x3 layer.
        downsample: Optional module applied to the block input to produce the
            residual. When ``None`` the input itself is the residual.
        groups: Multiplier used when computing the inner width.
        base_width: Scales the inner width relative to 64.
        alpha: Octave ratio passed to the inner layers.
        norm_layer: Normalization factory forwarded to the inner layers.
            Defaults to ``nn.BatchNorm2d``.
        first_block: If true, the first 1x1 layer is built with
            ``alpha=(0., alpha)``.
        last_block: If true, the final 1x1 layer is built with
            ``alpha=(alpha, 0.)``.

    ``first_block`` and ``last_block`` are mutually exclusive.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, alpha=0.5, norm_layer=None,
                 first_block=False, last_block=False):

        super(Bottleneck, self).__init__()

        assert not (first_block and last_block), "mutually exclusive options"

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        width = int(planes * (base_width / 64.)) * groups

        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = OctConvBnAct(inplanes, width, kernel_size=1, norm_layer=norm_layer,
                                  alpha=alpha if not first_block else (0., alpha))
        self.conv2 = OctConvBnAct(width, width, kernel_size=3, stride=stride, padding=1,
                                  norm_layer=norm_layer, alpha=alpha)
        self.conv3 = OctConvBn(width, planes * self.expansion, kernel_size=1, norm_layer=norm_layer,
                               alpha=alpha if not last_block else (alpha, 0.))

        self.relu = nn.ReLU(inplace=True)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three layers and add the residual to each branch.

        Args:
            x: Either a single tensor or a ``(high, low)`` tuple.

        Returns:
            A ``(x_h, x_l)`` tuple; ``x_l`` is ``None`` when the residual has
            no low-frequency part.
        """
        # isinstance (rather than an exact type check) matches the other
        # tuple checks in this method and also accepts tuple subclasses.
        identity_h, identity_l = x if isinstance(x, tuple) else (x, None)

        x_h, x_l = self.conv1(x)
        x_h, x_l = self.conv2((x_h, x_l))
        out = self.conv3((x_h, x_l))

        x_h, x_l = out if isinstance(out, tuple) else (out, None)

        if self.downsample is not None:
            identity = self.downsample(x)
            identity_h, identity_l = identity if isinstance(identity, tuple) else (identity, None)

        x_h += identity_h
        # The low branch is kept only when the residual has a low part.
        x_l = (x_l + identity_l) if identity_l is not None else None

        x_h = self.relu(x_h)
        x_l = self.relu(x_l) if x_l is not None else None

        return x_h, x_l
|
||
|
||
class OctResNet(nn.Module):
    """ResNet-style classifier whose four residual stages use octave-convolution blocks.

    The stem (7x7 convolution, normalization, ReLU, max-pool) uses plain
    layers. The stages exchange ``(high, low)`` tuples, and only the high
    part of the last stage's output is pooled and classified.

    Args:
        block: Block class. It must expose an ``expansion`` attribute and
            accept the keyword arguments used in ``_make_layer``.
        layers: Sequence with the number of blocks for each of the 4 stages.
        num_classes: Output size of the final linear layer.
        groups: Forwarded to every block.
        width_per_group: Forwarded to every block as ``base_width``.
        norm_layer: Normalization factory used for the stem, the blocks and
            the downsample shortcuts. Defaults to ``nn.BatchNorm2d``.
        alpha: Octave ratio forwarded to the blocks and downsample layers.
    """

    def __init__(self, block, layers, num_classes=1000, groups=1, width_per_group=64, norm_layer=None, alpha=0.5):
        super(OctResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        self.alpha = alpha
        self.inplanes = 64
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer, first_layer=True)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, norm_layer=norm_layer)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, norm_layer=norm_layer, last_layer=True)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming-normal init for every nn.Conv2d; unit weight / zero bias
        # for every BatchNorm2d and GroupNorm.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1, norm_layer=None, first_layer=False, last_layer=False):
        """Build one stage as an ``nn.Sequential`` of ``blocks`` blocks.

        Args:
            block: Block class to instantiate.
            planes: Base width of the stage; it outputs
                ``planes * block.expansion`` channels.
            blocks: Number of blocks in the stage.
            stride: Stride of the first block (and of its downsample shortcut).
            norm_layer: Normalization factory. Defaults to ``nn.BatchNorm2d``.
            first_layer: Marks the stage that receives the plain stem output;
                its first block and shortcut use ``alpha=(0., self.alpha)``.
            last_layer: Marks the final stage; its first block's shortcut uses
                ``alpha=(self.alpha, 0.)`` and the remaining blocks use
                ``alpha=0.``.

        ``first_layer`` and ``last_layer`` are mutually exclusive. Updates
        ``self.inplanes`` to the stage's output channel count.
        """
        assert not (first_layer and last_layer), "mutually exclusive options"

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            if last_layer:
                shortcut_alpha = (self.alpha, 0.)
            elif first_layer:
                shortcut_alpha = (0., self.alpha)
            else:
                shortcut_alpha = self.alpha
            # norm_layer is passed on so the shortcut uses the same
            # normalization as the rest of the network.
            downsample = nn.Sequential(
                OctConvBn(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride,
                          alpha=shortcut_alpha, norm_layer=norm_layer)
            )

        layers = [block(self.inplanes, planes, stride=stride, downsample=downsample,
                        groups=self.groups, base_width=self.base_width,
                        alpha=self.alpha, norm_layer=norm_layer,
                        first_block=first_layer, last_block=last_layer)]

        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, norm_layer=norm_layer,
                                alpha=self.alpha if not last_layer else 0.,
                                last_block=last_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Compute class scores for input ``x``.

        Returns:
            The output of the final linear layer.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x_h, x_l = self.layer1(x)
        x_h, x_l = self.layer2((x_h, x_l))
        x_h, x_l = self.layer3((x_h, x_l))
        x_h, x_l = self.layer4((x_h, x_l))

        # Only the high branch of the final stage is pooled and classified.
        x = self.avgpool(x_h)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
|
||
|
||
def _oct_resnet(inplanes, planes, **kwargs):
    """Build an ``OctResNet`` from the given arguments.

    ``inplanes`` and ``planes`` are passed positionally as the first two
    ``OctResNet`` constructor arguments; the public constructors in this
    module supply a block class and a list of per-stage block counts.
    Remaining keyword arguments are forwarded unchanged.
    """
    return OctResNet(inplanes, planes, **kwargs)
|
||
|
||
def oct_resnet50(**kwargs):
    """Construct an OctResNet-50: ``Bottleneck`` blocks in stages of 3, 4, 6 and 3.

    Keyword arguments are forwarded to the ``OctResNet`` constructor.
    """
    stage_depths = [3, 4, 6, 3]
    return _oct_resnet(Bottleneck, stage_depths, **kwargs)
|
||
|
||
def oct_resnet101(**kwargs):
    """Construct an OctResNet-101: ``Bottleneck`` blocks in stages of 3, 4, 23 and 3.

    Keyword arguments are forwarded to the ``OctResNet`` constructor.
    """
    stage_depths = [3, 4, 23, 3]
    return _oct_resnet(Bottleneck, stage_depths, **kwargs)
|
||
|
||
def oct_resnet152(**kwargs):
    """Construct an OctResNet-152: ``Bottleneck`` blocks in stages of 3, 8, 36 and 3.

    Keyword arguments are forwarded to the ``OctResNet`` constructor.
    """
    stage_depths = [3, 8, 36, 3]
    return _oct_resnet(Bottleneck, stage_depths, **kwargs)
Oops, something went wrong.