Merge pull request #2 from braincreators/oct-resnet152

Benchmark code
braincreators · Jun 17, 2019 · 733b73c · 733b73c
2 parents 0f23a4c + aea4634
commit 733b73c
Show file tree

Hide file tree

Showing 15 changed files with 1,244 additions and 2 deletions.
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -0,0 +1,28 @@
+# Image Classification Benchmarks
+
+## Setup
+
+- Install `octconv`:
+
+   - (Option 1) From pip:
+
+         pip install octconv
+
+   - (Option 2) Locally: 
+
+         pip install -e ..   
+
+- Install the remaining requirements:
+
+        pip install -r requirements.txt
+
+
+## Training
+
+### Single GPU
+
+    python train.py -c configs/cifar10/oct-resnet20.yml --device cuda:0
+
+### Multi-GPU
+
+    NGPUS=4; python -m torch.distributed.launch --nproc_per_node ${NGPUS} train.py -c configs/cifar10/oct-resnet20.yml
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
@@ -0,0 +1,66 @@
+import time
+
+import torch
+import torch.nn as nn
+from torchvision.models.resnet import resnet50
+
+from benchmarks.models.resnets import oct_resnet50
+from octconv import OctConv2d
+
+
+@torch.no_grad()
+def benchmark_conv():
+    """Time one forward pass of ``nn.Conv2d`` and of ``OctConv2d`` and print both.
+
+    Both layers map a random ``(1, 3, 224, 224)`` input to 64 channels with a
+    3x3 kernel; the octave layer is built with ``alpha=(0., 0.5)``. The input
+    and both layers are moved to CUDA when it is available.
+
+    Each layer gets one untimed warm-up call, and on CUDA the device is
+    synchronised before every clock read: CUDA kernels are launched
+    asynchronously, so without the synchronisation the clock would be read
+    before the work has finished.
+
+    Prints the two timings in seconds and ``conv_time / octconv_time * 100``.
+    """
+    x = torch.rand(1, 3, 224, 224)
+
+    conv1 = nn.Conv2d(3, 64, 3)
+    conv2 = OctConv2d(3, 64, 3, alpha=(0., 0.5))
+
+    use_cuda = torch.cuda.is_available()
+    if use_cuda:
+        x = x.cuda()
+        conv1 = conv1.cuda()
+        conv2 = conv2.cuda()
+
+    def timed_forward(module):
+        # Warm-up: keeps one-off setup work (e.g. lazy CUDA initialisation)
+        # out of the measurement.
+        module(x)
+        if use_cuda:
+            torch.cuda.synchronize()
+
+        start = time.perf_counter()
+        module(x)
+        if use_cuda:
+            # Wait for the queued kernels so the elapsed time covers them.
+            torch.cuda.synchronize()
+        return time.perf_counter() - start
+
+    conv_time = timed_forward(conv1)
+    octconv_time = timed_forward(conv2)
+
+    print("Conv2D:", conv_time)
+    print("OctConv2D:", octconv_time)
+    print("ratio:", conv_time / octconv_time * 100)
+
+
+@torch.no_grad()
+def benchmark_resnet50():
+    """Time one forward pass of ``resnet50`` and of ``oct_resnet50`` and print both.
+
+    Both models are built with their default arguments and fed a random
+    ``(1, 3, 224, 224)`` input; everything is moved to CUDA when it is
+    available.
+
+    Each model gets one untimed warm-up call, and on CUDA the device is
+    synchronised before every clock read: CUDA kernels are launched
+    asynchronously, so without the synchronisation the clock would be read
+    before the work has finished.
+
+    Prints the two timings in seconds and ``conv_time / octconv_time * 100``.
+    """
+    x = torch.rand(1, 3, 224, 224)
+
+    model1 = resnet50()
+    model2 = oct_resnet50()
+
+    use_cuda = torch.cuda.is_available()
+    if use_cuda:
+        x = x.cuda()
+        model1 = model1.cuda()
+        model2 = model2.cuda()
+
+    def timed_forward(model):
+        # Warm-up: keeps one-off setup work (e.g. lazy CUDA initialisation)
+        # out of the measurement.
+        model(x)
+        if use_cuda:
+            torch.cuda.synchronize()
+
+        start = time.perf_counter()
+        model(x)
+        if use_cuda:
+            # Wait for the queued kernels so the elapsed time covers them.
+            torch.cuda.synchronize()
+        return time.perf_counter() - start
+
+    conv_time = timed_forward(model1)
+    octconv_time = timed_forward(model2)
+
+    print("ResNet50:", conv_time)
+    print("OctResNet50:", octconv_time)
+    print("ratio:", conv_time / octconv_time * 100)
+
+
+if __name__ == "__main__":
+    # Single-layer comparison first, then the full-network comparison,
+    # separated by a row of asterisks.
+    benchmark_conv()
+    separator = "*" * 30
+    print(separator)
+    benchmark_resnet50()
diff --git a/benchmarks/configs/cifar10/resnet20_small.yml b/benchmarks/configs/cifar10/resnet20_small.yml
@@ -0,0 +1,14 @@
+root: ./data
+dataset: cifar10
+download: True
+arch: resnet20_small
+alpha: 0.5
+workers: 4
+epochs: 90
+batch-size: 128
+lr: 0.1
+lr-steps: [40, 80]
+lr-warmup-epochs: 5
+momentum: 0.9
+# Written with a decimal point: YAML 1.1 loaders such as PyYAML resolve `1e-4` to a string, not a float.
+weight-decay: 1.0e-4
+print-freq: 10
diff --git a/benchmarks/configs/imagenet/oct-resnet50.yml b/benchmarks/configs/imagenet/oct-resnet50.yml
@@ -0,0 +1,14 @@
+root: ./data
+dataset: imagenet
+download: False
+arch: oct_resnet50
+alpha: 0.5
+workers: 4
+epochs: 120
+batch-size: 256
+lr: 0.1
+lr-steps: [40, 80]
+lr-warmup-epochs: 5
+momentum: 0.9
+# Written with a decimal point: YAML 1.1 loaders such as PyYAML resolve `1e-4` to a string, not a float.
+weight-decay: 1.0e-4
+print-freq: 10
diff --git a/benchmarks/configs/imagenet/resnet50.yml b/benchmarks/configs/imagenet/resnet50.yml
@@ -0,0 +1,14 @@
+root: ./data
+dataset: imagenet
+download: False
+arch: resnet50
+alpha: 0.5
+workers: 4
+epochs: 120
+batch-size: 256
+lr: 0.1
+lr-steps: [40, 80]
+lr-warmup-epochs: 5
+momentum: 0.9
+# Written with a decimal point: YAML 1.1 loaders such as PyYAML resolve `1e-4` to a string, not a float.
+weight-decay: 1.0e-4
+print-freq: 10
diff --git a/benchmarks/models/__init__.py b/benchmarks/models/__init__.py
diff --git a/benchmarks/models/layers.py b/benchmarks/models/layers.py
@@ -0,0 +1,67 @@
+import torch.nn as nn
+from octconv import OctConv2d
+
+
+class OctConvBn(nn.Module):
+    """An ``OctConv2d`` followed by a separate norm layer for each output branch.
+
+    Args:
+        in_channels, out_channels, kernel_size, alpha, stride, padding, bias:
+            Passed through unchanged to ``OctConv2d``.
+        norm_layer: Callable that builds a norm layer from a channel count.
+            Defaults to ``nn.BatchNorm2d``.
+    """
+
+    def __init__(self, in_channels, out_channels, kernel_size, alpha=0.5, stride=1, padding=0,
+                 bias=False, norm_layer=None):
+
+        super(OctConvBn, self).__init__()
+
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+
+        self.conv = OctConv2d(in_channels, out_channels, kernel_size=kernel_size,
+                              alpha=alpha, stride=stride, padding=padding, bias=bias)
+
+        alpha_out = self.conv.alpha_out
+
+        # No high-branch norm is built when alpha_out == 1 and no low-branch
+        # norm when alpha_out == 0; forward() has to cope with either being None.
+        self.bn_h = None if alpha_out == 1 else norm_layer(self.conv.out_channels['high'])
+        self.bn_l = None if alpha_out == 0 else norm_layer(self.conv.out_channels['low'])
+
+    def forward(self, x):
+        """Run the convolution and normalise every branch it produced.
+
+        Returns:
+            A ``(x_h, x_l)`` tuple. ``x_l`` is ``None`` when the convolution
+            returns a single tensor rather than a tuple.
+        """
+        out = self.conv(x)
+
+        # A non-tuple result is treated as the high-frequency branch alone.
+        x_h, x_l = out if isinstance(out, tuple) else (out, None)
+
+        # Only call a norm layer that exists, on a tensor that exists; calling
+        # a missing (None) norm layer would raise a TypeError.
+        if x_h is not None and self.bn_h is not None:
+            x_h = self.bn_h(x_h)
+        if x_l is not None and self.bn_l is not None:
+            x_l = self.bn_l(x_l)
+
+        return x_h, x_l
+
+
+class OctConvBnAct(nn.Module):
+    """An ``OctConv2d`` followed by a per-branch norm layer and an activation.
+
+    Args:
+        in_channels, out_channels, kernel_size, alpha, stride, padding, bias:
+            Passed through unchanged to ``OctConv2d``.
+        norm_layer: Callable that builds a norm layer from a channel count.
+            Defaults to ``nn.BatchNorm2d``.
+        activation_layer: Activation module instance applied to both branches.
+            Defaults to ``nn.ReLU(inplace=True)``.
+    """
+
+    def __init__(self, in_channels, out_channels, kernel_size, alpha=0.5, stride=1, padding=0,
+                 bias=False, norm_layer=None, activation_layer=None):
+
+        super(OctConvBnAct, self).__init__()
+
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+
+        if activation_layer is None:
+            activation_layer = nn.ReLU(inplace=True)
+
+        self.conv = OctConv2d(in_channels, out_channels, kernel_size=kernel_size,
+                              alpha=alpha, stride=stride, padding=padding, bias=bias)
+
+        alpha_out = self.conv.alpha_out
+
+        # No high-branch norm is built when alpha_out == 1 and no low-branch
+        # norm when alpha_out == 0; forward() has to cope with either being None.
+        self.bn_h = None if alpha_out == 1 else norm_layer(self.conv.out_channels['high'])
+        self.bn_l = None if alpha_out == 0 else norm_layer(self.conv.out_channels['low'])
+
+        self.act = activation_layer
+
+    def forward(self, x):
+        """Run the convolution, then normalise and activate each branch it produced.
+
+        Returns:
+            A ``(x_h, x_l)`` tuple. ``x_l`` is ``None`` when the convolution
+            returns a single tensor rather than a tuple.
+        """
+        out = self.conv(x)
+
+        # A non-tuple result is treated as the high-frequency branch alone.
+        x_h, x_l = out if isinstance(out, tuple) else (out, None)
+
+        # Skip a missing (None) norm layer instead of calling it, which would
+        # raise a TypeError; the activation is applied to every existing branch.
+        if x_h is not None:
+            if self.bn_h is not None:
+                x_h = self.bn_h(x_h)
+            x_h = self.act(x_h)
+        if x_l is not None:
+            if self.bn_l is not None:
+                x_l = self.bn_l(x_l)
+            x_l = self.act(x_l)
+
+        return x_h, x_l
+
+
+if __name__ == "__main__":
+    # Nothing to run: this module only defines layers.
+    pass
diff --git a/benchmarks/models/resnets.py b/benchmarks/models/resnets.py
@@ -0,0 +1,157 @@
+import torch.nn as nn
+
+from benchmarks.models.layers import OctConvBn, OctConvBnAct
+
+
+class Bottleneck(nn.Module):
+    """Residual bottleneck block (1x1 -> 3x3 -> 1x1) built from octave convolutions.
+
+    Args:
+        inplanes: Number of input channels.
+        planes: Base width of the block; the output has
+            ``planes * expansion`` channels.
+        stride: Stride of the 3x3 convolution.
+        downsample: Optional module applied to the block input to produce the
+            residual identity.
+        groups, base_width: Only used to compute the inner width,
+            ``int(planes * (base_width / 64.)) * groups``.
+        alpha: Octave ratio passed to the inner convolutions.
+        norm_layer: Callable that builds a norm layer from a channel count.
+            Defaults to ``nn.BatchNorm2d``.
+        first_block: When true, the first convolution uses ``alpha=(0., alpha)``.
+        last_block: When true, the last convolution uses ``alpha=(alpha, 0.)``.
+    """
+
+    expansion = 4
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
+                 base_width=64, alpha=0.5, norm_layer=None,
+                 first_block=False, last_block=False):
+
+        super(Bottleneck, self).__init__()
+
+        assert not (first_block and last_block), "mutually exclusive options"
+
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+
+        width = int(planes * (base_width / 64.)) * groups
+
+        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
+        self.conv1 = OctConvBnAct(inplanes, width, kernel_size=1, norm_layer=norm_layer,
+                                  alpha=alpha if not first_block else (0., alpha))
+        self.conv2 = OctConvBnAct(width, width, kernel_size=3, stride=stride, padding=1,
+                                  norm_layer=norm_layer, alpha=alpha)
+        self.conv3 = OctConvBn(width, planes * self.expansion, kernel_size=1, norm_layer=norm_layer,
+                               alpha=alpha if not last_block else (alpha, 0.))
+
+        self.relu = nn.ReLU(inplace=True)
+
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        """Apply the three convolutions, add the residual identity, then ReLU.
+
+        Args:
+            x: Either a single tensor or a ``(high, low)`` tuple.
+
+        Returns:
+            A ``(x_h, x_l)`` tuple. ``x_l`` is ``None`` unless both the
+            convolution path and the identity path provide a low branch.
+        """
+        # isinstance also accepts tuple subclasses, unlike an exact type check.
+        if isinstance(x, tuple):
+            identity_h, identity_l = x[0], x[1]
+        else:
+            identity_h, identity_l = x, None
+
+        x_h, x_l = self.conv1(x)
+        x_h, x_l = self.conv2((x_h, x_l))
+        out = self.conv3((x_h, x_l))
+
+        x_h, x_l = out if isinstance(out, tuple) else (out, None)
+
+        if self.downsample is not None:
+            identity = self.downsample(x)
+            identity_h, identity_l = identity if isinstance(identity, tuple) else (identity, None)
+
+        x_h += identity_h
+        # Add the low branch only when both sides have one. A missing low
+        # identity yields no low output; a missing low tensor no longer raises
+        # a TypeError from ``None + tensor``.
+        if x_l is not None and identity_l is not None:
+            x_l = x_l + identity_l
+        else:
+            x_l = None
+
+        x_h = self.relu(x_h)
+        if x_l is not None:
+            x_l = self.relu(x_l)
+
+        return x_h, x_l
+
+
+class OctResNet(nn.Module):
+    """ResNet-style classifier whose four residual stages use octave convolutions.
+
+    The stem is a plain 7x7 stride-2 convolution, a norm layer, ReLU and a 3x3
+    stride-2 max-pool. Four stages built by ``_make_layer`` follow, then global
+    average pooling of the high-frequency output and a linear classifier.
+
+    Args:
+        block: Residual block class; must expose an ``expansion`` attribute.
+        layers: Sequence of four ints, the number of blocks per stage.
+        num_classes: Size of the classifier output.
+        groups, width_per_group: Forwarded to every block as ``groups`` and
+            ``base_width``.
+        norm_layer: Callable that builds a norm layer from a channel count.
+            Defaults to ``nn.BatchNorm2d``.
+        alpha: Octave ratio used throughout the residual stages.
+    """
+
+    def __init__(self, block, layers, num_classes=1000, groups=1, width_per_group=64, norm_layer=None, alpha=0.5):
+        super(OctResNet, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+
+        self.alpha = alpha
+        self.inplanes = 64
+        self.groups = groups
+        self.base_width = width_per_group
+        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
+                               bias=False)
+        self.bn1 = norm_layer(self.inplanes)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        self.layer1 = self._make_layer(block, 64, layers[0], norm_layer=norm_layer, first_layer=True)
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2, norm_layer=norm_layer)
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2, norm_layer=norm_layer)
+        self.layer4 = self._make_layer(block, 512, layers[3], stride=2, norm_layer=norm_layer, last_layer=True)
+        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+        self.fc = nn.Linear(512 * block.expansion, num_classes)
+
+        # Only modules that are instances of these classes are re-initialised.
+        # NOTE(review): whether the convolutions inside OctConv2d match
+        # nn.Conv2d depends on its implementation, which is not visible here.
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+    def _make_layer(self, block, planes, blocks, stride=1, norm_layer=None, first_layer=False, last_layer=False):
+        """Build one residual stage of ``blocks`` blocks as an ``nn.Sequential``.
+
+        The first block receives ``stride`` and, when the stride or channel
+        count changes, a 1x1 ``OctConvBn`` downsample path. ``self.inplanes``
+        is updated to ``planes * block.expansion`` for the remaining blocks.
+
+        Args:
+            block: Residual block class.
+            planes: Base width of the blocks in this stage.
+            blocks: Number of blocks in the stage.
+            stride: Stride of the first block.
+            norm_layer: Callable that builds a norm layer from a channel
+                count. Defaults to ``nn.BatchNorm2d``.
+            first_layer: Marks the first stage (its input is a single tensor).
+            last_layer: Marks the last stage (its output has no low branch).
+        """
+        assert not (first_layer and last_layer), "mutually exclusive options"
+
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            if last_layer:
+                downsample_alpha = (self.alpha, 0.)
+            elif first_layer:
+                downsample_alpha = (0., self.alpha)
+            else:
+                downsample_alpha = self.alpha
+            # norm_layer is forwarded so the shortcut uses the same
+            # normalisation as the rest of the network.
+            downsample = nn.Sequential(
+                OctConvBn(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride,
+                          alpha=downsample_alpha, norm_layer=norm_layer)
+            )
+
+        layers = [block(self.inplanes, planes, stride=stride, downsample=downsample,
+                        groups=self.groups, base_width=self.base_width,
+                        alpha=self.alpha, norm_layer=norm_layer,
+                        first_block=first_layer, last_block=last_layer)]
+
+        self.inplanes = planes * block.expansion
+        # In the last stage the remaining blocks are built with alpha 0.
+        for _ in range(1, blocks):
+            layers.append(block(self.inplanes, planes, groups=self.groups,
+                                base_width=self.base_width, norm_layer=norm_layer,
+                                alpha=self.alpha if not last_layer else 0.,
+                                last_block=last_layer))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        """Return the classifier output for an image batch ``x``."""
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x_h, x_l = self.layer1(x)
+        x_h, x_l = self.layer2((x_h, x_l))
+        x_h, x_l = self.layer3((x_h, x_l))
+        x_h, x_l = self.layer4((x_h, x_l))
+
+        # Only the high-frequency output of the last stage feeds the classifier.
+        x = self.avgpool(x_h)
+        x = x.view(x.size(0), -1)
+        x = self.fc(x)
+
+        return x
+
+
+def _oct_resnet(inplanes, planes, **kwargs):
+    """Build an ``OctResNet`` from the two positional arguments plus ``kwargs``.
+
+    Despite their names, the callers in this module pass the block class as
+    ``inplanes`` and the per-stage block counts as ``planes``.
+    """
+    return OctResNet(inplanes, planes, **kwargs)
+
+
+def oct_resnet50(**kwargs):
+    """Construct an OctResNet-50 model; ``kwargs`` go to ``OctResNet``."""
+    blocks_per_stage = [3, 4, 6, 3]
+    return _oct_resnet(Bottleneck, blocks_per_stage, **kwargs)
+
+
+def oct_resnet101(**kwargs):
+    """Construct an OctResNet-101 model; ``kwargs`` go to ``OctResNet``."""
+    blocks_per_stage = [3, 4, 23, 3]
+    return _oct_resnet(Bottleneck, blocks_per_stage, **kwargs)
+
+
+def oct_resnet152(**kwargs):
+    """Construct an OctResNet-152 model; ``kwargs`` go to ``OctResNet``."""
+    blocks_per_stage = [3, 8, 36, 3]
+    return _oct_resnet(Bottleneck, blocks_per_stage, **kwargs)