Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
mitmul authored and Shunta Saito committed Jun 23, 2016
0 parents commit 6e82f38
Show file tree
Hide file tree
Showing 15 changed files with 1,148 additions and 0 deletions.
16 changes: 16 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
.DS_Store
.remote-sync.json
.sync-config.cson

*__pycache__*
*.pyc
*.png
*.model
*.ipynb_checkpoints
*.ipynb
*.c
*.so
*.o
*.jpg

faster_rcnn_models
50 changes: 50 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Faster R-CNN

This is an experimental implementation of Faster R-CNN in Chainer, based on Ross Girshick's [py-faster-rcnn code](https://github.com/rbgirshick/py-faster-rcnn).

## Requirements

- Python 2.7.6+, 3.4.3+, 3.5.1+

- [Chainer](https://github.com/pfnet/chainer) 1.9.1+
- NumPy 1.9, 1.10, 1.11
- Cython 0.23+
- OpenCV 2.9+, 3.1+

## Inference

### 1. Download pre-trained model

```
wget https://www.dropbox.com/s/2fadbs9q50igar8/VGG16_faster_rcnn_final.model?dl=0
mv VGG16_faster_rcnn_final.model?dl=0 VGG16_faster_rcnn_final.model
```

### 2. Build extensions

```
cd lib
python setup.py build_ext -i
```

### 3. Use forward.py

```
wget http://vision.cs.utexas.edu/voc/VOC2007_test/JPEGImages/004545.jpg
python forward.py --img_fn 004545.jpg --gpu 0
```

`--gpu 0` runs inference on GPU 0. To run on the CPU instead, pass `--gpu -1` or omit the `--gpu` option.

To run forward.py on the CPU, you first have to apply the patch from the following pull request, which fixes a known bug in Chainer: <https://github.com/pfnet/chainer/pull/1273>

![](https://raw.githubusercontent.com/wiki/mitmul/chainer-faster-rcnn/images/result.png)

## Training

Training instructions will be added soon.

## Framework

![](https://raw.githubusercontent.com/wiki/mitmul/chainer-faster-rcnn/images/Faster%20R-CNN.png)
94 changes: 94 additions & 0 deletions forward.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from chainer import serializers
from lib.cpu_nms import cpu_nms as nms
from lib.models.faster_rcnn import FasterRCNN

import argparse
import chainer
import cv2 as cv
import numpy as np

# Class names indexed by class id. Index 0 is the background entry; the
# drawing code in this file only looks up ids 1..20.
# NOTE(review): these look like the 20 PASCAL VOC categories -- confirm
# against the dataset the pre-trained model was trained on.
CLASSES = ('__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor')
# Per-channel mean, shaped (1, 1, 3) so it broadcasts over an image whose
# last axis has three channels; it is passed to img_preprocessing, which
# subtracts it from the input image.
# NOTE(review): presumably in BGR order to match cv.imread output -- confirm.
PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])


def get_model(model_fn='VGG16_faster_rcnn_final.model'):
    """Build a FasterRCNN in inference mode and load its weights.

    Args:
        model_fn (str): Path of the NPZ file handed to
            ``serializers.load_npz``. The default is the file name that was
            previously hard-coded here, so calling ``get_model()`` without
            arguments behaves exactly as before.

    Returns:
        FasterRCNN: The model with ``train`` set to ``False`` and the
        parameters from ``model_fn`` loaded into it.
    """
    model = FasterRCNN()
    # Switch to inference mode before loading the pre-trained parameters.
    model.train = False
    serializers.load_npz(model_fn, model)

    return model


def img_preprocessing(orig_img, pixel_means, max_size=1000, scale=600):
    """Mean-subtract and rescale an image before feeding it to the network.

    The scale factor is chosen so that the shorter of the first two axes
    becomes ``scale`` pixels, unless that would push the longer one beyond
    ``max_size``; in that case the longer one is scaled to ``max_size``.

    Args:
        orig_img (numpy.ndarray): Input image; it is copied, not modified.
        pixel_means (numpy.ndarray): Values subtracted from the image
            (broadcast against it).
        max_size (int): Upper bound for the longer side after scaling.
        scale (int): Target length of the shorter side.

    Returns:
        tuple: ``(img, im_scale)`` where ``img`` is the resized float32
        image with its last axis moved to the front and ``im_scale`` is the
        scale factor that was applied to both axes.
    """
    img = orig_img.astype(np.float32, copy=True)
    img -= pixel_means

    side_lengths = img.shape[0:2]
    short_side = np.min(side_lengths)
    long_side = np.max(side_lengths)

    im_scale = float(scale) / float(short_side)
    # Fall back to fitting the longer side when the preferred scale would
    # make it exceed max_size.
    if np.round(im_scale * long_side) > max_size:
        im_scale = float(max_size) / float(long_side)

    resized = cv.resize(img, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv.INTER_LINEAR)
    channels_first = resized.transpose([2, 0, 1]).astype(np.float32)

    return channels_first, im_scale


def draw_result(out, im_scale, clss, bbox, nms_thresh, conf):
    """Draw the detections that survive NMS and the score threshold.

    For every class id from 1 to 20 the per-class boxes and scores are
    stacked, filtered with ``nms`` and then thresholded with ``conf``. Each
    remaining box is drawn on ``out`` together with a filled label holding
    the class name, anchored at the bottom-left corner of the box.

    Args:
        out (numpy.ndarray): Image to draw on; it is modified in place.
        im_scale (float): Accepted for interface compatibility; it is not
            used in this function.
        clss (numpy.ndarray): Scores, indexed as ``clss[:, cls_id]``.
        bbox (numpy.ndarray): Boxes, four columns per class, indexed as
            ``bbox[:, cls_id * 4:(cls_id + 1) * 4]``.
        nms_thresh (float): Threshold passed to ``nms``.
        conf (float): Minimum score for a detection to be drawn.

    Returns:
        numpy.ndarray: The same ``out`` array that was passed in.
    """
    line_type = 16  # value the original code names CV_AA
    red = (0, 0, 255)
    white = (255, 255, 255)

    for cls_id in range(1, 21):
        scores = clss[:, cls_id][:, np.newaxis]
        boxes = bbox[:, cls_id * 4: (cls_id + 1) * 4]
        dets = np.hstack((boxes, scores))
        dets = dets[nms(dets, nms_thresh), :]

        for i in np.where(dets[:, -1] >= conf)[0]:
            x1, y1, x2, y2 = [int(v) for v in dets[i, :4]]
            cv.rectangle(out, (x1, y1), (x2, y2), red, 2, line_type)

            # Filled background sized to the label text, then the text.
            label = CLASSES[cls_id]
            text_size, baseline = cv.getTextSize(
                label, cv.FONT_HERSHEY_SIMPLEX, 0.8, 1)
            cv.rectangle(out, (x1, y2 - text_size[1] - baseline),
                         (x1 + text_size[0], y2), red, -1)
            cv.putText(out, label, (x1, y2 - baseline),
                       cv.FONT_HERSHEY_SIMPLEX, 0.8, white, 1, line_type)

    return out


if __name__ == '__main__':
    # Command-line entry point: run the detector on one image and write a
    # copy of it with the detections drawn on top.
    parser = argparse.ArgumentParser()
    parser.add_argument('--img_fn', type=str, default='sample.jpg')
    parser.add_argument('--out_fn', type=str, default='result.jpg')
    parser.add_argument('--nms_thresh', type=float, default=0.3)
    parser.add_argument('--conf', type=float, default=0.8)
    parser.add_argument('--gpu', type=int, default=-1)
    args = parser.parse_args()

    # Decide once whether the GPU is used so that every later step agrees.
    # Previously the final device-to-host copy only checked args.gpu, which
    # crashed when --gpu >= 0 was requested on a machine without CUDA even
    # though the forward pass itself had silently run on the CPU.
    use_gpu = chainer.cuda.available and args.gpu >= 0
    xp = chainer.cuda.cupy if use_gpu else np
    model = get_model()
    if use_gpu:
        model.to_gpu(args.gpu)

    orig_image = cv.imread(args.img_fn)
    if orig_image is None:
        # cv.imread reports a missing or unreadable file by returning None
        # rather than raising, so fail here with a clear message.
        raise IOError('Failed to read image: {}'.format(args.img_fn))

    img, im_scale = img_preprocessing(orig_image, PIXEL_MEANS)
    img = np.expand_dims(img, axis=0)  # add the leading batch axis
    img = chainer.Variable(xp.asarray(img, dtype=np.float32), volatile=True)
    h, w = img.data.shape[2:]
    cls_score, bbox_pred = model(img, np.array([[h, w, im_scale]]))
    cls_score = cls_score.data

    if use_gpu:
        # Bring the results back to host memory for NMS and drawing.
        cls_score = chainer.cuda.cupy.asnumpy(cls_score)
        bbox_pred = chainer.cuda.cupy.asnumpy(bbox_pred)
    result = draw_result(orig_image, im_scale, cls_score, bbox_pred,
                         args.nms_thresh, args.conf)
    cv.imwrite(args.out_fn, result)
Empty file added lib/__init__.py
Empty file.
Empty file added lib/faster_rcnn/__init__.py
Empty file.
217 changes: 217 additions & 0 deletions lib/faster_rcnn/anchor_target.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Modified by:
# Copyright (c) 2016 Shunta Saito

# Original work by:
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# https://github.com/rbgirshick/py-faster-rcnn
# --------------------------------------------------------

import chainer
import numpy as np
from generate_anchors import generate_anchors
from utils.cython_bbox import bbox_overlaps
from fast_rcnn.bbox_transform import bbox_transform


class AnchorTarget(object):
    """Assign RPN labels and box-regression targets to anchors.

    The base anchors returned by ``generate_anchors()`` are replicated at
    every position of the input feature map (shifted by ``feat_stride``
    per cell). Anchors that lie inside the image are labelled positive (1),
    negative (0) or "don't care" (-1) from their overlap with the
    ground-truth boxes, sub-sampled to at most ``RPN_BATCHSIZE`` labelled
    anchors, and given regression targets and loss weights.

    Args:
        feat_stride (int): Factor that converts a feature-map column/row
            index into the anchor shift along x/y.
    """

    # Anchors whose best overlap is below this are labelled negative.
    RPN_NEGATIVE_OVERLAP = 0.3
    # Anchors whose best overlap is at least this are labelled positive.
    RPN_POSITIVE_OVERLAP = 0.7
    # If True, negatives are assigned last and may overwrite positives.
    RPN_CLOBBER_POSITIVES = False
    # Maximum fraction of the batch that may be positive.
    RPN_FG_FRACTION = 0.5
    # Maximum number of labelled (positive + negative) anchors.
    RPN_BATCHSIZE = 256
    # Inside weights written for the four targets of each positive anchor.
    RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
    # Negative value: weight every labelled anchor uniformly. A value in
    # (0, 1) is used as the relative weight of the positives instead.
    RPN_POSITIVE_WEIGHT = -1.0

    def __init__(self, feat_stride=16):
        self.feat_stride = feat_stride
        self.anchors = generate_anchors()
        self.n_anchors = self.anchors.shape[0]
        # Number of pixels an anchor may extend beyond the image border.
        self.allowed_border = 0

    def __call__(self, x, gt_boxes, im_info):
        """Compute labels, regression targets and loss weights.

        Args:
            x (chainer.Variable): Feature map; only the last two entries of
                ``x.data.shape`` (height, width) are used.
            gt_boxes (numpy.ndarray): Ground-truth boxes with five columns;
                the first four are used as box coordinates.
                NOTE(review): the fifth column is presumably the class
                label -- confirm with the caller.
            im_info (sequence): ``im_info[0]`` is the image height and
                ``im_info[1]`` the image width used for the inside test.

        Returns:
            tuple: ``(labels, bbox_targets, bbox_inside_weights,
            bbox_outside_weights)`` as float32 arrays. ``labels`` has shape
            ``(1, A, height, width)`` and the other three have shape
            ``(1, A * 4, height, width)``, where ``A`` is the number of
            base anchors.
        """
        height, width = x.data.shape[2:]

        # One (x, y, x, y) shift per feature-map cell.
        shift_x = np.arange(0, width) * self.feat_stride
        shift_y = np.arange(0, height) * self.feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Broadcast (1, A, 4) base anchors against (K, 1, 4) shifts to get
        # every anchor at every cell, then flatten to (K * A, 4).
        A = self.n_anchors
        K = shifts.shape[0]
        all_anchors = (self.anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -self.allowed_border) &
            (all_anchors[:, 1] >= -self.allowed_border) &
            (all_anchors[:, 2] < im_info[1] + self.allowed_border) &  # width
            (all_anchors[:, 3] < im_info[0] + self.allowed_border)  # height
        )[0]
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # overlaps between the anchors and the gt boxes: overlaps (ex, gt).
        # np.float64 replaces the np.float alias (it was the same type),
        # which newer NumPy releases no longer provide.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # Every anchor that ties for the best overlap with some gt box.
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not self.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < self.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= self.RPN_POSITIVE_OVERLAP] = 1

        if self.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber
            # positives
            labels[max_overlaps < self.RPN_NEGATIVE_OVERLAP] = 0

        # subsample positive labels if we have too many
        num_fg = int(self.RPN_FG_FRACTION * self.RPN_BATCHSIZE)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = np.random.choice(
                fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = self.RPN_BATCHSIZE - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = np.random.choice(
                bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            labels[disable_inds] = -1

        # Regression target of each anchor w.r.t. its best-matching gt box.
        bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

        bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_inside_weights[labels == 1, :] = np.array(
            self.RPN_BBOX_INSIDE_WEIGHTS)

        bbox_outside_weights = np.zeros(
            (len(inds_inside), 4), dtype=np.float32)
        if self.RPN_POSITIVE_WEIGHT < 0:
            # uniform weighting of examples (given non-uniform sampling)
            num_examples = np.sum(labels >= 0)
            positive_weights = np.ones((1, 4)) * 1.0 / num_examples
            negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        else:
            assert 0 < self.RPN_POSITIVE_WEIGHT < 1
            positive_weights = (self.RPN_POSITIVE_WEIGHT /
                                np.sum(labels == 1))
            negative_weights = ((1.0 - self.RPN_POSITIVE_WEIGHT) /
                                np.sum(labels == 0))
        bbox_outside_weights[labels == 1, :] = positive_weights
        bbox_outside_weights[labels == 0, :] = negative_weights

        # map up to original set of anchors
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_inside_weights = _unmap(
            bbox_inside_weights, total_anchors, inds_inside, fill=0)
        bbox_outside_weights = _unmap(
            bbox_outside_weights, total_anchors, inds_inside, fill=0)

        # Reshape the flat per-anchor arrays back onto the feature-map grid
        # and move the anchor axis in front of the spatial axes.
        labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)

        bbox_targets = bbox_targets \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

        bbox_inside_weights = bbox_inside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_inside_weights.shape[2] == height
        assert bbox_inside_weights.shape[3] == width

        bbox_outside_weights = bbox_outside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_outside_weights.shape[2] == height
        assert bbox_outside_weights.shape[3] == width

        return labels, bbox_targets, bbox_inside_weights, bbox_outside_weights


def _unmap(data, count, inds, fill=0):
    """Scatter a subset of rows back into an array covering all items.

    Args:
        data (numpy.ndarray): Values for the selected items; the first axis
            runs over the items.
        count (int): Number of items in the full set.
        inds (numpy.ndarray): Positions in the full set that ``data``
            belongs to.
        fill: Value used for every position not listed in ``inds``.

    Returns:
        numpy.ndarray: float32 array of shape ``(count,) + data.shape[1:]``
        holding ``data`` at ``inds`` and ``fill`` everywhere else.
    """
    full_shape = (count, ) + data.shape[1:]
    ret = np.full(full_shape, fill, dtype=np.float32)
    if len(data.shape) == 1:
        ret[inds] = data
    else:
        ret[inds, :] = data
    return ret


def _compute_targets(ex_rois, gt_rois):
    """Return float32 box-regression targets for paired boxes.

    Args:
        ex_rois (numpy.ndarray): Example boxes with four columns.
        gt_rois (numpy.ndarray): Ground-truth rows with five columns, one
            row per example box; only the first four columns are passed on
            to ``bbox_transform``.

    Returns:
        numpy.ndarray: Result of ``bbox_transform`` as float32 (no copy is
        made when it already has that dtype).
    """
    # Sanity checks on the pairing and on the column layout.
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    gt_coords = gt_rois[:, :4]
    targets = bbox_transform(ex_rois, gt_coords)
    return targets.astype(np.float32, copy=False)

if __name__ == '__main__':
    # Smoke test: run AnchorTarget on a dummy input with two ground-truth
    # boxes, print the shape of every output and save a mask image.
    height, width = 480, 640
    x = chainer.Variable(np.ones((1, 3, height, width), dtype=np.float32))
    gt_boxes = np.array(
        [[100, 100, 150, 150, 0],
         [250, 250, 300, 350, 1]],
        dtype=np.int32)
    im_info = np.array([height, width], dtype=np.int32)

    anchor_target = AnchorTarget()
    outputs = anchor_target(x, gt_boxes, im_info)
    labels = outputs[0]

    # Order: labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
    for arr in outputs:
        print(arr.shape)

    # White pixels mark the positions where the first anchor's label is 0.
    from skimage import io
    negative_mask = (labels[0, 0] == 0).astype(np.uint8) * 255
    io.imsave('a.png', negative_mask)
Loading

0 comments on commit 6e82f38

Please sign in to comment.