
Spatial Pyramid Pooling Layer #2177

Merged: 1 commit, May 15, 2015
66 changes: 66 additions & 0 deletions include/caffe/vision_layers.hpp
@@ -451,6 +451,72 @@ class CuDNNPoolingLayer : public PoolingLayer<Dtype> {
};
#endif

/**
 * @brief Does spatial pyramid pooling on the input image
 * by taking the max, average, etc. within regions,
 * so that the result vectors of differently sized
 * images are of the same size.
 */
template <typename Dtype>
class SPPLayer : public Layer<Dtype> {
public:
explicit SPPLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "SPP"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int MinTopBlobs() const { return 1; }
// MAX POOL layers can output an extra top blob for the mask;
// others can only output the pooled inputs.
virtual inline int MaxTopBlobs() const {
return (this->layer_param_.pooling_param().pool() ==
PoolingParameter_PoolMethod_MAX) ? 2 : 1;
}

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
// Calculates the kernel and stride dimensions for the pooling layer and
// returns a correctly configured LayerParameter for a PoolingLayer.
virtual LayerParameter GetPoolingParam(const int pyramid_level,
const int bottom_h, const int bottom_w, const SPPParameter spp_param);

int pyramid_height_;
int bottom_h_, bottom_w_;
int channels_;
int kernel_h_, kernel_w_;
int pad_h_, pad_w_;

/// the internal Split layer that feeds the pooling layers
shared_ptr<SplitLayer<Dtype> > split_layer_;
/// top vector holder used in call to the underlying SplitLayer::Forward
vector<Blob<Dtype>*> split_top_vec_;
/// bottom vector holder used in call to the underlying PoolingLayer::Forward
vector<vector<Blob<Dtype>*>*> pooling_bottom_vecs_;
/// the internal Pooling layers of different kernel sizes
vector<shared_ptr<PoolingLayer<Dtype> > > pooling_layers_;
/// top vector holders used in call to the underlying PoolingLayer::Forward
vector<vector<Blob<Dtype>*>*> pooling_top_vecs_;
/// pooling_outputs stores the outputs of the PoolingLayers
vector<Blob<Dtype>*> pooling_outputs_;
/// the internal Flatten layers that the Pooling layers feed into
vector<FlattenLayer<Dtype>*> flatten_layers_;
/// top vector holders used in call to the underlying FlattenLayer::Forward
vector<vector<Blob<Dtype>*>*> flatten_top_vecs_;
/// flatten_outputs stores the outputs of the FlattenLayers
vector<Blob<Dtype>*> flatten_outputs_;
/// bottom vector holder used in call to the underlying ConcatLayer::Forward
vector<Blob<Dtype>*> concat_bottom_vec_;
/// the internal Concat layers that the Flatten layers feed into
shared_ptr<ConcatLayer<Dtype> > concat_layer_;
};

} // namespace caffe

#endif // CAFFE_VISION_LAYERS_HPP_
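The class comment above is the crux of SPP: the concatenated output length depends only on the pyramid height and the channel count, never on the input's spatial size. A minimal sketch of that bookkeeping (not part of the PR; it assumes levels 0..P-1 with a 2^l x 2^l bin grid per level, matching GetPoolingParam below, and concatenation along the channel axis):

// Sketch (assumptions as above): number of output features produced by an
// SPP layer of pyramid_height P over a C-channel input, for any input size.
int SppOutputFeatures(int pyramid_height, int channels) {
  int bins = 0;
  for (int l = 0; l < pyramid_height; ++l) {
    bins += (1 << l) * (1 << l);  // a 2^l x 2^l pooling grid at level l
  }
  return channels * bins;
}

For example, pyramid_height = 3 over 256 channels yields 256 * (1 + 4 + 16) = 5376 features whether the image is 13 x 13 or 40 x 67.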
193 changes: 193 additions & 0 deletions src/caffe/layers/spp_layer.cpp
@@ -0,0 +1,193 @@
#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/common.hpp"
#include "caffe/layer.hpp"
#include "caffe/syncedmem.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

using std::min;
using std::max;

template <typename Dtype>
LayerParameter SPPLayer<Dtype>::GetPoolingParam(const int pyramid_level,
const int bottom_h, const int bottom_w, const SPPParameter spp_param) {
LayerParameter pooling_param;
int num_bins = pow(2, pyramid_level);

// find padding and kernel size so that the pooling is
// performed across the entire image
int kernel_h = ceil(bottom_h / static_cast<double>(num_bins));
// remainder_h is the minimum number of pixels that need to be padded before
// the entire image height is pooled over with the chosen kernel dimension
int remainder_h = kernel_h * num_bins - bottom_h;
// the pooling layer pads pad_h pixels on each of the top and bottom of
// the image, i.e. (2 * pad_h) pixels in total
int pad_h = (remainder_h + 1) / 2;

// similar logic for width
int kernel_w = ceil(bottom_w / static_cast<double>(num_bins));
int remainder_w = kernel_w * num_bins - bottom_w;
int pad_w = (remainder_w + 1) / 2;

pooling_param.mutable_pooling_param()->set_pad_h(pad_h);
pooling_param.mutable_pooling_param()->set_pad_w(pad_w);
pooling_param.mutable_pooling_param()->set_kernel_h(kernel_h);
pooling_param.mutable_pooling_param()->set_kernel_w(kernel_w);
pooling_param.mutable_pooling_param()->set_stride_h(kernel_h);
pooling_param.mutable_pooling_param()->set_stride_w(kernel_w);

switch (spp_param.pool()) {
case SPPParameter_PoolMethod_MAX:
pooling_param.mutable_pooling_param()->set_pool(
PoolingParameter_PoolMethod_MAX);
break;
case SPPParameter_PoolMethod_AVE:
pooling_param.mutable_pooling_param()->set_pool(
PoolingParameter_PoolMethod_AVE);
break;
case SPPParameter_PoolMethod_STOCHASTIC:
pooling_param.mutable_pooling_param()->set_pool(
PoolingParameter_PoolMethod_STOCHASTIC);
break;
default:
LOG(FATAL) << "Unknown pooling method.";
}

return pooling_param;
}
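To make the arithmetic above concrete, here is a small standalone sketch (not from the PR) reproducing the per-dimension kernel/stride/pad derivation:

#include <cmath>
#include <cstdio>

// Sketch of GetPoolingParam's arithmetic for one spatial dimension.
void SppBinArithmetic(int pyramid_level, int bottom_h) {
  int num_bins = 1 << pyramid_level;  // 2^pyramid_level bins along this axis
  int kernel_h = static_cast<int>(
      std::ceil(bottom_h / static_cast<double>(num_bins)));
  int remainder_h = kernel_h * num_bins - bottom_h;  // shortfall to pad
  int pad_h = (remainder_h + 1) / 2;  // split across the two borders
  std::printf("bins=%d kernel=%d stride=%d pad=%d\n",
              num_bins, kernel_h, kernel_h, pad_h);
}

With assumed inputs, SppBinArithmetic(2, 13) prints bins=4 kernel=4 stride=4 pad=2, and SppBinArithmetic(2, 40) prints bins=4 kernel=10 stride=10 pad=0: the bin count is fixed by the pyramid level, while kernel, stride, and pad track the input size, which is exactly what the Reshape() discussion below turns on.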

template <typename Dtype>
void SPPLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
SPPParameter spp_param = this->layer_param_.spp_param();

bottom_h_ = bottom[0]->height();
bottom_w_ = bottom[0]->width();
CHECK_GT(bottom_h_, 0) << "Input dimensions cannot be zero.";
CHECK_GT(bottom_w_, 0) << "Input dimensions cannot be zero.";

pyramid_height_ = spp_param.pyramid_height();
split_top_vec_.clear();
pooling_bottom_vecs_.clear();
pooling_layers_.clear();
pooling_top_vecs_.clear();
pooling_outputs_.clear();
flatten_layers_.clear();
flatten_top_vecs_.clear();
flatten_outputs_.clear();
concat_bottom_vec_.clear();

// split layer output holders setup
for (int i = 0; i < pyramid_height_; i++) {
split_top_vec_.push_back(new Blob<Dtype>());
}

// split layer setup
LayerParameter split_param;
split_layer_.reset(new SplitLayer<Dtype>(split_param));
split_layer_->SetUp(bottom, split_top_vec_);

for (int i = 0; i < pyramid_height_; i++) {
// pooling layer input holders setup
pooling_bottom_vecs_.push_back(new vector<Blob<Dtype>*>);
pooling_bottom_vecs_[i]->push_back(split_top_vec_[i]);

// pooling layer output holders setup
pooling_outputs_.push_back(new Blob<Dtype>());
pooling_top_vecs_.push_back(new vector<Blob<Dtype>*>);
pooling_top_vecs_[i]->push_back(pooling_outputs_[i]);

// pooling layer setup
Review comment (Member):
The kernel size and stride logic need to be in Reshape(). The number of spatial pyramid pooling bins should stay constant but their dimensions will need to change for each input. Inputs can change shape with (1) reshaping data layers #1313 or (2) calls to net or blob reshape(). When this happens, the kernel size and stride need re-configuring.

Reply (Contributor, Author):
Is there a way to change parameters of a layer without having to set it up again? The only way I could figure out the re-configuring of the kernel size and stride height is by constructing a new LayerParameter, resetting the PoolingLayer with that LayerParameter, and calling the PoolingLayer's SetUp.

Reply (Contributor):
I don't think there is a way to change parameters without deleting and reinitializing the layer -- you could add a setter to Layer but I don't think it would really save anything since the constructor itself is probably basically free (SetUp is probably a little more expensive but you'd have to call that regardless). Do you know if it's an issue in practice?

LayerParameter pooling_param = GetPoolingParam(
i, bottom_h_, bottom_w_, spp_param);

pooling_layers_.push_back(shared_ptr<PoolingLayer<Dtype> > (
new PoolingLayer<Dtype>(pooling_param)));
pooling_layers_[i]->SetUp(*pooling_bottom_vecs_[i], *pooling_top_vecs_[i]);

// flatten layer output holders setup
flatten_outputs_.push_back(new Blob<Dtype>());
flatten_top_vecs_.push_back(new vector<Blob<Dtype>*>);
flatten_top_vecs_[i]->push_back(flatten_outputs_[i]);

// flatten layer setup
LayerParameter flatten_param;
flatten_layers_.push_back(new FlattenLayer<Dtype>(flatten_param));
flatten_layers_[i]->SetUp(*pooling_top_vecs_[i], *flatten_top_vecs_[i]);

// concat layer input holders setup
concat_bottom_vec_.push_back(flatten_outputs_[i]);
}

// concat layer setup
LayerParameter concat_param;
concat_layer_.reset(new ConcatLayer<Dtype>(concat_param));
concat_layer_->SetUp(concat_bottom_vec_, top);
}

template <typename Dtype>
void SPPLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
<< "corresponding to (num, channels, height, width)";
channels_ = bottom[0]->channels();
bottom_h_ = bottom[0]->height();
bottom_w_ = bottom[0]->width();
SPPParameter spp_param = this->layer_param_.spp_param();
split_layer_->Reshape(bottom, split_top_vec_);
for (int i = 0; i < pyramid_height_; i++) {
LayerParameter pooling_param = GetPoolingParam(
i, bottom_h_, bottom_w_, spp_param);

pooling_layers_[i].reset(
new PoolingLayer<Dtype>(pooling_param));
pooling_layers_[i]->SetUp(
*pooling_bottom_vecs_[i], *pooling_top_vecs_[i]);
pooling_layers_[i]->Reshape(
*pooling_bottom_vecs_[i], *pooling_top_vecs_[i]);
flatten_layers_[i]->Reshape(
*pooling_top_vecs_[i], *flatten_top_vecs_[i]);
}
concat_layer_->Reshape(concat_bottom_vec_, top);
}

template <typename Dtype>
void SPPLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
split_layer_->Forward(bottom, split_top_vec_);
for (int i = 0; i < pyramid_height_; i++) {
pooling_layers_[i]->Forward(
*pooling_bottom_vecs_[i], *pooling_top_vecs_[i]);
flatten_layers_[i]->Forward(
*pooling_top_vecs_[i], *flatten_top_vecs_[i]);
}
concat_layer_->Forward(concat_bottom_vec_, top);
}
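For orientation, a hypothetical shape trace through this forward composition, assuming a 2 x 256 x 13 x 13 input, pyramid_height = 3, and that each Flatten layer collapses its pooled blob to one vector per example before the channel-axis concatenation:

// Hypothetical shape trace (illustrative values, not produced by the code):
//   bottom               : 2 x 256 x 13 x 13
//   split                : three copies of bottom, one per pyramid level
//   pool level 0 (k = 13): 2 x 256 x 1 x 1  -> flatten -> 2 x 256
//   pool level 1 (k = 7) : 2 x 256 x 2 x 2  -> flatten -> 2 x 1024
//   pool level 2 (k = 4) : 2 x 256 x 4 x 4  -> flatten -> 2 x 4096
//   concat               : 2 x 5376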

template <typename Dtype>
void SPPLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (!propagate_down[0]) {
return;
}
vector<bool> concat_propagate_down(pyramid_height_, true);
concat_layer_->Backward(top, concat_propagate_down, concat_bottom_vec_);
for (int i = 0; i < pyramid_height_; i++) {
flatten_layers_[i]->Backward(
*flatten_top_vecs_[i], propagate_down, *pooling_top_vecs_[i]);
pooling_layers_[i]->Backward(
*pooling_top_vecs_[i], propagate_down, *pooling_bottom_vecs_[i]);
}
split_layer_->Backward(split_top_vec_, propagate_down, bottom);
}


INSTANTIATE_CLASS(SPPLayer);
REGISTER_LAYER_CLASS(SPP);

} // namespace caffe
20 changes: 19 additions & 1 deletion src/caffe/proto/caffe.proto
@@ -259,7 +259,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 132 (last added: prelu_param)
// LayerParameter next available layer-specific ID: 133 (last added: spp_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
@@ -328,6 +328,7 @@ message LayerParameter {
optional ReLUParameter relu_param = 123;
optional SigmoidParameter sigmoid_param = 124;
optional SoftmaxParameter softmax_param = 125;
optional SPPParameter spp_param = 132;
optional SliceParameter slice_param = 126;
optional TanHParameter tanh_param = 127;
optional ThresholdParameter threshold_param = 128;
@@ -768,6 +769,23 @@ message WindowDataParameter {
optional string root_folder = 13 [default = ""];
}

// Message that stores parameters used by SPPLayer
message SPPParameter {
enum PoolMethod {
MAX = 0;
AVE = 1;
STOCHASTIC = 2;
}
optional uint32 pyramid_height = 1;
optional PoolMethod pool = 2 [default = MAX]; // The pooling method
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 6 [default = DEFAULT];
}
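Given this schema, configuring the layer from C++ through the generated protobuf API could look like the following sketch (the name and values are illustrative; "SPP" is the type string registered in spp_layer.cpp):

#include "caffe/proto/caffe.pb.h"  // generated from caffe.proto

// Sketch: a LayerParameter for a 3-level max-pooling pyramid.
caffe::LayerParameter MakeSppParam() {
  caffe::LayerParameter param;
  param.set_name("spp");  // illustrative layer name
  param.set_type("SPP");  // matches SPPLayer<Dtype>::type()
  param.mutable_spp_param()->set_pyramid_height(3);
  param.mutable_spp_param()->set_pool(caffe::SPPParameter_PoolMethod_MAX);
  return param;
}

An SPPLayer<float> constructed from this parameter would then go through the usual SetUp()/Reshape() calls like any other Caffe layer.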

// DEPRECATED: use LayerParameter.
message V1LayerParameter {
repeated string bottom = 2;