-
Notifications
You must be signed in to change notification settings - Fork 18.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Spatial Pyramid Pooling Layer #2177
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
#include <algorithm> | ||
#include <cfloat> | ||
#include <vector> | ||
|
||
#include "caffe/common.hpp" | ||
#include "caffe/layer.hpp" | ||
#include "caffe/syncedmem.hpp" | ||
#include "caffe/util/math_functions.hpp" | ||
#include "caffe/vision_layers.hpp" | ||
|
||
namespace caffe { | ||
|
||
using std::min; | ||
using std::max; | ||
|
||
template <typename Dtype> | ||
LayerParameter SPPLayer<Dtype>::GetPoolingParam(const int pyramid_level, | ||
const int bottom_h, const int bottom_w, const SPPParameter spp_param) { | ||
LayerParameter pooling_param; | ||
int num_bins = pow(2, pyramid_level); | ||
|
||
// find padding and kernel size so that the pooling is | ||
// performed across the entire image | ||
int kernel_h = ceil(bottom_h / static_cast<double>(num_bins)); | ||
// remainder_h is the min number of pixels that need to be padded before | ||
// entire image height is pooled over with the chosen kernel dimension | ||
int remainder_h = kernel_h * num_bins - bottom_h; | ||
// pooling layer pads (2 * pad_h) pixels on the top and bottom of the | ||
// image. | ||
int pad_h = (remainder_h + 1) / 2; | ||
|
||
// similar logic for width | ||
int kernel_w = ceil(bottom_w / static_cast<double>(num_bins)); | ||
int remainder_w = kernel_w * num_bins - bottom_w; | ||
int pad_w = (remainder_w + 1) / 2; | ||
|
||
pooling_param.mutable_pooling_param()->set_pad_h(pad_h); | ||
pooling_param.mutable_pooling_param()->set_pad_w(pad_w); | ||
pooling_param.mutable_pooling_param()->set_kernel_h(kernel_h); | ||
pooling_param.mutable_pooling_param()->set_kernel_w(kernel_w); | ||
pooling_param.mutable_pooling_param()->set_stride_h(kernel_h); | ||
pooling_param.mutable_pooling_param()->set_stride_w(kernel_w); | ||
|
||
switch (spp_param.pool()) { | ||
case SPPParameter_PoolMethod_MAX: | ||
pooling_param.mutable_pooling_param()->set_pool( | ||
PoolingParameter_PoolMethod_MAX); | ||
break; | ||
case SPPParameter_PoolMethod_AVE: | ||
pooling_param.mutable_pooling_param()->set_pool( | ||
PoolingParameter_PoolMethod_AVE); | ||
break; | ||
case SPPParameter_PoolMethod_STOCHASTIC: | ||
pooling_param.mutable_pooling_param()->set_pool( | ||
PoolingParameter_PoolMethod_STOCHASTIC); | ||
break; | ||
default: | ||
LOG(FATAL) << "Unknown pooling method."; | ||
} | ||
|
||
return pooling_param; | ||
} | ||
|
||
template <typename Dtype> | ||
void SPPLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
SPPParameter spp_param = this->layer_param_.spp_param(); | ||
|
||
bottom_h_ = bottom[0]->height(); | ||
bottom_w_ = bottom[0]->width(); | ||
CHECK_GT(bottom_h_, 0) << "Input dimensions cannot be zero."; | ||
CHECK_GT(bottom_w_, 0) << "Input dimensions cannot be zero."; | ||
|
||
pyramid_height_ = spp_param.pyramid_height(); | ||
split_top_vec_.clear(); | ||
pooling_bottom_vecs_.clear(); | ||
pooling_layers_.clear(); | ||
pooling_top_vecs_.clear(); | ||
pooling_outputs_.clear(); | ||
flatten_layers_.clear(); | ||
flatten_top_vecs_.clear(); | ||
flatten_outputs_.clear(); | ||
concat_bottom_vec_.clear(); | ||
|
||
// split layer output holders setup | ||
for (int i = 0; i < pyramid_height_; i++) { | ||
split_top_vec_.push_back(new Blob<Dtype>()); | ||
} | ||
|
||
// split layer setup | ||
LayerParameter split_param; | ||
split_layer_.reset(new SplitLayer<Dtype>(split_param)); | ||
split_layer_->SetUp(bottom, split_top_vec_); | ||
|
||
for (int i = 0; i < pyramid_height_; i++) { | ||
// pooling layer input holders setup | ||
pooling_bottom_vecs_.push_back(new vector<Blob<Dtype>*>); | ||
pooling_bottom_vecs_[i]->push_back(split_top_vec_[i]); | ||
|
||
// pooling layer output holders setup | ||
pooling_outputs_.push_back(new Blob<Dtype>()); | ||
pooling_top_vecs_.push_back(new vector<Blob<Dtype>*>); | ||
pooling_top_vecs_[i]->push_back(pooling_outputs_[i]); | ||
|
||
// pooling layer setup | ||
LayerParameter pooling_param = GetPoolingParam( | ||
i, bottom_h_, bottom_w_, spp_param); | ||
|
||
pooling_layers_.push_back(shared_ptr<PoolingLayer<Dtype> > ( | ||
new PoolingLayer<Dtype>(pooling_param))); | ||
pooling_layers_[i]->SetUp(*pooling_bottom_vecs_[i], *pooling_top_vecs_[i]); | ||
|
||
// flatten layer output holders setup | ||
flatten_outputs_.push_back(new Blob<Dtype>()); | ||
flatten_top_vecs_.push_back(new vector<Blob<Dtype>*>); | ||
flatten_top_vecs_[i]->push_back(flatten_outputs_[i]); | ||
|
||
// flatten layer setup | ||
LayerParameter flatten_param; | ||
flatten_layers_.push_back(new FlattenLayer<Dtype>(flatten_param)); | ||
flatten_layers_[i]->SetUp(*pooling_top_vecs_[i], *flatten_top_vecs_[i]); | ||
|
||
// concat layer input holders setup | ||
concat_bottom_vec_.push_back(flatten_outputs_[i]); | ||
} | ||
|
||
// concat layer setup | ||
LayerParameter concat_param; | ||
concat_layer_.reset(new ConcatLayer<Dtype>(concat_param)); | ||
concat_layer_->SetUp(concat_bottom_vec_, top); | ||
} | ||
|
||
template <typename Dtype> | ||
void SPPLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " | ||
<< "corresponding to (num, channels, height, width)"; | ||
channels_ = bottom[0]->channels(); | ||
bottom_h_ = bottom[0]->height(); | ||
bottom_w_ = bottom[0]->width(); | ||
SPPParameter spp_param = this->layer_param_.spp_param(); | ||
split_layer_->Reshape(bottom, split_top_vec_); | ||
for (int i = 0; i < pyramid_height_; i++) { | ||
LayerParameter pooling_param = GetPoolingParam( | ||
i, bottom_h_, bottom_w_, spp_param); | ||
|
||
pooling_layers_[i].reset( | ||
new PoolingLayer<Dtype>(pooling_param)); | ||
pooling_layers_[i]->SetUp( | ||
*pooling_bottom_vecs_[i], *pooling_top_vecs_[i]); | ||
pooling_layers_[i]->Reshape( | ||
*pooling_bottom_vecs_[i], *pooling_top_vecs_[i]); | ||
flatten_layers_[i]->Reshape( | ||
*pooling_top_vecs_[i], *flatten_top_vecs_[i]); | ||
} | ||
concat_layer_->Reshape(concat_bottom_vec_, top); | ||
} | ||
|
||
template <typename Dtype> | ||
void SPPLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, | ||
const vector<Blob<Dtype>*>& top) { | ||
split_layer_->Forward(bottom, split_top_vec_); | ||
for (int i = 0; i < pyramid_height_; i++) { | ||
pooling_layers_[i]->Forward( | ||
*pooling_bottom_vecs_[i], *pooling_top_vecs_[i]); | ||
flatten_layers_[i]->Forward( | ||
*pooling_top_vecs_[i], *flatten_top_vecs_[i]); | ||
} | ||
concat_layer_->Forward(concat_bottom_vec_, top); | ||
} | ||
|
||
template <typename Dtype> | ||
void SPPLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, | ||
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { | ||
if (!propagate_down[0]) { | ||
return; | ||
} | ||
vector<bool> concat_propagate_down(pyramid_height_, true); | ||
concat_layer_->Backward(top, concat_propagate_down, concat_bottom_vec_); | ||
for (int i = 0; i < pyramid_height_; i++) { | ||
flatten_layers_[i]->Backward( | ||
*flatten_top_vecs_[i], propagate_down, *pooling_top_vecs_[i]); | ||
pooling_layers_[i]->Backward( | ||
*pooling_top_vecs_[i], propagate_down, *pooling_bottom_vecs_[i]); | ||
} | ||
split_layer_->Backward(split_top_vec_, propagate_down, bottom); | ||
} | ||
|
||
|
||
INSTANTIATE_CLASS(SPPLayer); | ||
REGISTER_LAYER_CLASS(SPP); | ||
|
||
} // namespace caffe |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The kernel size and stride logic needs to be in `Reshape()`. The number of spatial pyramid pooling bins should stay constant, but their dimensions will need to change for each input. Inputs can change shape with (1) reshaping data layers (#1313) or (2) calls to net or blob `reshape()`. When this happens, the kernel size and stride need re-configuring.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a way to change parameters of a layer without having to set it up again? The only way I could figure out the re-configuring of the kernel size and stride height is by constructing a new LayerParameter, resetting the PoolingLayer with that LayerParameter, and calling the PoolingLayer's SetUp.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think there is a way to change parameters without deleting and reinitializing the layer -- you could add a setter to `Layer`, but I don't think it would really save anything since the constructor itself is probably basically free (`SetUp` is probably a little more expensive, but you'd have to call that regardless). Do you know if it's an issue in practice?