diff --git a/docs/tutorial/layers.md b/docs/tutorial/layers.md index 5f8f519cdc4..6439f1fb460 100644 --- a/docs/tutorial/layers.md +++ b/docs/tutorial/layers.md @@ -419,6 +419,48 @@ The `SPLIT` layer is a utility layer that splits an input blob to multiple outpu The `FLATTEN` layer is a utility layer that flattens an input of shape `n * c * h * w` to a simple vector output of shape `n * (c*h*w) * 1 * 1`. +#### Reshape + +* LayerType: `RESHAPE` +* CPU implementation: `./src/caffe/layers/reshape_layer.cpp` +* CUDA GPU implementation: `./src/caffe/layers/reshape_layer.cu` +* Parameters (`ReshapeParameter reshape_param`) + - Optional: (also see detailed description below) + - `num` [default `0`] + - `channels` [default `0`] + - `width` [default `0`] + - `height` [default `0`] + +* Input + - a single blob with arbitrary dimensions +* Output + - the same blob, with modified dimensions, as specified by `reshape_param` + +* Sample + + layers { + name: "reshape" + type: RESHAPE + bottom: "input" + top: "output" + + reshape_param { + num: 0 # copy the dimension from below + channels: 2 + width: 3 + height: -1 # infer it from the other dimensions + } + } + +The `RESHAPE` layer can be used to change the dimensions of its input, without changing its data. Just like the `FLATTEN` layer, only the dimensions are changed, no data is copied in the process. + +Output dimensions are specified by the `ReshapeParameter` proto. Positive numbers are used directly, setting the corresponding dimension of the output blob. In addition, two special values are accepted for any of the target dimension values: + +* `0` means "copy the respective dimension of the bottom layer". That is, if the bottom layer has 2 channels, the top one will have 2 channels too, given `channels: 0` as target dimension. Since the default value of all the target dimensions is 0, omitting any of the target dimensions will also cause it to be copied. +* `-1` stands for "infer this from the other dimensions". 
This behavior is similar to that of `-1` in *numpy*'s or `[]` for *MATLAB*'s reshape: this dimension is calculated to keep the overall element count the same as in the bottom layer. If this is not possible, an error is raised. Also, at most one `-1` can be used in a reshape operation. + +As another example, giving `num: 0, channels: -1, height: 1, width: 1` as parameters makes the layer behave in exactly the same way as the `FLATTEN` layer. + #### Concatenation * LayerType: `CONCAT` diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 9718b825b14..fa2c3577a29 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -189,6 +189,42 @@ class EltwiseLayer : public Layer { bool stable_prod_grad_; }; +/** + * @brief Reshapes the input Blob into an arbitrary-sized output Blob. + * + * Note: similarly to FlattenLayer, this layer does not change the input values + * (see FlattenLayer, Blob::ShareData and Blob::ShareDiff). + */ +template +class ReshapeLayer : public Layer { + public: + explicit ReshapeLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& bottom, + const vector& propagate_down, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + void FillInSingleUnspecifiedDimension(int bottom_count); + + int num_out; + int channels_out; + int height_out; + int width_out; +}; + + /** * @brief Reshapes the input Blob into flat vectors. 
* diff --git a/src/caffe/layers/reshape_layer.cpp b/src/caffe/layers/reshape_layer.cpp new file mode 100644 index 00000000000..7e8704e058a --- /dev/null +++ b/src/caffe/layers/reshape_layer.cpp @@ -0,0 +1,126 @@ +#include + +#include "caffe/common_layers.hpp" +#include "caffe/layer.hpp" + +namespace caffe { + +// Resolves the output shape from reshape_param: a 0 copies the bottom blob's +// dimension and a single -1 is inferred from the bottom count. +// NOTE(review): the shape is resolved only here; Reshape() below merely +// re-checks the element count, so copied/inferred dimensions are not refreshed +// unless LayerSetUp runs again -- confirm this is intended. +template +void ReshapeLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + CHECK_EQ(bottom.size(), 1) << "Reshape Layer takes a single blob as input."; + CHECK_EQ(top.size(), 1) << "Reshape Layer takes a single blob as output."; + + num_out = this->layer_param_.reshape_param().num(); + // Dimensions set to 0 (either by default or explicitly) will be copied from + // the bottom layer. + if (num_out == 0) { + num_out = bottom[0]->num(); + } + + channels_out = this->layer_param_.reshape_param().channels(); + if (channels_out == 0) { + channels_out = bottom[0]->channels(); + } + + width_out = this->layer_param_.reshape_param().width(); + if (width_out == 0) { + width_out = bottom[0]->width(); + } + + height_out = this->layer_param_.reshape_param().height(); + if (height_out == 0) { + height_out = bottom[0]->height(); + } + + FillInSingleUnspecifiedDimension(bottom[0]->count()); +} + +// Shapes the top blob and verifies the element count matches the bottom. +template +void ReshapeLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + top[0]->Reshape(num_out, channels_out, height_out, width_out); + + const size_t out_count = num_out * channels_out * height_out * width_out; + CHECK_EQ(out_count, bottom[0]->count()) << + "Bottom layer count isn't equal to predicted; output layer size is " << + num_out << "x" << channels_out << "x" << height_out << "x" << width_out; +} + +// Forward: the top blob shares the bottom blob's data. +template +void ReshapeLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + top[0]->ShareData(*bottom[0]); +} + +// Backward: the bottom blob shares the top blob's diff. +template +void ReshapeLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + bottom[0]->ShareDiff(*top[0]); +} + +/** + * @brief Fill in a single dimension left unspecified. 
+ * + * If a dimension is set to -1, it will be filled in with a value inferred from + * the count of the bottom layer (if the product of the other dimensions is a + * divisor of the count). + * + * @param bottom_count Count of the bottom layer. + */ +template +void ReshapeLayer::FillInSingleUnspecifiedDimension(int bottom_count) { + int* const dimensions[] = {&num_out, &channels_out, &width_out, &height_out}; + const size_t N_DIMENSIONS = 4; + + // How many -1 dimensions do we have. + int n_unspecified = 0; + // Product of the remaining dimensions. + int product_without_unspecified_dim = 1; + + for (size_t i = 0; i < N_DIMENSIONS; i++) { + if (*(dimensions[i]) == -1) { + n_unspecified++; + } else { + product_without_unspecified_dim *= *(dimensions[i]); + } + } + + if (n_unspecified == 0) { + // Everything is filled out, nothing to do. + return; + } + + CHECK_EQ(n_unspecified, 1) << "Only one dimension can be set -1."; + // Reject a zero (division by zero below) or negative (invalid dimension) + // product before using it as a divisor. + CHECK_GT(product_without_unspecified_dim, 0) << + "Product of the specified dimensions must be positive, got " << + product_without_unspecified_dim; + CHECK_EQ(bottom_count % product_without_unspecified_dim, 0) << + "Bottom layer count " << bottom_count << " not divisible by product " << + product_without_unspecified_dim; + + // Fill up the one remaining dimension. 
+ for (size_t i = 0; i < N_DIMENSIONS; i++) { + if (*(dimensions[i]) == -1) { + *(dimensions[i]) = bottom_count / product_without_unspecified_dim; + } + } +} + +#ifdef CPU_ONLY +STUB_GPU(ReshapeLayer); +#endif + +INSTANTIATE_CLASS(ReshapeLayer); +REGISTER_LAYER_CLASS(RESHAPE, ReshapeLayer); +} // namespace caffe diff --git a/src/caffe/layers/reshape_layer.cu b/src/caffe/layers/reshape_layer.cu new file mode 100644 index 00000000000..3023ce3ae88 --- /dev/null +++ b/src/caffe/layers/reshape_layer.cu @@ -0,0 +1,23 @@ +#include + +#include "caffe/common_layers.hpp" +#include "caffe/layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void ReshapeLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + top[0]->ShareData(*bottom[0]); +} + +template +void ReshapeLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + bottom[0]->ShareDiff(*top[0]); +} + +INSTANTIATE_LAYER_GPU_FUNCS(ReshapeLayer); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index f0404a09b90..cb1bb7be9ba 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -200,7 +200,7 @@ message NetStateRule { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available ID: 42 (last added: exp_param) +// LayerParameter next available ID: 43 (last added: reshape_param) message LayerParameter { repeated string bottom = 2; // the name of the bottom blobs repeated string top = 3; // the name of the top blobs @@ -221,7 +221,7 @@ message LayerParameter { // line above the enum. Update the next available ID when you add a new // LayerType. 
// - // LayerType next available ID: 39 (last added: EXP) + // LayerType next available ID: 40 (last added: RESHAPE) enum LayerType { // "NONE" layer type is 0th enum element so that we don't cause confusion // by defaulting to an existent LayerType (instead, should usually error if @@ -255,6 +255,7 @@ message LayerParameter { POOLING = 17; POWER = 26; RELU = 18; + RESHAPE = 39; SIGMOID = 19; SIGMOID_CROSS_ENTROPY_LOSS = 27; SILENCE = 36; @@ -315,6 +316,7 @@ message LayerParameter { optional PoolingParameter pooling_param = 19; optional PowerParameter power_param = 21; optional ReLUParameter relu_param = 30; + optional ReshapeParameter reshape_param = 42; optional SigmoidParameter sigmoid_param = 38; optional SoftmaxParameter softmax_param = 39; optional SliceParameter slice_param = 31; @@ -638,6 +640,19 @@ message ReLUParameter { optional Engine engine = 2 [default = DEFAULT]; } +// Message that stores parameters used by ReshapeLayer +message ReshapeParameter { + // Specify the output dimensions. If some of the following parameters are + // omitted or set to 0 explicitly, the corresponding dimension from the bottom + // layer is used (unchanged). Also, if exactly one of them is set to -1, its + // value is calculated from the count of the bottom layer and the remaining + // dimensions, if possible. 
+ optional int32 num = 1 [default = 0]; + optional int32 channels = 2 [default = 0]; + optional int32 height = 3 [default = 0]; + optional int32 width = 4 [default = 0]; +} + // Message that stores parameters used by SigmoidLayer message SigmoidParameter { enum Engine { diff --git a/src/caffe/test/test_reshape_layer.cpp b/src/caffe/test/test_reshape_layer.cpp new file mode 100644 index 00000000000..878d40bb4d5 --- /dev/null +++ b/src/caffe/test/test_reshape_layer.cpp @@ -0,0 +1,120 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/common_layers.hpp" +#include "caffe/filler.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class ReshapeLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + protected: + ReshapeLayerTest() + : blob_bottom_(new Blob(2, 3, 6, 5)), + blob_top_(new Blob()) { + // fill the values + FillerParameter filler_param; + GaussianFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + } + + virtual ~ReshapeLayerTest() { delete blob_bottom_; delete blob_top_; } + + Blob* const blob_bottom_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(ReshapeLayerTest, TestDtypesAndDevices); + +TYPED_TEST(ReshapeLayerTest, TestFlattenOutputSizes) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ReshapeParameter* reshape_param = + layer_param.mutable_reshape_param(); + reshape_param->set_channels(-1); + reshape_param->set_height(1); + reshape_param->set_width(1); + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 3 * 6 * 5); + EXPECT_EQ(this->blob_top_->height(), 1); + 
EXPECT_EQ(this->blob_top_->width(), 1); +} + +TYPED_TEST(ReshapeLayerTest, TestFlattenValues) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ReshapeParameter* reshape_param = + layer_param.mutable_reshape_param(); + reshape_param->set_channels(-1); + reshape_param->set_height(1); + reshape_param->set_width(1); + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int c = 0; c < 3 * 6 * 5; ++c) { + EXPECT_EQ(this->blob_top_->data_at(0, c, 0, 0), + this->blob_bottom_->data_at(0, c / (6 * 5), (c / 5) % 6, c % 5)); + EXPECT_EQ(this->blob_top_->data_at(1, c, 0, 0), + this->blob_bottom_->data_at(1, c / (6 * 5), (c / 5) % 6, c % 5)); + } +} + +// Test whether setting output dimensions to 0 either explicitly or implicitly +// copies the respective dimension of the input layer. +TYPED_TEST(ReshapeLayerTest, TestCopyDimensions) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ReshapeParameter* reshape_param = + layer_param.mutable_reshape_param(); + // Omitting num to test implicit zeroes. + reshape_param->set_channels(0); + reshape_param->set_height(0); + reshape_param->set_width(0); + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 3); + EXPECT_EQ(this->blob_top_->height(), 6); + EXPECT_EQ(this->blob_top_->width(), 5); +} + +// When a dimension is set to -1, we should infer its value from the other +// dimensions (including those that get copied from below). +TYPED_TEST(ReshapeLayerTest, TestInferenceOfUnspecified) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ReshapeParameter* reshape_param = + layer_param.mutable_reshape_param(); + // Since omitted, num is implicitly set to 0 (thus, copies 2). 
+ reshape_param->set_channels(3); + reshape_param->set_height(10); + reshape_param->set_width(-1); + + // Count is 180, thus width should be 180 / (2*3*10) = 3. + + ReshapeLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + + EXPECT_EQ(this->blob_top_->num(), 2); + EXPECT_EQ(this->blob_top_->channels(), 3); + EXPECT_EQ(this->blob_top_->height(), 10); + EXPECT_EQ(this->blob_top_->width(), 3); +} + +} // namespace caffe