diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp
index ce6200372c1..9df94e7a171 100644
--- a/include/caffe/util/math_functions.hpp
+++ b/include/caffe/util/math_functions.hpp
@@ -194,6 +194,12 @@ void caffe_cpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y);
 template <typename Dtype>
 void caffe_gpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y);
 
+// Element-wise logistic sigmoid over n host values:
+// y[i] = 1 / (1 + exp(-x[i])).
+template <typename Dtype>
+void caffe_cpu_sigmoid(const int n, const Dtype* x, Dtype* y);
+
+// Element-wise logistic sigmoid over n values, computed by a CUDA kernel.
+template <typename Dtype>
+void caffe_gpu_sigmoid(const int n, const Dtype* x, Dtype* y);
+
 }  // namespace caffe
diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index 4f6dfa70be2..bd84a5e2cdf 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -14,6 +14,7 @@
 
 #include "caffe/blob.hpp"
 #include "caffe/common.hpp"
+#include "caffe/filler.hpp"
 #include "caffe/layer.hpp"
 #include "caffe/proto/caffe.pb.h"
 
@@ -529,6 +530,43 @@ class PoolingLayer : public Layer<Dtype> {
   Blob<Dtype> rand_idx_;
 };
 
+// Restricted Boltzmann Machine.
+// Takes one bottom blob (num x visible_dim_ once flattened) and produces one
+// top blob of shape (num, hidden_dim_, 1, 1).
+template <typename Dtype>
+class RBMLayer : public Layer<Dtype> {
+ public:
+  explicit RBMLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+ protected:
+  virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual Dtype Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+
+  // Per-example size of the input and of the hidden layer.
+  size_t visible_dim_;
+  size_t hidden_dim_;
+  // Learnable parameters; these alias this->blobs_[0..2].
+  // The weight is stored row-major as visible_dim_ x hidden_dim_.
+  shared_ptr<Blob<Dtype> > visible_hidden_weight_;
+  shared_ptr<Blob<Dtype> > visible_bias_;
+  shared_ptr<Blob<Dtype> > hidden_bias_;
+  // Uniform [0, 1] filler used to draw the hidden-unit sampling thresholds.
+  shared_ptr<UniformFiller<Dtype> > hidden_unit_sampling_filler_;
+  // Positive-phase scratch buffers (num x hidden_dim_ unless noted).
+  shared_ptr<Blob<Dtype> > pos_hidden_activations_;
+  shared_ptr<Blob<Dtype> > pos_hidden_probs_;
+  shared_ptr<Blob<Dtype> > pos_hidden_states_;
+  shared_ptr<Blob<Dtype> > pos_association_;  // visible_dim_ x hidden_dim_
+  shared_ptr<Blob<Dtype> > random_threshold_;
+  // Negative-phase scratch buffers.
+  shared_ptr<Blob<Dtype> > neg_visible_activations_;  // num x visible_dim_
+  shared_ptr<Blob<Dtype> > neg_visible_probs_;        // num x visible_dim_
+  shared_ptr<Blob<Dtype> > neg_hidden_activations_;   // num x hidden_dim_
+  shared_ptr<Blob<Dtype> > neg_hidden_probs_;         // num x hidden_dim_
+  shared_ptr<Blob<Dtype> > neg_associations_;  // visible_dim_ x hidden_dim_
+};
+
 template <typename Dtype>
 class SoftmaxLayer : public Layer<Dtype> {
  public:
diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp
index f3e52a68237..428864b294e 100644
--- a/src/caffe/layer_factory.cpp
+++ b/src/caffe/layer_factory.cpp
@@ -56,6 +56,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
     return new MultinomialLogisticLossLayer<Dtype>(param);
   case LayerParameter_LayerType_POOLING:
     return new PoolingLayer<Dtype>(param);
+  case LayerParameter_LayerType_RBM:
+    return new RBMLayer<Dtype>(param);
   case LayerParameter_LayerType_RELU:
     return new ReLULayer<Dtype>(param);
   case LayerParameter_LayerType_SIGMOID:
diff --git a/src/caffe/layers/rbm_layer.cpp b/src/caffe/layers/rbm_layer.cpp
new file mode 100644
index 00000000000..90c9a21b31c
--- /dev/null
+++ b/src/caffe/layers/rbm_layer.cpp
@@ -0,0 +1,168 @@
+// Copyright 2014 BVLC and contributors.
+
+#include <cstring>  // memset
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+// Validates the layer parameters, shapes the top blob to
+// (num, hidden_dim, 1, 1), creates the three parameter blobs unless they are
+// already present, and allocates the scratch buffers used by the positive and
+// negative phases.
+template <typename Dtype>
+void RBMLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  CHECK(this->layer_param_.has_rbm_param());
+  CHECK(this->layer_param_.rbm_param().has_hidden_dim());
+  CHECK(this->layer_param_.rbm_param().has_weight_filler());
+  hidden_dim_ = this->layer_param_.rbm_param().hidden_dim();
+  CHECK_EQ(bottom.size(), 1) << "RBM Layer takes a single blob as input.";
+  CHECK_EQ(top->size(), 1) << "RBM Layer takes a single blob as output.";
+  const int num = bottom[0]->num();
+  visible_dim_ = bottom[0]->count() / num;
+  (*top)[0]->Reshape(num, hidden_dim_, 1, 1);
+  // Check if we need to set up the weights
+  if (this->blobs_.size() > 0) {
+    LOG(INFO) << "Skipping parameter initialization";
+    CHECK_EQ(this->blobs_.size(), 3)
+        << "RBM Layer expects weight, visible bias and hidden bias blobs.";
+  } else {
+    // Allocate directly into blobs_ so the layer's parameter list and the
+    // member aliases bound below refer to the same objects.
+    this->blobs_.resize(3);
+    // Initialize the visible-hidden weight. It is laid out as
+    // visible_dim_ x hidden_dim_ because that is how the gemm calls in
+    // Forward_cpu read it.
+    this->blobs_[0].reset(new Blob<Dtype>(1, 1, visible_dim_, hidden_dim_));
+    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
+        this->layer_param_.rbm_param().weight_filler()));
+    weight_filler->Fill(this->blobs_[0].get());
+    // Visible bias starts at zero.
+    this->blobs_[1].reset(new Blob<Dtype>(1, 1, 1, visible_dim_));
+    memset(this->blobs_[1]->mutable_cpu_data(), 0,
+           sizeof(Dtype) * this->blobs_[1]->count());
+    // Hidden bias starts at -4 for every unit.
+    this->blobs_[2].reset(new Blob<Dtype>(1, 1, 1, hidden_dim_));
+    Dtype* hidden_bias_data = this->blobs_[2]->mutable_cpu_data();
+    for (int i = 0; i < this->blobs_[2]->count(); ++i) {
+      hidden_bias_data[i] = -4;
+    }
+  }  // parameter initialization
+  // Bind the aliases in both branches; otherwise pre-loaded parameters would
+  // leave them null.
+  visible_hidden_weight_ = this->blobs_[0];
+  visible_bias_ = this->blobs_[1];
+  hidden_bias_ = this->blobs_[2];
+
+  FillerParameter filler_param;
+  filler_param.set_min(0);
+  filler_param.set_max(1);
+  hidden_unit_sampling_filler_.reset(new UniformFiller<Dtype>(filler_param));
+
+  // Positive-phase buffers.
+  pos_hidden_activations_.reset(new Blob<Dtype>(num, hidden_dim_, 1, 1));
+  pos_hidden_probs_.reset(new Blob<Dtype>(num, hidden_dim_, 1, 1));
+  pos_hidden_states_.reset(new Blob<Dtype>(num, hidden_dim_, 1, 1));
+  pos_association_.reset(new Blob<Dtype>(visible_dim_, hidden_dim_, 1, 1));
+  random_threshold_.reset(new Blob<Dtype>(num, hidden_dim_, 1, 1));
+  // Negative-phase buffers. The hidden-side buffers hold num x hidden_dim_
+  // values and the association holds visible_dim_ x hidden_dim_ values,
+  // matching the gemm outputs written into them in Backward_cpu.
+  neg_visible_activations_.reset(new Blob<Dtype>(num, visible_dim_, 1, 1));
+  neg_visible_probs_.reset(new Blob<Dtype>(num, visible_dim_, 1, 1));
+  neg_hidden_activations_.reset(new Blob<Dtype>(num, hidden_dim_, 1, 1));
+  neg_hidden_probs_.reset(new Blob<Dtype>(num, hidden_dim_, 1, 1));
+  neg_associations_.reset(new Blob<Dtype>(visible_dim_, hidden_dim_, 1, 1));
+}
+
+// Positive phase: computes the hidden activations and probabilities for the
+// input, writes the probabilities to the top blob, samples binary hidden
+// states, and accumulates the data/hidden association matrix.
+// Always returns 0 (the layer contributes no loss).
+template <typename Dtype>
+Dtype RBMLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  const int num = bottom[0]->num();
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  const Dtype* visible_hidden_weight_data = visible_hidden_weight_->cpu_data();
+  const Dtype* hidden_bias_data = hidden_bias_->cpu_data();
+  Dtype* pos_hidden_activations_data =
+      pos_hidden_activations_->mutable_cpu_data();
+  // activations (num x hidden) = data (num x visible) * W (visible x hidden)
+  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num, hidden_dim_,
+      visible_dim_, (Dtype) 1., bottom_data, visible_hidden_weight_data,
+      (Dtype) 0., pos_hidden_activations_data);
+  // Add the hidden bias to every example's row.
+  for (int i = 0; i < num; ++i) {
+    caffe_axpy<Dtype>(hidden_dim_, Dtype(1), hidden_bias_data,
+        pos_hidden_activations_data + pos_hidden_activations_->offset(i));
+  }
+  const int count = pos_hidden_states_->count();
+  Dtype* pos_hidden_probs_data = pos_hidden_probs_->mutable_cpu_data();
+  caffe_cpu_sigmoid<Dtype>(count, pos_hidden_activations_data,
+      pos_hidden_probs_data);
+  // Publish the hidden probabilities as the layer output so that top holds
+  // defined values.
+  caffe_copy(count, pos_hidden_probs_data, (*top)[0]->mutable_cpu_data());
+  // sampling hidden units
+  hidden_unit_sampling_filler_->Fill(random_threshold_.get());
+  const Dtype* random_threshold_data = random_threshold_->cpu_data();
+  Dtype* random_threshold_diff = random_threshold_->mutable_cpu_diff();
+  Dtype* pos_hidden_states_data = pos_hidden_states_->mutable_cpu_data();
+  // threshold - prob is negative exactly when prob > threshold, so its sign
+  // bit is the sampled state.
+  caffe_sub<Dtype>(count, random_threshold_data, pos_hidden_probs_data,
+      random_threshold_diff);
+  caffe_cpu_sgnbit<Dtype>(count, random_threshold_diff,
+      pos_hidden_states_data);
+  // association (visible x hidden) = data^T * hidden probabilities
+  Dtype* pos_association_data = pos_association_->mutable_cpu_data();
+  caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, visible_dim_, hidden_dim_,
+      num, (Dtype) 1., bottom_data, pos_hidden_probs_data,
+      (Dtype) 0., pos_association_data);
+  return Dtype(0);
+}
+// Reference NumPy pseudocode for one contrastive-divergence step, kept for
+// comparison with the C++ above and below:
+//
+// # Clamp to the data and sample from the hidden units.
+// # (This is the "positive CD phase", aka the reality phase.)
+// pos_hidden_activations = np.dot(data, self.weights)
+// pos_hidden_probs = self._logistic(pos_hidden_activations)
+// pos_hidden_states = pos_hidden_probs >
+//     np.random.rand(num_examples, self.num_hidden + 1)
+// # Note that we're using the activation *probabilities* of the hidden
+// # states, not the hidden states themselves, when computing associations.
+// # We could also use the states; see section 3 of Hinton's
+// # "A Practical Guide to Training Restricted Boltzmann Machines" for more.
+// pos_associations = np.dot(data.T, pos_hidden_probs)
+//
+// # Reconstruct the visible units and sample again from the hidden units.
+// # (This is the "negative CD phase", aka the daydreaming phase.)
+// neg_visible_activations = np.dot(pos_hidden_states, self.weights.T)
+// neg_visible_probs = self._logistic(neg_visible_activations)
+// neg_visible_probs[:,0] = 1 # Fix the bias unit.
+// neg_hidden_activations = np.dot(neg_visible_probs, self.weights)
+// neg_hidden_probs = self._logistic(neg_hidden_activations)
+// # Note, again, that we're using the activation *probabilities* when
+// # computing associations, not the states themselves.
+// neg_associations = np.dot(neg_visible_probs.T, neg_hidden_probs)
+
+// Negative phase: reconstructs the visible units from the sampled hidden
+// states, recomputes the hidden probabilities from that reconstruction, and
+// accumulates the reconstruction/hidden association matrix.
+// NOTE(review): nothing here writes a parameter diff or a bottom diff, and
+// no bias is added to either set of activations -- confirm whether the
+// weight update is meant to happen elsewhere.
+template <typename Dtype>
+void RBMLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+    const bool propagate_down,
+    vector<Blob<Dtype>*>* bottom) {
+  const int num = top[0]->num();
+  const int visible_count = static_cast<int>(num * visible_dim_);
+  const int hidden_count = static_cast<int>(num * hidden_dim_);
+  const int association_count = static_cast<int>(visible_dim_ * hidden_dim_);
+  // Fail loudly instead of writing past the end of an undersized buffer.
+  CHECK_GE(neg_visible_activations_->count(), visible_count);
+  CHECK_GE(neg_visible_probs_->count(), visible_count);
+  CHECK_GE(neg_hidden_activations_->count(), hidden_count)
+      << "neg_hidden_activations_ is too small for a num x hidden_dim result";
+  CHECK_GE(neg_hidden_probs_->count(), hidden_count)
+      << "neg_hidden_probs_ is too small for a num x hidden_dim result";
+  CHECK_GE(neg_associations_->count(), association_count)
+      << "neg_associations_ is too small for a visible_dim x hidden_dim result";
+
+  // The sampled states are stored in the blob's data, not its diff.
+  const Dtype* pos_hidden_states_data = pos_hidden_states_->cpu_data();
+  const Dtype* visible_hidden_weight_data = visible_hidden_weight_->cpu_data();
+  // reconstruction (num x visible) = states (num x hidden) * W^T,
+  // with W read as visible x hidden (same layout as the gemm further down).
+  Dtype* neg_visible_activations_data =
+      neg_visible_activations_->mutable_cpu_data();
+  caffe_cpu_gemm<Dtype>(
+      CblasNoTrans, CblasTrans, num, visible_dim_, hidden_dim_, (Dtype) 1.,
+      pos_hidden_states_data, visible_hidden_weight_data, (Dtype) 0.,
+      neg_visible_activations_data);
+  Dtype* neg_visible_probs_data = neg_visible_probs_->mutable_cpu_data();
+  caffe_cpu_sigmoid<Dtype>(visible_count, neg_visible_activations_data,
+      neg_visible_probs_data);
+  // hidden (num x hidden) = reconstruction (num x visible) * W
+  Dtype* neg_hidden_activations_data =
+      neg_hidden_activations_->mutable_cpu_data();
+  caffe_cpu_gemm<Dtype>(
+      CblasNoTrans, CblasNoTrans, num, hidden_dim_, visible_dim_, (Dtype) 1.,
+      neg_visible_probs_data, visible_hidden_weight_data, (Dtype) 0.,
+      neg_hidden_activations_data);
+  Dtype* neg_hidden_probs_data = neg_hidden_probs_->mutable_cpu_data();
+  caffe_cpu_sigmoid<Dtype>(hidden_count, neg_hidden_activations_data,
+      neg_hidden_probs_data);
+  // association (visible x hidden) = reconstruction^T * hidden probabilities
+  Dtype* neg_associations_data = neg_associations_->mutable_cpu_data();
+  caffe_cpu_gemm<Dtype>(
+      CblasTrans, CblasNoTrans, visible_dim_, hidden_dim_, num, (Dtype) 1.,
+      neg_visible_probs_data, neg_hidden_probs_data, (Dtype) 0.,
+      neg_associations_data);
+}
+
+INSTANTIATE_CLASS(RBMLayer);
+
+}  // namespace caffe
diff --git a/src/caffe/layers/rbm_layer.cu b/src/caffe/layers/rbm_layer.cu
new file mode 100644
index 00000000000..af23b7c4c13
--- /dev/null
+++ b/src/caffe/layers/rbm_layer.cu
@@ -0,0 +1,28 @@
+// Copyright 2014 BVLC and contributors.
+
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+// There is no CUDA implementation yet. Run the host implementation rather
+// than returning without touching top, so GPU mode gives the same result as
+// CPU mode.
+template <typename Dtype>
+Dtype RBMLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  return this->Forward_cpu(bottom, top);
+}
+
+// See Forward_gpu: delegates to the host implementation.
+template <typename Dtype>
+void RBMLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const bool propagate_down,
+    vector<Blob<Dtype>*>* bottom) {
+  this->Backward_cpu(top, propagate_down, bottom);
+}
+
+INSTANTIATE_CLASS(RBMLayer);
+
+}  // namespace caffe
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 1a31109f784..e1f75225a48 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -129,6 +129,7 @@ message LayerParameter {
     SPLIT = 22;
     TANH = 23;
     WINDOW_DATA = 24;
+    RBM = 10001;
   }
   optional LayerType type = 5; // the layer type from the enum above
 
@@ -153,6 +154,7 @@ message LayerParameter {
   optional LRNParameter lrn_param = 18;
   optional PoolingParameter pooling_param = 19;
   optional WindowDataParameter window_data_param = 20;
+  optional RBMParameter rbm_param = 10001;
 
   // DEPRECATED: The layer parameters specified as a V0LayerParameter.
// This should never be used by any code except to upgrade to the new
@@ -280,6 +282,12 @@ message PoolingParameter {
   optional uint32 stride = 3 [default = 1]; // The stride
 }
 
+// Message that stores parameters used by RBMLayer
+message RBMParameter {
+  optional uint32 hidden_dim = 1; // The dim of hidden units for the layer
+  optional FillerParameter weight_filler = 2; // The filler for the weight
+}
+
 // Message that stores parameters used by WindowDataLayer
 message WindowDataParameter {
   // Specify the data source.
diff --git a/src/caffe/test/test_rbm_layer.cpp b/src/caffe/test/test_rbm_layer.cpp
new file mode 100644
index 00000000000..2d07134500a
--- /dev/null
+++ b/src/caffe/test/test_rbm_layer.cpp
@@ -0,0 +1,122 @@
+// Copyright 2014 BVLC and contributors.
+
+#include <cstring>
+#include <vector>
+
+#include "cuda_runtime.h"
+#include "gtest/gtest.h"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+
+namespace caffe {
+
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
+
+// Fixture: one uniformly filled 2 x 3 x 4 x 5 bottom blob (two examples of
+// 60 visible units each) and one empty top blob.
+template <typename Dtype>
+class RBMLayerTest : public ::testing::Test {
+ protected:
+  RBMLayerTest()
+      : blob_bottom_(new Blob<Dtype>(2, 3, 4, 5)),
+        blob_top_(new Blob<Dtype>()) {
+    // fill the values
+    FillerParameter filler_param;
+    UniformFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_);
+    blob_bottom_vec_.push_back(blob_bottom_);
+    blob_top_vec_.push_back(blob_top_);
+  }
+  virtual ~RBMLayerTest() { delete blob_bottom_; delete blob_top_; }
+  Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_top_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+typedef ::testing::Types<float, double> Dtypes;
+TYPED_TEST_CASE(RBMLayerTest, Dtypes);
+
+// SetUp must shape the top blob as (num, hidden_dim, 1, 1).
+TYPED_TEST(RBMLayerTest, TestSetUp) {
+  LayerParameter layer_param;
+  RBMParameter* rbm_param = layer_param.mutable_rbm_param();
+  rbm_param->set_hidden_dim(10);
+  // RBMLayer::SetUp CHECKs that a weight filler is present.
+  rbm_param->mutable_weight_filler()->set_type("uniform");
+  shared_ptr<RBMLayer<TypeParam> > layer(
+      new RBMLayer<TypeParam>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  EXPECT_EQ(this->blob_top_->num(), 2);
+  EXPECT_EQ(this->blob_top_->height(), 1);
+  EXPECT_EQ(this->blob_top_->width(), 1);
+  EXPECT_EQ(this->blob_top_->channels(), 10);
+}
+
+// The forward pass must leave every output value inside [0, 1].
+TYPED_TEST(RBMLayerTest, TestCPU) {
+  LayerParameter layer_param;
+  RBMParameter* rbm_param = layer_param.mutable_rbm_param();
+  Caffe::set_mode(Caffe::CPU);
+  rbm_param->set_hidden_dim(10);
+  rbm_param->mutable_weight_filler()->set_type("uniform");
+  shared_ptr<RBMLayer<TypeParam> > layer(
+      new RBMLayer<TypeParam>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  const TypeParam* data = this->blob_top_->cpu_data();
+  const int count = this->blob_top_->count();
+  for (int i = 0; i < count; ++i) {
+    EXPECT_GE(data[i], 0.);
+    EXPECT_LE(data[i], 1.);
+  }
+}
+
+// Same check as TestCPU, in GPU mode.
+TYPED_TEST(RBMLayerTest, TestGPU) {
+  if (sizeof(TypeParam) == 4 || CAFFE_TEST_CUDA_PROP.major >= 2) {
+    LayerParameter layer_param;
+    RBMParameter* rbm_param = layer_param.mutable_rbm_param();
+    Caffe::set_mode(Caffe::GPU);
+    rbm_param->set_hidden_dim(10);
+    rbm_param->mutable_weight_filler()->set_type("uniform");
+    shared_ptr<RBMLayer<TypeParam> > layer(
+        new RBMLayer<TypeParam>(layer_param));
+    layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+    layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+    const TypeParam* data = this->blob_top_->cpu_data();
+    const int count = this->blob_top_->count();
+    for (int i = 0; i < count; ++i) {
+      EXPECT_GE(data[i], 0.);
+      EXPECT_LE(data[i], 1.);
+    }
+  } else {
+    LOG(ERROR) << "Skipping test due to old architecture.";
+  }
+}
+
+// NOTE(review): the two gradient tests below only become meaningful once
+// RBMLayer's backward pass writes gradients -- confirm against the layer
+// before relying on them.
+TYPED_TEST(RBMLayerTest, TestCPUGradient) {
+  LayerParameter layer_param;
+  RBMParameter* rbm_param = layer_param.mutable_rbm_param();
+  Caffe::set_mode(Caffe::CPU);
+  rbm_param->set_hidden_dim(10);
+  rbm_param->mutable_weight_filler()->set_type("gaussian");
+  RBMLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(&layer, &(this->blob_bottom_vec_),
+      &(this->blob_top_vec_));
+}
+
+TYPED_TEST(RBMLayerTest, TestGPUGradient) {
+  if (sizeof(TypeParam) == 4 || CAFFE_TEST_CUDA_PROP.major >= 2) {
+    LayerParameter layer_param;
+    RBMParameter* rbm_param = layer_param.mutable_rbm_param();
+    Caffe::set_mode(Caffe::GPU);
+    rbm_param->set_hidden_dim(10);
+    rbm_param->mutable_weight_filler()->set_type("gaussian");
+    RBMLayer<TypeParam> layer(layer_param);
+    GradientChecker<TypeParam> checker(1e-2, 1e-2);
+    checker.CheckGradient(&layer, &(this->blob_bottom_vec_),
+        &(this->blob_top_vec_));
+  } else {
+    LOG(ERROR) << "Skipping test due to old architecture.";
+  }
+}
+
+}  // namespace caffe
diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp
index 80e420f5689..ba80f331ede 100644
--- a/src/caffe/util/math_functions.cpp
+++ b/src/caffe/util/math_functions.cpp
@@ -4,6 +4,7 @@
 #include
 #include
+#include <cmath>  // std::exp for sigmoid
 #include
 
 #include "caffe/common.hpp"
@@ -442,4 +443,16 @@ void caffe_gpu_scale<double>(const int n, const double alpha, const double *x,
   CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), n, &alpha, y, 1));
 }
 
+// Element-wise logistic sigmoid: y[i] = 1 / (1 + exp(-x[i])) for i in [0, n).
+// The expression is evaluated in double (because of the 1.0 literal) and
+// converted to Dtype on assignment.
+template <typename Dtype>
+void caffe_cpu_sigmoid(const int n, const Dtype* x, Dtype* y) {
+  for (int i = 0; i < n; ++i) {
+    y[i] = 1.0 / (1 + std::exp(-x[i]));
+  }
+}
+
+// Explicit instantiations for the two supported element types.
+template
+void caffe_cpu_sigmoid<float>(const int n, const float* x, float* y);
+template
+void caffe_cpu_sigmoid<double>(const int n, const double* x, double* y);
+
 }  // namespace caffe
diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu
index 82524cb412c..846db584d03 100644
--- a/src/caffe/util/math_functions.cu
+++ b/src/caffe/util/math_functions.cu
@@ -1,6 +1,6 @@
 // Copyright 2014 BVLC and contributors.
-#include <math_functions.h>  // CUDA's, not caffe's, for fabs, signbit
+#include <math_functions.h>  // CUDA's, not caffe's, for fabs, signbit, exp(f)
 #include
 #include
 #include
@@ -39,4 +39,34 @@ DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sign, y[index] = (Dtype(0) < x[index])
 DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sgnbit, y[index] = signbit(x[index]));
 DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(fabs, y[index] = fabs(x[index]));
 
+
+// Element-wise logistic sigmoid kernel: y[i] = 1 / (1 + exp(-x[i])).
+// Only the float and double specializations below are defined.
+template <typename Dtype>
+__global__ void sigmoid_kernel(const int n, const Dtype* x, Dtype* y);
+
+// Float specialization: float literals and expf keep the whole expression in
+// single precision instead of promoting the division to double.
+template <>
+__global__ void sigmoid_kernel<float>(const int n, const float* x, float* y) {
+  CUDA_KERNEL_LOOP(index, n) {
+    y[index] = 1.0f / (1.0f + expf(-x[index]));
+  }
+}
+
+// Double specialization.
+template <>
+__global__ void sigmoid_kernel<double>(const int n, const double* x,
+    double* y) {
+  CUDA_KERNEL_LOOP(index, n) {
+    y[index] = 1.0 / (1.0 + exp(-x[index]));
+  }
+}
+
+// Host-side launcher: applies sigmoid_kernel to n elements of x, writing y.
+// NOTE(review): the launch configuration was lost in this copy of the patch;
+// the one below follows the file's usual CAFFE_GET_BLOCKS /
+// CAFFE_CUDA_NUM_THREADS convention -- confirm against the original.
+template <typename Dtype>
+void caffe_gpu_sigmoid(const int n, const Dtype* x, Dtype* y) {
+  // NOLINT_NEXT_LINE(whitespace/operators)
+  sigmoid_kernel<Dtype><<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(
+      n, x, y);
+}
+
+// Explicit instantiations for the two supported element types.
+template
+void caffe_gpu_sigmoid<float>(const int n, const float* x, float* y);
+template
+void caffe_gpu_sigmoid<double>(const int n, const double* x, double* y);
+
 }  // namespace caffe