From 1543eeb4ce20b8173ac8a60c5e2a348b296dcf18 Mon Sep 17 00:00:00 2001
From: superjom
Date: Wed, 16 Aug 2017 18:03:18 +0800
Subject: [PATCH 1/5] init

---
 paddle/framework/pybind.cc                   |  7 ++++
 paddle/operators/recurrent_op.cc             |  1 -
 .../v2/framework/tests/gradient_checker.py   | 12 +++---
 .../v2/framework/tests/test_recurrent_op.py  | 40 ++++++++++++++++++-
 4 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc
index fe0c87bc57082..13f23a681989f 100644
--- a/paddle/framework/pybind.cc
+++ b/paddle/framework/pybind.cc
@@ -275,6 +275,13 @@ All parameter, weight, gradient are variables in Paddle.
                    const std::shared_ptr &net) -> void {
                  self.set_stepnet(net);
                });
+
+  rnn.def("backward", [](const operators::RecurrentOp &forwardOp,
+                         const std::unordered_set &no_grad_vars) {
+    const auto &op = *static_cast(&forwardOp);
+    return Backward(op, no_grad_vars);
+  });
+
   ExposeOperator(rnn);
 
   m.def("unique_integer", UniqueIntegerGenerator);
diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc
index 78ce0ba3c0fa4..8c7300a3508c4 100644
--- a/paddle/operators/recurrent_op.cc
+++ b/paddle/operators/recurrent_op.cc
@@ -77,7 +77,6 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const {
   // Now all variables in scope must be created outside of op.
   PADDLE_ENFORCE_NOT_NULL(stepnet_);
   PADDLE_ENFORCE(!(*stepnet_)->Outputs().empty(), "stepnet_ op has no outputs");
-  PADDLE_ENFORCE(!(*stepnet_)->Outputs().empty(), "net_op has no outputs");
 
   if (seq_len_ > step_scopes->size()) {
     for (size_t i = step_scopes->size(); i < seq_len_; ++i) {
diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py
index 501cf6110ff74..b6d8131be98d5 100644
--- a/python/paddle/v2/framework/tests/gradient_checker.py
+++ b/python/paddle/v2/framework/tests/gradient_checker.py
@@ -29,13 +29,13 @@ def get_numeric_gradient(op,
                          local_scope=None):
     """
     Get Numeric Gradient for an operator's input.
-
-    :param op: C++ operator instance, could be an network
-    :param input_values: The input variables. Should be an dictionary, key is
-    variable name. Value is numpy array.
-    :param output_name: The final output variable name.
+
+    :param op: C++ operator instance, could be an network
+    :param input_values: The input variables. Should be an dictionary, key is
+    variable name. Value is numpy array
+    :param output_name: The final output variable name.
     :param input_to_check: The input variable need to get gradient.
-    :param delta: The perturbation value for numeric gradient method. The
+    :param delta: The perturbation value for numeric gradient method. The
         smaller delta is, the more accurate result will get. But if that delta is
        too small, it could occur numerical stability problem.
     :param local_scope: The local scope used for get_numeric_gradient.
diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py
index 3d4a34d8d713f..0da56c1ad9ae2 100644
--- a/python/paddle/v2/framework/tests/test_recurrent_op.py
+++ b/python/paddle/v2/framework/tests/test_recurrent_op.py
@@ -3,6 +3,7 @@
 import unittest
 import numpy as np
 from paddle.v2.framework.op import Operator, RecurrentOp
+from gradient_checker import GradientChecker
 
 
 def py_sigmoid(x):
@@ -69,7 +70,7 @@ def create_tensor(scope, name, shape, np_data):
     return tensor
 
 
-class TestRecurrentOp(unittest.TestCase):
+class RecurrentOpTest(unittest.TestCase):
     '''
     Test RNNOp
 
@@ -164,5 +165,42 @@ def test_forward(self):
         self.assertEqual(pd_output.shape, py_output.shape)
 
 
+class RecurrentGradientOpTest(unittest.TestCase):
+    def create_forward_op(self):
+        self.forward_op = RecurrentOp(
+            # inputs
+            inlinks=["x"],
+            boot_memories=["h_boot"],
+            step_net="stepnet",
+            # outputs
+            outlinks=["h"],
+            step_scopes="step_scopes",
+            # attributes
+            inlink_alias=["x@alias"],
+            outlink_alias=["h@alias"],
+            pre_memories=["h@pre"],
+            memories=["h@alias"])
+
+        # create a stepnet for RNN
+        stepnet = core.Net.create()
+        x_fc_op = Operator("mul", X="x@alias", Y="W", Out="Wx")
+        h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh")
+        sum_op = Operator("add_two", X="Wx", Y="Uh", Out="sum")
+        sig_op = Operator("sigmoid", X="sum", Y="h@alias")
+
+        for op in [x_fc_op, h_fc_op, sum_op, sig_op]:
+            stepnet.add_op(op)
+        stepnet.complete_add_op(True)
+        self.forward_op.set_stepnet(stepnet)
+
+    def create_gradient_op(self):
+        a = set()
+        backward_op = core.RecurrentOp.backward(self.forward_op, a)
+
+    def test_grad(self):
+        self.create_forward_op()
+        self.create_gradient_op()
+
+
 if __name__ == '__main__':
     unittest.main()

From 0d7e4294fcc3848b798810895e78dd17a6cbdd88 Mon Sep 17 00:00:00 2001
From: superjom
Date: Tue, 19 Sep 2017 14:45:38 -0400
Subject: [PATCH 2/5] remove alias

---
 paddle/operators/recurrent_op.cc             | 18 ++-----
 paddle/operators/rnn/recurrent_op_utils.cc   | 49 ++++++-------------
 paddle/operators/rnn/recurrent_op_utils.h    | 21 +++-----
 .../v2/framework/tests/test_recurrent_op.py  |  9 ++--
 4 files changed, 28 insertions(+), 69 deletions(-)

diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc
index e826703c60ca8..cbf1a3458fb53 100644
--- a/paddle/operators/recurrent_op.cc
+++ b/paddle/operators/recurrent_op.cc
@@ -28,9 +28,7 @@ using Variable = framework::Variable;
 using Tensor = framework::Tensor;
 
 void RecurrentAlgorithm::InferShape(const Scope& scope) const {
-  seq_len_ = scope.FindVar((arg_->inlinks[0]).external)
-                 ->GetMutable()
-                 ->dims()[0];
+  seq_len_ = scope.FindVar(arg_->inlinks[0])->GetMutable()->dims()[0];
   CreateScopes(scope);
   auto step_scopes = GetStepScopes(scope);
   rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_,
@@ -121,14 +119,12 @@ void RecurrentAlgorithm::InitMemories(Scope* step_scope,
 }
 
 const rnn::ArgumentName RecurrentOp::kArgName{
-    "step_net", "step_scopes",  "inlinks",
-    "outlinks", "inlink_alias", "outlink_alias",
+    "step_net", "step_scopes", "inlinks", "outlinks",
     "memories", "pre_memories", "boot_memories"};
 
 const rnn::ArgumentName RecurrentGradientOp::kArgName{
-    "step_net",    "step_scopes",  "outlink@grad",
-    "inlink@grad", "inlink_alias", "outlink_alias",
-    "memories", "pre_memories", "boot_memories@grad"};
+    "step_net", "step_scopes", "outlink@grad", "inlink@grad",
+    "memories", "pre_memories", "boot_memories@grad"};
 
 RecurrentOp::RecurrentOp(const std::string& type,
                          const framework::VariableNameMap& inputs,
@@ -158,8 +154,6 @@ class RecurrentAlgorithmProtoAndCheckerMaker
   AddOutput(name.step_scopes, "step scopes");
 
   // Attributes stored in AttributeMap
-  AddAttr>(name.inlink_alias, "alias of inlinks");
-  AddAttr>(name.outlink_alias, "alias of outlinks");
   AddAttr>(name.pre_memories, "names of pre-memories");
   AddAttr>(name.memories, "names of memories");
 
@@ -204,9 +198,7 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients(
 }
 
 void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const {
-  seq_len_ = scope.FindVar((arg_->inlinks[0]).external)
-                 ->GetMutable()
-                 ->dims()[0];
+  seq_len_ = scope.FindVar(arg_->inlinks[0])->GetMutable()->dims()[0];
   auto step_scopes = GetStepScopes(scope);
   rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_,
                      true /*infer_shape_mode*/);
diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc
index 97872c67ac99f..11064d5b9e07c 100644
--- a/paddle/operators/rnn/recurrent_op_utils.cc
+++ b/paddle/operators/rnn/recurrent_op_utils.cc
@@ -23,13 +23,13 @@ namespace f = paddle::framework;
 using Tensor = framework::Tensor;
 
 void SegmentInputs(const std::vector& step_scopes,
-                   const std::vector& inlinks, const size_t seq_len,
-                   bool infer_shape_mode) {
+                   const std::vector& inlinks,
+                   const size_t seq_len, bool infer_shape_mode) {
   PADDLE_ENFORCE(!inlinks.empty(), "no in links are provided.");
   for (size_t i = 0; i < inlinks.size(); ++i) {
-    auto input_var = step_scopes[0]->FindVar(inlinks[i].external);
+    auto input_var = step_scopes[0]->FindVar(inlinks[i]);
     PADDLE_ENFORCE(input_var != nullptr, "input link [%s] is not in scope.",
-                   inlinks[i].external);
+                   inlinks[i]);
     Tensor* input = input_var->GetMutable();
     f::DDim dims = input->dims();
@@ -38,7 +38,7 @@ void SegmentInputs(const std::vector& step_scopes,
     f::DDim step_dims = slice_ddim(dims, 1, dims.size());
     for (size_t j = 0; j < seq_len; j++) {
       Tensor* step_input =
-          step_scopes[j]->NewVar(inlinks[i].internal)->GetMutable();
+          step_scopes[j]->NewVar(inlinks[i])->GetMutable();
       if (!infer_shape_mode) {
         *step_input = input->Slice(j, j + 1);
       }
@@ -48,18 +48,17 @@ void SegmentInputs(const std::vector& step_scopes,
 
 void ConcatOutputs(const std::vector& step_scopes,
-                   const std::vector& outlinks, const size_t seq_len,
-                   bool infer_shape_mode) {
+                   const std::vector& outlinks,
+                   const size_t seq_len, bool infer_shape_mode) {
   for (size_t i = 0; i < outlinks.size(); i++) {
-    auto output_var = step_scopes[0]->FindVar(outlinks[i].external);
+    auto output_var = step_scopes[0]->FindVar(outlinks[i]);
     PADDLE_ENFORCE(output_var != nullptr, "output link [%s] is not in scope.",
-                   outlinks[i].external);
+                   outlinks[i]);
     Tensor* output = output_var->GetMutable();
 
     if (infer_shape_mode) {
-      auto step_scope_var = step_scopes[0]->FindVar(outlinks[i].internal);
-      PADDLE_ENFORCE(step_scope_var != nullptr, "%s not in scope",
-                     outlinks[i].internal);
+      auto step_scope_var = step_scopes[0]->FindVar(outlinks[i]);
+      PADDLE_ENFORCE(step_scope_var != nullptr, "%s not in scope", outlinks[i]);
       f::DDim step_dims = step_scope_var->template GetMutable()->dims();
       std::vector dims_vec = vectorize(step_dims);
       dims_vec.insert(dims_vec.begin(), seq_len);
@@ -68,7 +67,7 @@ void ConcatOutputs(const std::vector& step_scopes,
       output->mutable_data(platform::CPUPlace());
       for (size_t j = 0; j < seq_len; j++) {
         Tensor* step_output =
-            step_scopes[j]->FindVar(outlinks[i].internal)->GetMutable();
+            step_scopes[j]->FindVar(outlinks[i])->GetMutable();
         // TODO(luotao02) data type and platform::DeviceContext() should set
        // correctly
        (output->Slice(j, j + 1))
@@ -108,29 +107,9 @@ void InitArgument(const ArgumentName& name, Argument* arg,
                   const framework::OperatorBase& op) {
   arg->step_scopes = op.Output(name.step_scopes);
 
-  auto inlinks = op.Inputs(name.inlinks);
-  auto inlink_alias = op.Attr>(name.inlink_alias);
-  PADDLE_ENFORCE(inlinks.size() == inlink_alias.size(),
-                 "the size of inlinks and inlink_alias don't match:%d,%d",
-                 inlinks.size(), inlink_alias.size());
-  for (size_t i = 0; i < inlinks.size(); ++i) {
-    rnn::Link link;
-    link.external = inlinks[i];
-    link.internal = inlink_alias[i];
-    (arg->inlinks).push_back(link);
-  }
+  arg->inlinks = op.Inputs(name.inlinks);
 
-  auto outlinks = op.Outputs(name.outlinks);
-  auto outlink_alias = op.Attr>(name.outlink_alias);
-  PADDLE_ENFORCE(outlinks.size() == outlink_alias.size(),
-                 "the size of outlinks and outlink_alias don't match:%d,%d",
-                 outlinks.size(), outlink_alias.size());
-  for (size_t i = 0; i < outlinks.size(); ++i) {
-    rnn::Link link;
-    link.external = outlinks[i];
-    link.internal = outlink_alias[i];
-    (arg->outlinks).push_back(link);
-  }
+  arg->outlinks = op.Outputs(name.outlinks);
 
   auto boot_memories = op.Inputs(name.boot_memories);
diff --git a/paddle/operators/rnn/recurrent_op_utils.h b/paddle/operators/rnn/recurrent_op_utils.h
index 17941c503cfcc..7dafe5d0088c4 100644
--- a/paddle/operators/rnn/recurrent_op_utils.h
+++ b/paddle/operators/rnn/recurrent_op_utils.h
@@ -41,18 +41,11 @@ struct MemoryAttr {
   std::string boot_var;
 };
 
-struct Link {
-  // input or output links name.
-  std::string internal;
-  // alias to avoid duplicate keys in scopes.
-  std::string external;
-};
-
 struct Argument {
   std::string step_net;
   std::string step_scopes;
-  std::vector inlinks;
-  std::vector outlinks;
+  std::vector inlinks;
+  std::vector outlinks;
   std::vector memories;
 };
 
@@ -61,8 +54,6 @@ struct ArgumentName {
   std::string step_scopes;
   std::string inlinks;
   std::string outlinks;
-  std::string inlink_alias;   // the alias of inlinks in step net.
-  std::string outlink_alias;  // the alias of outlinks in step net.
   std::string memories;       // the memory name
   std::string pre_memories;   // the previous memory name
   std::string boot_memories;  // the boot memory name
@@ -72,15 +63,15 @@ struct ArgumentName {
  * Prepare inputs for each step net.
  */
 void SegmentInputs(const std::vector& step_scopes,
-                   const std::vector& inlinks, const size_t seq_len,
-                   bool infer_shape_mode);
+                   const std::vector& inlinks,
+                   const size_t seq_len, bool infer_shape_mode);
 
 /**
  * Process outputs of step nets and merge to variables.
 */
 void ConcatOutputs(const std::vector& step_scopes,
-                   const std::vector& outlinks, const size_t seq_len,
-                   bool infer_shape_mode);
+                   const std::vector& outlinks,
+                   const size_t seq_len, bool infer_shape_mode);
 
 void LinkMemories(const std::vector& step_scopes,
                   const std::vector& memories, const size_t step_id,
diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py
index 22e680fd783ec..886f941d4dc5f 100644
--- a/python/paddle/v2/framework/tests/test_recurrent_op.py
+++ b/python/paddle/v2/framework/tests/test_recurrent_op.py
@@ -123,7 +122,6 @@ def create_global_variables(self):
         create_tensor(self.scope, "h_boot", [self.batch_size, self.input_dim],
                       h_boot_np_data)
         self.scope.new_var("step_scopes")
-        self.scope.new_var("h@alias")
         self.scope.new_var("h")
 
     def create_rnn_op(self):
@@ -137,17 +136,15 @@ def create_rnn_op(self):
             outlinks=["h"],
             step_scopes="step_scopes",
             # attributes
-            inlink_alias=["x@alias"],
-            outlink_alias=["h@alias"],
             pre_memories=["h@pre"],
-            memories=["h@alias"])
+            memories=["h@mem"])
 
     def create_step_net(self):
         stepnet = core.Net.create()
-        x_fc_op = Operator("mul", X="x@alias", Y="W", Out="Wx")
+        x_fc_op = Operator("mul", X="x", Y="W", Out="Wx")
         h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh")
         sum_op = Operator("add", X="Wx", Y="Uh", Out="sum")
-        sig_op = Operator("sigmoid", X="sum", Y="h@alias")
+        sig_op = Operator("sigmoid", X="sum", Y="h@mem")
 
         for op in [x_fc_op, h_fc_op, sum_op, sig_op]:
             stepnet.append_op(op)

From 3c29224ef32f5a994714051785610e8e710fa2b1 Mon Sep 17 00:00:00 2001
From: superjom
Date: Wed, 20 Sep 2017 18:18:16 -0400
Subject: [PATCH 3/5] remove alias

---
 paddle/framework/scope.h                     |  2 ++
 paddle/operators/prelu_op.h                  |  2 +-
 paddle/operators/recurrent_op.cc             |  7 ++++--
 paddle/operators/rnn/recurrent_op_utils.cc   | 22 +++++++++----------
 .../v2/framework/tests/test_recurrent_op.py  |  7 +++---
 5 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h
index 2ba3f8ed355b4..c93b03e48130a 100644
--- a/paddle/framework/scope.h
+++ b/paddle/framework/scope.h
@@ -58,6 +58,8 @@ class Scope {
   /// nullptr if cannot find.
   Variable* FindVar(const std::string& name) const;
 
+  const Scope& parent() const { return *parent_; }
+
   /// Find the scope or an ancestor scope that contains the given variable.
   const Scope* FindScope(const Variable* var) const;
diff --git a/paddle/operators/prelu_op.h b/paddle/operators/prelu_op.h
index 63031c25cc357..9843c476e4a49 100644
--- a/paddle/operators/prelu_op.h
+++ b/paddle/operators/prelu_op.h
@@ -94,7 +94,7 @@ class PReluGradKernel : public framework::OpKernel {
     Transform(context.device_context(), out_ptr, out_ptr + numel, dout_ptr,
               dx_ptr, PReluGradFunctor(alpha_ptr));
 
-    // TODO (Zhuoyuan): add dalpha upgrade when GPU kernels ready
+    // TODO(Zhuoyuan): add dalpha upgrade when GPU kernels ready
   }
 };
 
diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc
index fc57f0dcea9a5..ad985839f5908 100644
--- a/paddle/operators/recurrent_op.cc
+++ b/paddle/operators/recurrent_op.cc
@@ -29,8 +29,11 @@ using Tensor = framework::Tensor;
 using LoDTensor = framework::LoDTensor;
 
 void RecurrentAlgorithm::InferShape(const Scope& scope) const {
-  seq_len_ =
-      scope.FindVar(arg_->inlinks[0])->GetMutable()->dims()[0];
+  auto* input0 = scope.FindVar(arg_->inlinks[0]);
+  PADDLE_ENFORCE_NOT_NULL(input0);
+  seq_len_ = input0->GetMutable()->dims()[0];
+  PADDLE_ENFORCE_GT(seq_len_, 0);
+
   CreateScopes(scope);
   auto step_scopes = GetStepScopes(scope);
   rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_,
diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc
index 3e7e301ae3daa..ca7219b26d83e 100644
--- a/paddle/operators/rnn/recurrent_op_utils.cc
+++ b/paddle/operators/rnn/recurrent_op_utils.cc
@@ -28,14 +28,15 @@ void SegmentInputs(const std::vector& step_scopes,
                    const size_t seq_len, bool infer_shape_mode) {
   PADDLE_ENFORCE(!inlinks.empty(), "no in links are provided.");
   for (size_t i = 0; i < inlinks.size(); ++i) {
-    auto input_var = step_scopes[0]->FindVar(inlinks[i]);
-    PADDLE_ENFORCE(input_var != nullptr, "input link [%s] is not in scope.",
-                   inlinks[i]);
+    // global inputs
+    auto input_var = step_scopes[0]->parent().FindVar(inlinks[i]);
+    PADDLE_ENFORCE_NOT_NULL(input_var, "input link [%s] is not in scope.",
+                            inlinks[i]);
     LoDTensor* input = input_var->GetMutable();
     f::DDim dims = input->dims();
-    PADDLE_ENFORCE(static_cast(dims[0]) == seq_len,
-                   "all the inlinks must have same length");
+    PADDLE_ENFORCE_EQ(static_cast(dims[0]), seq_len,
+                      "all the inlinks be the same length");
     f::DDim step_dims = slice_ddim(dims, 1, dims.size());
     for (size_t j = 0; j < seq_len; j++) {
       Tensor* step_input =
@@ -54,15 +55,14 @@ void ConcatOutputs(const std::vector& step_scopes,
                    const std::vector& outlinks,
                    const size_t seq_len, bool infer_shape_mode) {
   for (size_t i = 0; i < outlinks.size(); i++) {
-    auto output_var = step_scopes[0]->FindVar(outlinks[i]);
-    PADDLE_ENFORCE(output_var != nullptr, "output link [%s] is not in scope.",
-                   outlinks[i]);
+    auto output_var = step_scopes[0]->parent().FindVar(outlinks[i]);
+    PADDLE_ENFORCE_NOT_NULL(output_var, "output link [%s] is not in scope.",
+                            outlinks[i]);
     LoDTensor* output = output_var->GetMutable();
 
     if (infer_shape_mode) {
-      auto step_scope_var = step_scopes[0]->FindVar(outlinks[i]);
-      PADDLE_ENFORCE(step_scope_var != nullptr, "%s not in scope",
-                     outlinks[i]);
+      auto step_scope_var = step_scopes[0]->FindVar(outlinks[i]);
+      PADDLE_ENFORCE_NOT_NULL(step_scope_var, "%s not in scope", outlinks[i]);
       f::DDim step_dims =
           step_scope_var->template GetMutable()->dims();
       std::vector dims_vec = vectorize(step_dims);
diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py
index 886f941d4dc5f..79eda70021b76 100644
--- a/python/paddle/v2/framework/tests/test_recurrent_op.py
+++ b/python/paddle/v2/framework/tests/test_recurrent_op.py
@@ -59,7 +59,6 @@ def setUp(self):
 
     def test_forward(self):
         output = self.rnn.forward()
-        print 'output', output
 
 
 def create_tensor(scope, name, shape, np_data):
@@ -103,7 +102,7 @@ def forward(self):
         ctx = core.DeviceContext.create(core.CPUPlace())
         self.rnnop.infer_shape(self.scope)
         self.rnnop.run(self.scope, ctx)
-        return np.array(self.scope.find_var("h").get_tensor())
+        return np.array(self.scope.find_var("h@mem").get_tensor())
 
     def create_global_variables(self):
         # create inlink
@@ -123,7 +122,7 @@ def create_global_variables(self):
         create_tensor(self.scope, "h_boot", [self.batch_size, self.input_dim],
                       h_boot_np_data)
         self.scope.new_var("step_scopes")
-        self.scope.new_var("h")
+        self.scope.new_var("h@mem")
 
     def create_rnn_op(self):
         # create RNNOp
@@ -133,7 +132,7 @@ def create_rnn_op(self):
             boot_memories=["h_boot"],
             step_net="stepnet",
             # outputs
-            outlinks=["h"],
+            outlinks=["h@mem"],
             step_scopes="step_scopes",
             # attributes
             pre_memories=["h@pre"],

From 6a0c3428745603c2ae8b0749b2d6aa66bee6ed52 Mon Sep 17 00:00:00 2001
From: superjom
Date: Wed, 20 Sep 2017 22:36:13 -0400
Subject: [PATCH 4/5] make RecurrentOp's backward work

---
 paddle/framework/operator.cc                        |  4 ++--
 paddle/operators/recurrent_op.cc                    | 12 ++++++------
 paddle/operators/recurrent_op.h                     |  5 ++++-
 paddle/operators/rnn/recurrent_op_utils.cc          |  8 +++++---
 paddle/operators/rnn/recurrent_op_utils.h           |  2 +-
 paddle/pybind/pybind.cc                             |  9 ---------
 .../paddle/v2/framework/tests/test_recurrent_op.py  | 11 +++++------
 7 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc
index 49509af6630ad..41992185abada 100644
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -60,8 +60,8 @@ std::string OperatorBase::Output(const std::string& name) const {
 const std::vector& OperatorBase::Outputs(
     const std::string& name) const {
   auto it = outputs_.find(name);
-  PADDLE_ENFORCE(it != outputs_.end(), "Op %s does not have output %s", type_,
-                 name);
+  PADDLE_ENFORCE(it != outputs_.end(), "Op %s does not have output called %s",
+                 type_, name);
   return it->second;
 }
 
diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc
index 494bf2707e402..e7deaf9940699 100644
--- a/paddle/operators/recurrent_op.cc
+++ b/paddle/operators/recurrent_op.cc
@@ -128,8 +128,8 @@ const rnn::ArgumentName RecurrentOp::kArgName{
     "memories", "pre_memories", "boot_memories"};
 
 const rnn::ArgumentName RecurrentGradientOp::kArgName{
-    "step_net", "step_scopes", "outlink@grad", "inlink@grad",
-    "memories", "pre_memories", "boot_memories@grad"};
+    "step_net", "step_scopes@GRAD", "outlinks@GRAD", "inlinks@GRAD",
+    "memories", "pre_memories", "boot_memories@GRAD"};
 
 RecurrentOp::RecurrentOp(const std::string& type,
                          const framework::VariableNameMap& inputs,
@@ -225,13 +225,13 @@ RecurrentGradientOp::RecurrentGradientOp(
     const framework::VariableNameMap& outputs,
     const framework::AttributeMap& attrs)
     : OperatorBase(type, inputs, outputs, attrs) {
-  rnn::InitArgument(kArgName, &arg_, *this);
+  rnn::InitArgument(kArgName, &arg_, *this, true /*is grad*/);
   alg_.Init(&arg_, &stepnet_);
 }
 
 }  // namespace operators
 }  // namespace paddle
 
-REGISTER_OP_WITHOUT_GRADIENT(
-    recurrent, paddle::operators::RecurrentOp,
-    paddle::operators::RecurrentAlgorithmProtoAndCheckerMaker);
+REGISTER_OP(recurrent, paddle::operators::RecurrentOp,
+            paddle::operators::RecurrentAlgorithmProtoAndCheckerMaker,
+            recurrent_grad, paddle::operators::RecurrentGradientOp);
diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h
index 1033d657a3a8f..ad4df9e55b91d 100644
--- a/paddle/operators/recurrent_op.h
+++ b/paddle/operators/recurrent_op.h
@@ -22,7 +22,7 @@ namespace paddle {
 namespace operators {
 
 // The sequence format in RecurrentOp is Tensor now.
-// TODO(Yan Chunwei):
+// TODO(Superjom)
 // 1. No-padding computing for sequences with indifinite length in one batch.
 // 2. Hierarchical RNN for sequence with sub-sequence.
 // 3. Internal Memory.
@@ -177,6 +177,9 @@ class RecurrentGradientOp : public framework::OperatorBase {
 
   static const rnn::ArgumentName kArgName;
 
+  /*
+   * set a stepnet that is created according to a RecurrentOp's stepnet.
+   */
   void set_stepnet(std::unique_ptr net) {
     stepnet_ = std::move(net);
   }
diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc
index ca7219b26d83e..d63c86b301c72 100644
--- a/paddle/operators/rnn/recurrent_op_utils.cc
+++ b/paddle/operators/rnn/recurrent_op_utils.cc
@@ -109,14 +109,16 @@ void LinkMemories(const std::vector& scopes,
 }
 
 void InitArgument(const ArgumentName& name, Argument* arg,
-                  const framework::OperatorBase& op) {
-  arg->step_scopes = op.Output(name.step_scopes);
+                  const framework::OperatorBase& op, bool is_grad) {
+  arg->step_scopes =
+      is_grad ? op.Input(name.step_scopes) : op.Output(name.step_scopes);
 
   arg->inlinks = op.Inputs(name.inlinks);
 
   arg->outlinks = op.Outputs(name.outlinks);
 
-  auto boot_memories = op.Inputs(name.boot_memories);
+  auto boot_memories =
+      is_grad ? op.Outputs(name.boot_memories) : op.Inputs(name.boot_memories);
 
   // attributes
   auto memories = op.Attr>(name.memories);
diff --git a/paddle/operators/rnn/recurrent_op_utils.h b/paddle/operators/rnn/recurrent_op_utils.h
index 7dafe5d0088c4..9c777f1e9067a 100644
--- a/paddle/operators/rnn/recurrent_op_utils.h
+++ b/paddle/operators/rnn/recurrent_op_utils.h
@@ -78,7 +78,7 @@ void LinkMemories(const std::vector& step_scopes,
                   const int offset, bool infer_shape_mode);
 
 void InitArgument(const ArgumentName& name, Argument* arg,
-                  const framework::OperatorBase& op);
+                  const framework::OperatorBase& op, bool is_grad = false);
 
 }  // namespace rnn
 }  // namespace operators
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 7f4bad4df05d4..c7009a604f60c 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -311,15 +311,6 @@ All parameter, weight, gradient are variables in Paddle.
                    self.set_falsenet(net.Clone());
                  });
 
-  rnn.def("backward",
-          [](const operators::RecurrentOp &forwardOp,
-             const std::unordered_set &no_grad_vars) {
-            const auto &op = *static_cast(&forwardOp);
-            return Backward(op, no_grad_vars);
-          });
-
-  ExposeOperator(rnn);
-
   m.def("unique_integer", UniqueIntegerGenerator);
 
   m.def("is_compile_gpu", IsCompileGPU);
diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py
index 1842cd74e4aa2..cc3d4776e26a9 100644
--- a/python/paddle/v2/framework/tests/test_recurrent_op.py
+++ b/python/paddle/v2/framework/tests/test_recurrent_op.py
@@ -3,7 +3,7 @@
 import unittest
 import numpy as np
 from paddle.v2.framework.op import Operator, RecurrentOp
-from gradient_checker import GradientChecker
+from op_test import get_numeric_gradient
 
 
 def py_sigmoid(x):
@@ -48,7 +48,7 @@ def step(self, step_id, x):
         else:
             pre_mem = self.h_boot
         xW = np.matmul(x, self.W)
-        hU = np.matmul(mem, self.U)
+        hU = np.matmul(pre_mem, self.U)
 
         sum = xW + hU
         self.mems[step_id] = py_sigmoid(sum)
@@ -159,6 +159,7 @@ def test_forward(self):
         print
         print 'py_output', py_output
         self.assertEqual(pd_output.shape, py_output.shape)
+        self.assertTrue(np.isclose(pd_output, py_output, rtol=0.1).all())
 
 
 class RecurrentGradientOpTest(unittest.TestCase):
@@ -172,8 +173,6 @@ def create_forward_op(self):
             outlinks=["h"],
             step_scopes="step_scopes",
             # attributes
-            inlink_alias=["x@alias"],
-            outlink_alias=["h@alias"],
             pre_memories=["h@pre"],
             memories=["h@alias"])
 
@@ -181,11 +180,11 @@ def create_forward_op(self):
         stepnet = core.Net.create()
         x_fc_op = Operator("mul", X="x@alias", Y="W", Out="Wx")
         h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh")
-        sum_op = Operator("add_two", X="Wx", Y="Uh", Out="sum")
+        sum_op = Operator("add", X="Wx", Y="Uh", Out="sum")
         sig_op = Operator("sigmoid", X="sum", Y="h@alias")
 
         for op in [x_fc_op, h_fc_op, sum_op, sig_op]:
-            stepnet.add_op(op)
+            stepnet.append_op(op)
         stepnet.complete_add_op(True)
         self.forward_op.set_stepnet(stepnet)

From 903cee93033db2957991b9385b380b6163fdda89 Mon Sep 17 00:00:00 2001
From: superjom
Date: Wed, 20 Sep 2017 22:52:32 -0400
Subject: [PATCH 5/5] clean code

---
 paddle/operators/rnn/recurrent_op_utils.cc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc
index d63c86b301c72..a767009d2366e 100644
--- a/paddle/operators/rnn/recurrent_op_utils.cc
+++ b/paddle/operators/rnn/recurrent_op_utils.cc
@@ -112,14 +112,11 @@ void InitArgument(const ArgumentName& name, Argument* arg,
                   const framework::OperatorBase& op, bool is_grad) {
   arg->step_scopes =
       is_grad ? op.Input(name.step_scopes) : op.Output(name.step_scopes);
-
   arg->inlinks = op.Inputs(name.inlinks);
-
   arg->outlinks = op.Outputs(name.outlinks);
 
   auto boot_memories =
       is_grad ? op.Outputs(name.boot_memories) : op.Inputs(name.boot_memories);
-
   // attributes
   auto memories = op.Attr>(name.memories);
   auto pre_memories = op.Attr>(name.pre_memories);
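
Note on the numeric gradient check (not part of the patch series above): test_grad in RecurrentGradientOpTest builds the forward and backward ops but does not yet compare gradients. The sketch below is a minimal, NumPy-only illustration of the finite-difference idea behind the get_numeric_gradient helper imported in patch 4, applied to the same single-step cell PySimpleRNN uses (h = sigmoid(x.W + h_pre.U)). The helper names step, numeric_grad_W and analytic_grad_W are illustrative only and not part of the PaddlePaddle API.

import numpy as np


def py_sigmoid(x):
    return 1. / (1. + np.exp(-x))


def step(x, h_pre, W, U):
    # one RNN step, same cell as PySimpleRNN: h = sigmoid(x.W + h_pre.U)
    return py_sigmoid(np.matmul(x, W) + np.matmul(h_pre, U))


def numeric_grad_W(x, h_pre, W, U, delta=1e-5):
    # central finite difference of sum(h) w.r.t. every element of W;
    # the same delta-perturbation idea described in gradient_checker.py
    grad = np.zeros_like(W)
    for i in range(W.shape[0]):
        for j in range(W.shape[1]):
            W_pos, W_neg = W.copy(), W.copy()
            W_pos[i, j] += delta
            W_neg[i, j] -= delta
            grad[i, j] = (step(x, h_pre, W_pos, U).sum() -
                          step(x, h_pre, W_neg, U).sum()) / (2. * delta)
    return grad


def analytic_grad_W(x, h_pre, W, U):
    # for loss = sum(h), d loss / dW = x^T . (h * (1 - h)) in this cell
    h = step(x, h_pre, W, U)
    return np.matmul(x.T, h * (1. - h))


if __name__ == '__main__':
    np.random.seed(0)
    x = np.random.normal(size=(4, 3))
    h_pre = np.random.normal(size=(4, 5))
    W = np.random.normal(size=(3, 5))
    U = np.random.normal(size=(5, 5))
    assert np.allclose(numeric_grad_W(x, h_pre, W, U),
                       analytic_grad_W(x, h_pre, W, U), atol=1e-4)

The RecurrentGradientOp added by this series is meant to produce the same quantity operator-by-operator through the step net; a follow-up test_grad could compare its output tensor against such a numeric estimate, analogous to how test_forward compares pd_output with py_output.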