From 734e87e55b00418aed0fac5a879b2704d62cf3ab Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Fri, 15 Dec 2017 20:08:55 +0800
Subject: [PATCH 1/6] Add python wrapper for lstm unit op.

---
 doc/api/v2/fluid/layers.rst                 |  11 +-
 python/paddle/v2/fluid/layers/nn.py         | 112 +++++++++++++++++++-
 python/paddle/v2/fluid/tests/test_layers.py |  17 +++
 3 files changed, 132 insertions(+), 8 deletions(-)

diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst
index 89e5fec13bf906..0ab36402fa5acd 100644
--- a/doc/api/v2/fluid/layers.rst
+++ b/doc/api/v2/fluid/layers.rst
@@ -188,12 +188,6 @@ beam_search_decode
     :noindex:
 
 
-lstm
----------
-.. autofunction:: paddle.v2.fluid.layers.lstm
-    :noindex:
-
-
 lod_rank_table
 ---------
 .. autofunction:: paddle.v2.fluid.layers.lod_rank_table
@@ -300,3 +294,8 @@ conv2d_transpose
 .. autofunction:: paddle.v2.fluid.layers.conv2d_transpose
     :noindex:
+
+lstm_unit
+---------
+.. autofunction:: paddle.v2.fluid.layers.lstm_unit
+    :noindex:
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index bad7dbd84e8810..84e62d988ce9db 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -5,12 +5,13 @@
 from ..layer_helper import LayerHelper
 from ..initializer import Normal, Constant
 from ..framework import Variable
+from tensor import concat
 
 __all__ = [
     'fc', 'embedding', 'dynamic_lstm', 'gru_unit', 'linear_chain_crf',
     'crf_decoding', 'cos_sim', 'cross_entropy', 'square_error_cost', 'accuracy',
     'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d',
-    'batch_norm', 'beam_search_decode', 'conv2d_transpose'
+    'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'lstm_unit'
 ]
 
 
@@ -392,7 +393,7 @@ def chunk_eval(input,
                excluded_chunk_types=None,
                **kwargs):
     """
-    This function computes and outputs the precision, recall and 
+    This function computes and outputs the precision, recall and
     F1-score of chunk detection.
     """
     helper = LayerHelper("chunk_eval", **kwargs)
@@ -789,3 +790,110 @@ def conv2d_transpose(input,
                          attrs=op_attr)
 
     return out
+
+
+def lstm_unit(x_t,
+              hidden_t_prev,
+              cell_t_prev,
+              forget_bias=0.0,
+              main_program=None,
+              startup_program=None):
+    """LSTM unit layer. The equations of an LSTM step are:
+
+    .. math::
+
+        i_t & = \sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i)
+
+        f_t & = \sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f)
+
+        c_t & = f_t c_{t-1} + i_t \\tanh(W_{x_c}x_t + W_{h_c}h_{t-1} + b_c)
+
+        o_t & = \sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o)
+
+        h_t & = o_t \\tanh(c_t)
+
+    The inputs of the lstm unit are :math:`x_t`, :math:`h_{t-1}` and
+    :math:`c_{t-1}`. The implementation separates the linear transformation
+    from the non-linear transformation. Here, we take :math:`i_t` as an
+    example. The linear transformation is applied by calling a `fc` layer and
+    the equation is:
+
+    .. math::
+
+        L_{i_t} = W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i
+
+    The non-linear transformation is applied by calling `lstm_unit_op` and the
+    equation is:
+
+    .. math::
+
+        i_t = \sigma(L_{i_t})
+
+    This layer has two outputs: :math:`c_t` and :math:`h_t`.
+
+    Args:
+        x_t (Variable): The input value of current step.
+        hidden_t_prev (Variable): The hidden value of lstm unit.
+        cell_t_prev (Variable): The cell value of lstm unit.
+        forget_bias (float): The forget bias of lstm unit.
+        main_program (Program): The main program.
+        startup_program (Program): The startup program.
+
+    Returns:
+        tuple: The cell value and hidden value of lstm unit.
+
+    Raises:
+        ValueError: The ranks of **x_t**, **hidden_t_prev** and **cell_t_prev**\
+            are not 2, or the 1st dimensions of **x_t**, **hidden_t_prev** \
+            and **cell_t_prev** are not the same.
+
+    Examples:
+
+        .. code-block:: python
+
+            x_t = fluid.layers.fc(input=x_t_data, size=10)
+            prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=20)
+            prev_cell = fluid.layers.fc(input=prev_cell_data, size=30)
+            cell_value, hidden_value = fluid.layers.lstm_unit(x_t=x_t,
+                                                    hidden_t_prev=prev_hidden,
+                                                    cell_t_prev=prev_cell)
+    """
+    helper = LayerHelper('lstm_unit', **locals())
+
+    if len(x_t.shape) != 2:
+        raise ValueError("Rank of x_t must be 2.")
+
+    if len(hidden_t_prev.shape) != 2:
+        raise ValueError("Rank of hidden_t_prev must be 2.")
+
+    if len(cell_t_prev.shape) != 2:
+        raise ValueError("Rank of cell_t_prev must be 2.")
+
+    if x_t.shape[0] != hidden_t_prev.shape[0] or x_t.shape[
+            0] != cell_t_prev.shape[0]:
+        raise ValueError("The 1st dimension of x_t, hidden_t_prev and "
+                         "cell_t_prev must be the same.")
+
+    size = cell_t_prev.shape[1]
+    concat_out = concat(
+        input=[x_t, hidden_t_prev],
+        axis=1,
+        main_program=main_program,
+        startup_program=startup_program)
+    fc_out = fc(input=concat_out,
+                size=4 * size,
+                main_program=main_program,
+                startup_program=startup_program)
+    dtype = x_t.dtype
+    c = helper.create_tmp_variable(dtype)
+    h = helper.create_tmp_variable(dtype)
+
+    helper.append_op(
+        type='lstm_unit',
+        inputs={"X": fc_out,
+                "C_prev": cell_t_prev},
+        outputs={"C": c,
+                 "H": h},
+        attrs={"forget_bias": forget_bias})
+
+    return c, h
diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py
index 9b88080158139f..468bd41285526c 100644
--- a/python/paddle/v2/fluid/tests/test_layers.py
+++ b/python/paddle/v2/fluid/tests/test_layers.py
@@ -161,6 +161,23 @@ def test_sigmoid_cross_entropy(self):
                 x=dat, label=lbl))
             print(str(program))
 
+    def test_lstm_unit(self):
+        program = Program()
+        with program_guard(program):
+            x_t_data = layers.data(
+                name='x_t_data', shape=[10, 10], dtype='float32')
+            x_t = layers.fc(input=x_t_data, size=10)
+            prev_hidden_data = layers.data(
+                name='prev_hidden_data', shape=[10, 20], dtype='float32')
+            prev_hidden = layers.fc(input=prev_hidden_data, size=20)
+            prev_cell_data = layers.data(
+                name='prev_cell', shape=[10, 30], dtype='float32')
+            prev_cell = layers.fc(input=prev_cell_data, size=30)
+            self.assertIsNotNone(
+                layers.lstm_unit(
+                    x_t=x_t, hidden_t_prev=prev_hidden, cell_t_prev=prev_cell))
+            print(str(program))
+
 
 if __name__ == '__main__':
     unittest.main()
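The docstring above factors each gate into a linear part (computed by the `fc` layer on the concatenated `[x_t, hidden_t_prev]`) and a non-linear part (computed by `lstm_unit_op`). For intuition, here is a minimal NumPy sketch of the non-linear step; the `(i, f, o, c)` packing order of `X` and the placement of `forget_bias` are illustrative assumptions, not taken from the kernel:

.. code-block:: python

    import numpy as np

    def sigmoid(v):
        return 1.0 / (1.0 + np.exp(-v))

    def lstm_unit_ref(fc_out, c_prev, forget_bias=0.0):
        # fc_out plays the role of the op input `X`: the already-applied
        # linear transformation, with shape (batch, 4 * size).
        # The (i, f, o, c) packing below is an illustrative assumption.
        i, f, o, g = np.split(fc_out, 4, axis=1)
        c = sigmoid(f + forget_bias) * c_prev + sigmoid(i) * np.tanh(g)
        h = sigmoid(o) * np.tanh(c)
        return c, h  # same (cell, hidden) order as this patch

    batch, size = 2, 3
    rng = np.random.RandomState(0)
    c, h = lstm_unit_ref(rng.randn(batch, 4 * size), rng.randn(batch, size),
                         forget_bias=1.0)
    assert c.shape == h.shape == (batch, size)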
From a398e25d6ac786e14aa18be79438b8d2d1b191d0 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Mon, 18 Dec 2017 20:09:36 +0800
Subject: [PATCH 2/6] Expose param_attr and bias_attr.

---
 paddle/operators/lstm_unit_op.cc    | 5 ++++-
 python/paddle/v2/fluid/layers/nn.py | 9 +++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/paddle/operators/lstm_unit_op.cc b/paddle/operators/lstm_unit_op.cc
index 18b9cdf2a39e82..b6eb33bafe5054 100644
--- a/paddle/operators/lstm_unit_op.cc
+++ b/paddle/operators/lstm_unit_op.cc
@@ -51,7 +51,10 @@ class LstmUnitOpMaker : public framework::OpProtoAndCheckerMaker {
   LstmUnitOpMaker(framework::OpProto* proto,
                   framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "FC input before the non-linear activation.");
+    AddInput("X",
+             "Lstm unit only applies non-linear activations, please make sure "
+             "that the linear transformation has already been applied to `X`. "
+             "A linear transformation can be applied by adding a `fc` layer.");
     AddInput(
         "C_prev",
         "The cell state tensor of last time-step in the Lstm Unit operator.");
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index 84e62d988ce9db..1c101c62c2dc4c 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -5,6 +5,7 @@
 from ..layer_helper import LayerHelper
 from ..initializer import Normal, Constant
 from ..framework import Variable
+from ..param_attr import ParamAttr
 from tensor import concat
 
 __all__ = [
@@ -796,6 +797,8 @@ def lstm_unit(x_t,
                hidden_t_prev,
                cell_t_prev,
                forget_bias=0.0,
+               param_attr=None,
+               bias_attr=ParamAttr(),
                main_program=None,
                startup_program=None):
     """LSTM unit layer. The equations of an LSTM step are:
@@ -836,6 +839,10 @@ def lstm_unit(x_t,
         hidden_t_prev (Variable): The hidden value of lstm unit.
         cell_t_prev (Variable): The cell value of lstm unit.
         forget_bias (float): The forget bias of lstm unit.
+        param_attr (ParamAttr): The attributes of parameter weights, used to set
+            initializer, name etc.
+        bias_attr (ParamAttr): The attributes of bias weights, used to set
+            initializer, name etc.
         main_program (Program): The main program.
         startup_program (Program): The startup program.
@@ -882,6 +889,8 @@ def lstm_unit(x_t,
         startup_program=startup_program)
     fc_out = fc(input=concat_out,
                 size=4 * size,
+                param_attr=param_attr,
+                bias_attr=bias_attr,
                 main_program=main_program,
                 startup_program=startup_program)
     dtype = x_t.dtype
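With the attributes exposed, the gate weights and biases can be configured at call time. A hypothetical sketch in the style of test_layers.py follows; the `initializer=`, `scale=`, and `value=` keyword names are assumptions based on the `ParamAttr`, `Normal`, and `Constant` classes that nn.py itself imports, and at this point in the series the return order is still `(cell, hidden)` (a later patch flips it):

.. code-block:: python

    import paddle.v2.fluid.layers as layers
    from paddle.v2.fluid.framework import Program, program_guard
    from paddle.v2.fluid.param_attr import ParamAttr
    from paddle.v2.fluid.initializer import Normal, Constant

    program = Program()
    with program_guard(program):
        x_t_data = layers.data(name='x_t_data', shape=[10, 10], dtype='float32')
        x_t = layers.fc(input=x_t_data, size=10)
        prev_hidden_data = layers.data(
            name='prev_hidden_data', shape=[10, 20], dtype='float32')
        prev_hidden = layers.fc(input=prev_hidden_data, size=20)
        prev_cell_data = layers.data(
            name='prev_cell_data', shape=[10, 20], dtype='float32')
        prev_cell = layers.fc(input=prev_cell_data, size=20)
        # Custom initialization for the gate parameters (assumed kwargs).
        cell_value, hidden_value = layers.lstm_unit(
            x_t=x_t,
            hidden_t_prev=prev_hidden,
            cell_t_prev=prev_cell,
            param_attr=ParamAttr(initializer=Normal(scale=0.01)),
            bias_attr=ParamAttr(initializer=Constant(value=0.0)))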
" + "Linear tranformation can be applied by adding a `fc` layer"); AddInput( "C_prev", "The cell state tensor of last time-step in the Lstm Unit operator."); diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index 84e62d988ce9db..1c101c62c2dc4c 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -5,6 +5,7 @@ from ..layer_helper import LayerHelper from ..initializer import Normal, Constant from ..framework import Variable +from ..param_attr import ParamAttr from tensor import concat __all__ = [ @@ -796,6 +797,8 @@ def lstm_unit(x_t, hidden_t_prev, cell_t_prev, forget_bias=0.0, + param_attr=None, + bias_attr=ParamAttr(), main_program=None, startup_program=None): """Lstm unit layer. The equation of a lstm step is: @@ -836,6 +839,10 @@ def lstm_unit(x_t, hidden_t_prev (Variable): The hidden value of lstm unit. cell_t_prev (Variable): The cell value of lstm unit. forget_bias (float): The forget bias of lstm unit. + param_attr (ParamAttr): The attributes of parameter weights, used to set + initializer, name etc. + bias_attr (ParamAttr): The attributes of bias weights, used to set + initializer, name etc. main_program (Program): The main program. startup_program (Program): the startup program. @@ -882,6 +889,8 @@ def lstm_unit(x_t, startup_program=startup_program) fc_out = fc(input=concat_out, size=4 * size, + param_attr=param_attr, + bias_attr=bias_attr, main_program=main_program, startup_program=startup_program) dtype = x_t.dtype From 58d6946c874bbe539ace4fde05e7fb4693f30ca1 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 19 Dec 2017 11:03:20 +0800 Subject: [PATCH 3/6] Set the act to 'linear'. --- python/paddle/v2/fluid/layers/nn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index 1c101c62c2dc4c..ab443826bd7b44 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -891,6 +891,7 @@ def lstm_unit(x_t, size=4 * size, param_attr=param_attr, bias_attr=bias_attr, + act='linear', main_program=main_program, startup_program=startup_program) dtype = x_t.dtype From d993a4f58b7e2be4a76fda406e964229edff2dcb Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 19 Dec 2017 11:19:24 +0800 Subject: [PATCH 4/6] Change default value for bias_attr. --- python/paddle/v2/fluid/layers/nn.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index 9728adba735d99..31a0a312dbe12f 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -866,7 +866,7 @@ def lstm_unit(x_t, cell_t_prev, forget_bias=0.0, param_attr=None, - bias_attr=ParamAttr(), + bias_attr=None, main_program=None, startup_program=None): """Lstm unit layer. The equation of a lstm step is: @@ -909,8 +909,8 @@ def lstm_unit(x_t, forget_bias (float): The forget bias of lstm unit. param_attr (ParamAttr): The attributes of parameter weights, used to set initializer, name etc. - bias_attr (ParamAttr): The attributes of bias weights, used to set - initializer, name etc. + bias_attr (ParamAttr): The attributes of bias weights, if not False, + bias weights will be created and be set to default value. main_program (Program): The main program. startup_program (Program): the startup program. 
From d993a4f58b7e2be4a76fda406e964229edff2dcb Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Tue, 19 Dec 2017 11:19:24 +0800
Subject: [PATCH 4/6] Change default value for bias_attr.

---
 python/paddle/v2/fluid/layers/nn.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index 9728adba735d99..31a0a312dbe12f 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -866,7 +866,7 @@ def lstm_unit(x_t,
                cell_t_prev,
                forget_bias=0.0,
                param_attr=None,
-               bias_attr=ParamAttr(),
+               bias_attr=None,
                main_program=None,
                startup_program=None):
     """LSTM unit layer. The equations of an LSTM step are:
@@ -909,8 +909,8 @@ def lstm_unit(x_t,
         forget_bias (float): The forget bias of lstm unit.
         param_attr (ParamAttr): The attributes of parameter weights, used to set
             initializer, name etc.
-        bias_attr (ParamAttr): The attributes of bias weights, used to set
-            initializer, name etc.
+        bias_attr (ParamAttr): The attributes of bias weights. If not False,
+            bias weights will be created and set to the default value.
         main_program (Program): The main program.
         startup_program (Program): The startup program.
@@ -949,6 +949,9 @@ def lstm_unit(x_t,
         raise ValueError("The 1st dimension of x_t, hidden_t_prev and "
                          "cell_t_prev must be the same.")
 
+    if bias_attr is None:
+        bias_attr = ParamAttr()
+
     size = cell_t_prev.shape[1]

From 9ee9fefd2de46f2383309f489033fc6d94cd8628 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Tue, 19 Dec 2017 11:27:35 +0800
Subject: [PATCH 5/6] Change the return order to h, c.

---
 python/paddle/v2/fluid/layers/nn.py         | 8 ++++----
 python/paddle/v2/fluid/tests/test_layers.py | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index 31a0a312dbe12f..dd6bb54599af74 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -900,7 +900,7 @@ def lstm_unit(x_t,
 
         i_t = \sigma(L_{i_t})
 
-    This layer has two outputs: :math:`c_t` and :math:`h_t`.
+    This layer has two outputs: :math:`h_t` and :math:`c_t`.
 
     Args:
         x_t (Variable): The input value of current step.
@@ -915,7 +915,7 @@ def lstm_unit(x_t,
         startup_program (Program): The startup program.
 
     Returns:
-        tuple: The cell value and hidden value of lstm unit.
+        tuple: The hidden value and cell value of lstm unit.
 
     Raises:
         ValueError: The ranks of **x_t**, **hidden_t_prev** and **cell_t_prev**\
@@ -929,7 +929,7 @@ def lstm_unit(x_t,
             x_t = fluid.layers.fc(input=x_t_data, size=10)
             prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=20)
             prev_cell = fluid.layers.fc(input=prev_cell_data, size=30)
-            cell_value, hidden_value = fluid.layers.lstm_unit(x_t=x_t,
+            hidden_value, cell_value = fluid.layers.lstm_unit(x_t=x_t,
                                                    hidden_t_prev=prev_hidden,
                                                    cell_t_prev=prev_cell)
     """
@@ -977,4 +977,4 @@ def lstm_unit(x_t,
                  "H": h},
         attrs={"forget_bias": forget_bias})
 
-    return c, h
+    return h, c
diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py
index 7b56ae464c633d..d4a95bf6fc98fa 100644
--- a/python/paddle/v2/fluid/tests/test_layers.py
+++ b/python/paddle/v2/fluid/tests/test_layers.py
@@ -161,7 +161,7 @@ def test_sigmoid_cross_entropy(self):
                 x=dat, label=lbl))
             print(str(program))
 
-    def test_seq_expand(self):
+    def test_sequence_expand(self):
         program = Program()
         with program_guard(program):
             x = layers.data(name='x', shape=[10], dtype='float32')
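With the hidden value returned first, the outputs thread directly into the next step's `hidden_t_prev`/`cell_t_prev` when a sequence is unrolled by hand. A hypothetical sketch of such an unrolled recurrence, again in the style of test_layers.py:

.. code-block:: python

    import paddle.v2.fluid.layers as layers
    from paddle.v2.fluid.framework import Program, program_guard

    num_steps, size = 3, 20
    program = Program()
    with program_guard(program):
        init = layers.data(name='init', shape=[10, size], dtype='float32')
        hidden = layers.fc(input=init, size=size)
        cell = layers.fc(input=init, size=size)
        for t in range(num_steps):
            x_data = layers.data(
                name='x_%d' % t, shape=[10, size], dtype='float32')
            x_t = layers.fc(input=x_data, size=size)
            # (h, c) out, (h, c) back in: state threads straight through.
            hidden, cell = layers.lstm_unit(
                x_t=x_t, hidden_t_prev=hidden, cell_t_prev=cell)

Note that each `lstm_unit` call above creates fresh `fc` parameters, so this is not a weight-shared RNN; sharing weights across steps would presumably need a `param_attr` with a fixed name, which this sketch does not attempt.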
From 9573256f9d802dfe1daf9f6887044931ff03f636 Mon Sep 17 00:00:00 2001
From: yangyaming
Date: Tue, 19 Dec 2017 13:24:12 +0800
Subject: [PATCH 6/6] Remove main_program and startup_program.

---
 python/paddle/v2/fluid/layers/nn.py | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index 1d03f357eb2394..2c38c232240fbe 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -764,7 +764,7 @@ def conv2d_transpose(input,
     return out
 
 
-def sequence_expand(x, y, main_program=None, startup_program=None):
+def sequence_expand(x, y):
     """Sequence Expand Layer. This layer will expand the input variable **x**
     according to LoD information of **y**. And the following examples will
     explain how sequence_expand works:
@@ -808,8 +808,6 @@ def sequence_expand(x, y, main_program=None, startup_program=None):
     Args:
         x (Variable): The input variable which is a Tensor or LoDTensor.
         y (Variable): The input variable which is a LoDTensor.
-        main_program (Program): The main program.
-        startup_program (Program): The startup program.
 
     Returns:
         Variable: The expanded variable which is a LoDTensor.
@@ -836,9 +834,7 @@ def lstm_unit(x_t,
               cell_t_prev,
               forget_bias=0.0,
               param_attr=None,
-              bias_attr=None,
-              main_program=None,
-              startup_program=None):
+              bias_attr=None):
     """LSTM unit layer. The equations of an LSTM step are:
@@ -881,8 +877,6 @@ def lstm_unit(x_t,
             initializer, name etc.
         bias_attr (ParamAttr): The attributes of bias weights. If not False,
             bias weights will be created and set to the default value.
-        main_program (Program): The main program.
-        startup_program (Program): The startup program.
 
     Returns:
         tuple: The hidden value and cell value of lstm unit.
@@ -923,18 +917,11 @@ def lstm_unit(x_t,
     if bias_attr is None:
         bias_attr = ParamAttr()
 
     size = cell_t_prev.shape[1]
-    concat_out = concat(
-        input=[x_t, hidden_t_prev],
-        axis=1,
-        main_program=main_program,
-        startup_program=startup_program)
+    concat_out = concat(input=[x_t, hidden_t_prev], axis=1)
     fc_out = fc(input=concat_out,
                 size=4 * size,
                 param_attr=param_attr,
-                bias_attr=bias_attr,
-                act='linear',
-                main_program=main_program,
-                startup_program=startup_program)
+                bias_attr=bias_attr)
     dtype = x_t.dtype
     c = helper.create_tmp_variable(dtype)
    h = helper.create_tmp_variable(dtype)
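After this final patch the wrapper is simply `concat` -> `fc(size=4 * size)` -> the `lstm_unit` op, with the hidden value returned first. The following NumPy emulation of that composition, unrolled over a few steps, ties the series together; the `(i, f, o, c)` gate packing and the single weight matrix `W` are illustrative assumptions, not the op's documented layout:

.. code-block:: python

    import numpy as np

    def sigmoid(v):
        return 1.0 / (1.0 + np.exp(-v))

    def lstm_unit_np(x_t, h_prev, c_prev, W, b, forget_bias=0.0):
        # concat + linear transform: what the wrapper's concat/fc calls build.
        fc_out = np.concatenate([x_t, h_prev], axis=1).dot(W) + b
        i, f, o, g = np.split(fc_out, 4, axis=1)  # assumed gate packing
        c = sigmoid(f + forget_bias) * c_prev + sigmoid(i) * np.tanh(g)
        h = sigmoid(o) * np.tanh(c)
        return h, c  # hidden first, matching the final return order

    batch, in_dim, size, steps = 4, 8, 16, 5
    rng = np.random.RandomState(0)
    W = 0.1 * rng.randn(in_dim + size, 4 * size)
    b = np.zeros(4 * size)
    h = np.zeros((batch, size))
    c = np.zeros((batch, size))
    for _ in range(steps):
        h, c = lstm_unit_np(rng.randn(batch, in_dim), h, c, W, b,
                            forget_bias=1.0)
    print(h.shape, c.shape)  # (4, 16) (4, 16)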