Add python wrapper for lstm unit op. #6669

Merged
9 commits merged on Dec 19, 2017
13 changes: 7 additions & 6 deletions doc/api/v2/fluid/layers.rst
@@ -188,12 +188,6 @@ beam_search_decode
:noindex:


lstm
---------
.. autofunction:: paddle.v2.fluid.layers.lstm
:noindex:


lod_rank_table
---------
.. autofunction:: paddle.v2.fluid.layers.lod_rank_table
@@ -300,7 +294,14 @@ conv2d_transpose
.. autofunction:: paddle.v2.fluid.layers.conv2d_transpose
:noindex:


sequence_expand
---------
.. autofunction:: paddle.v2.fluid.layers.sequence_expand
:noindex:


lstm_unit
---------
.. autofunction:: paddle.v2.fluid.layers.lstm_unit
:noindex:
5 changes: 4 additions & 1 deletion paddle/operators/lstm_unit_op.cc
@@ -51,7 +51,10 @@ class LstmUnitOpMaker : public framework::OpProtoAndCheckerMaker {
LstmUnitOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "FC input before the non-linear activation.");
AddInput("X",
"Lstm unit only applies non-linear activations, please make sure"
"that linear tranformation has already been applied to `X`. "
"Linear tranformation can be applied by adding a `fc` layer");
AddInput(
"C_prev",
"The cell state tensor of last time-step in the Lstm Unit operator.");
117 changes: 113 additions & 4 deletions python/paddle/v2/fluid/layers/nn.py
@@ -5,12 +5,15 @@
from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant
from ..framework import Variable
from ..param_attr import ParamAttr
from tensor import concat

__all__ = [
'fc', 'embedding', 'dynamic_lstm', 'gru_unit', 'linear_chain_crf',
'crf_decoding', 'cos_sim', 'cross_entropy', 'square_error_cost', 'accuracy',
'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d',
'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand'
'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
'lstm_unit'
]


@@ -761,7 +764,7 @@ def conv2d_transpose(input,
return out


def sequence_expand(x, y, main_program=None, startup_program=None):
def sequence_expand(x, y):
"""Sequence Expand Layer. This layer will expand the input variable **x**
according to LoD information of **y**. And the following examples will
explain how sequence_expand works:
@@ -805,8 +808,6 @@ def sequence_expand(x, y, main_program=None, startup_program=None):
Args:
x (Variable): The input variable which is a Tensor or LoDTensor.
y (Variable): The input variable which is a LoDTensor.
main_program (Program): The main program.
startup_program (Program): The startup program.

Returns:
Variable: The expanded variable which is a LoDTensor.
@@ -826,3 +827,111 @@
type='sequence_expand', inputs={'X': x,
'Y': y}, outputs={'Out': tmp})
return tmp
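
The worked examples referenced in the docstring are collapsed in this diff view; as a rough usage sketch (shapes are illustrative, and the lod_level argument of fluid.layers.data is assumed):

    import paddle.v2.fluid as fluid

    # x holds one row per sequence; y only supplies the target LoD that each
    # row of x is expanded (repeated) to match.
    x = fluid.layers.data(name='x', shape=[10], dtype='float32')
    y = fluid.layers.data(name='y', shape=[10, 20], dtype='float32', lod_level=1)
    out = fluid.layers.sequence_expand(x=x, y=y)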


def lstm_unit(x_t,
hidden_t_prev,
cell_t_prev,
forget_bias=0.0,
[inline review comment]
Member: add ParamAttr for fc's weight and bias
Contributor Author: Done.

param_attr=None,
bias_attr=None):
"""Lstm unit layer. The equation of a lstm step is:

.. math::

i_t & = \sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i)

f_t & = \sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f)

c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t+W_{h_c}h_{t-1} + b_c)

o_t & = \sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o)

h_t & = o_t tanh(c_t)

The inputs of the lstm unit include :math:`x_t`, :math:`h_{t-1}` and
:math:`c_{t-1}`. The implementation separates the linear transformation
from the non-linear transformation. Here, we take :math:`i_t` as an
example. The linear transformation is applied by calling a `fc` layer and
the equation is:

.. math::

L_{i_t} = W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i

The non-linear transformation is applied by calling `lstm_unit_op` and the
equation is:

.. math::

i_t = \sigma(L_{i_t})

This layer has two outputs: :math:`h_t` and :math:`c_t`.

Args:
x_t (Variable): The input value of the current step.
hidden_t_prev (Variable): The hidden value of the previous step.
cell_t_prev (Variable): The cell value of the previous step.
forget_bias (float): The forget bias of lstm unit.
param_attr (ParamAttr): The attributes of parameter weights, used to set
initializer, name etc.
bias_attr (ParamAttr): The attributes of the bias weights. If not set to
False, the bias weights will be created and initialized to the default value.

Returns:
tuple: The hidden value and cell value of lstm unit.

Raises:
ValueError: The ranks of **x_t**, **hidden_t_prev** and **cell_t_prev** \
are not 2, or the 1st dimensions of **x_t**, **hidden_t_prev** \
and **cell_t_prev** are not the same.

Examples:

.. code-block:: python

x_t = fluid.layers.fc(input=x_t_data, size=10)
prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=20)
prev_cell = fluid.layers.fc(input=prev_cell_data, size=30)
hidden_value, cell_value = fluid.layers.lstm_unit(x_t=x_t,
hidden_t_prev=prev_hidden,
cell_t_prev=prev_cell)
"""
helper = LayerHelper('lstm_unit', **locals())

if len(x_t.shape) != 2:
[inline review comments]
Member: should be removed, this will be checked by infershape
Member: please remove all the shape checks; if needed, they can be added into the infershape of the operators.
Contributor Author: To make the exception more accurate, I think shape checking is also necessary here.

raise ValueError("Rank of x_t must be 2.")

if len(hidden_t_prev.shape) != 2:
raise ValueError("Rank of hidden_t_prev must be 2.")

if len(cell_t_prev.shape) != 2:
raise ValueError("Rank of cell_t_prev must be 2.")

if x_t.shape[0] != hidden_t_prev.shape[0] or x_t.shape[
0] != cell_t_prev.shape[0]:
raise ValueError("The 1s dimension of x_t, hidden_t_prev and "
"cell_t_prev must be the same.")

if bias_attr is None:
bias_attr = ParamAttr()

size = cell_t_prev.shape[1]
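# Project concat([x_t, hidden_t_prev]) to width 4 * size so that the lstm_unit
# op below receives the stacked pre-activations for i_t, f_t, o_t and the
# cell candidate in a single tensor.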
concat_out = concat(input=[x_t, hidden_t_prev], axis=1)
fc_out = fc(input=concat_out,
size=4 * size,
param_attr=param_attr,
bias_attr=bias_attr)
dtype = x_t.dtype
c = helper.create_tmp_variable(dtype)
h = helper.create_tmp_variable(dtype)

helper.append_op(
type='lstm_unit',
inputs={"X": fc_out,
"C_prev": cell_t_prev},
outputs={"C": c,
"H": h},
attrs={"forget_bias": forget_bias})

return h, c
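
For context, a minimal end-to-end sketch of one recurrence step built on this wrapper; sizes and layer names below are illustrative and follow the same pattern as the unit test added in this PR:

    import paddle.v2.fluid as fluid

    # Illustrative sizes; hidden and cell widths are kept equal so the outputs
    # could be fed back in as hidden_t_prev / cell_t_prev on the next step.
    x_t_data = fluid.layers.data(name='x_t_data', shape=[10], dtype='float32')
    prev_hidden_data = fluid.layers.data(
        name='prev_hidden_data', shape=[32], dtype='float32')
    prev_cell_data = fluid.layers.data(
        name='prev_cell_data', shape=[32], dtype='float32')

    x_t = fluid.layers.fc(input=x_t_data, size=10)
    prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=32)
    prev_cell = fluid.layers.fc(input=prev_cell_data, size=32)

    # Both returned Variables are rank 2 with width cell_t_prev.shape[1].
    hidden_t, cell_t = fluid.layers.lstm_unit(
        x_t=x_t, hidden_t_prev=prev_hidden, cell_t_prev=prev_cell,
        forget_bias=1.0)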
19 changes: 18 additions & 1 deletion python/paddle/v2/fluid/tests/test_layers.py
@@ -161,7 +161,7 @@ def test_sigmoid_cross_entropy(self):
x=dat, label=lbl))
print(str(program))

def test_seq_expand(self):
def test_sequence_expand(self):
program = Program()
with program_guard(program):
x = layers.data(name='x', shape=[10], dtype='float32')
@@ -170,6 +170,23 @@ def test_seq_expand(self):
self.assertIsNotNone(layers.sequence_expand(x=x, y=y))
print(str(program))

def test_lstm_unit(self):
program = Program()
with program_guard(program):
x_t_data = layers.data(
name='x_t_data', shape=[10, 10], dtype='float32')
x_t = layers.fc(input=x_t_data, size=10)
prev_hidden_data = layers.data(
name='prev_hidden_data', shape=[10, 20], dtype='float32')
prev_hidden = layers.fc(input=prev_hidden_data, size=20)
prev_cell_data = layers.data(
name='prev_cell', shape=[10, 30], dtype='float32')
prev_cell = layers.fc(input=prev_cell_data, size=30)
self.assertIsNotNone(
layers.lstm_unit(
x_t=x_t, hidden_t_prev=prev_hidden, cell_t_prev=prev_cell))
print(str(program))


if __name__ == '__main__':
unittest.main()