Add python wrapper for lstm unit op. #6669
@@ -5,12 +5,15 @@
 from ..layer_helper import LayerHelper
 from ..initializer import Normal, Constant
 from ..framework import Variable
+from ..param_attr import ParamAttr
+from tensor import concat

 __all__ = [
     'fc', 'embedding', 'dynamic_lstm', 'gru_unit', 'linear_chain_crf',
     'crf_decoding', 'cos_sim', 'cross_entropy', 'square_error_cost', 'accuracy',
     'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d',
-    'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand'
+    'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
+    'lstm_unit'
 ]

@@ -761,7 +764,7 @@ def conv2d_transpose(input,
     return out


-def sequence_expand(x, y, main_program=None, startup_program=None):
+def sequence_expand(x, y):
     """Sequence Expand Layer. This layer will expand the input variable **x**
     according to LoD information of **y**. And the following examples will
     explain how sequence_expand works:
@@ -805,8 +808,6 @@ def sequence_expand(x, y, main_program=None, startup_program=None):
     Args:
         x (Variable): The input variable which is a Tensor or LoDTensor.
         y (Variable): The input variable which is a LoDTensor.
-        main_program (Program): The main program.
-        startup_program (Program): The startup program.

     Returns:
         Variable: The expanded variable which is a LoDTensor.
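As a quick illustration of the simplified signature, here is a hedged sketch of a call site (the variable names, shapes, and the `fluid` import path are illustrative assumptions, not part of this PR):

    import paddle.v2.fluid as fluid  # assuming the fluid module path of this era

    # y supplies the LoD that x is expanded against; x itself is a plain Tensor.
    x = fluid.layers.data(name='x', shape=[10], dtype='float32')
    y = fluid.layers.data(name='y', shape=[10], dtype='float32', lod_level=1)

    # main_program/startup_program no longer need to be threaded through by hand.
    out = fluid.layers.sequence_expand(x=x, y=y)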
@@ -826,3 +827,111 @@ def sequence_expand(x, y, main_program=None, startup_program=None):
         type='sequence_expand', inputs={'X': x,
                                         'Y': y}, outputs={'Out': tmp})
     return tmp
+
+
+def lstm_unit(x_t,
+              hidden_t_prev,
+              cell_t_prev,
+              forget_bias=0.0,
+              param_attr=None,
+              bias_attr=None):
+    """Lstm unit layer. The equation of an lstm step is:
+
+        .. math::
+
+            i_t & = \sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i)
+
+            f_t & = \sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f)
+
+            c_t & = f_t c_{t-1} + i_t \tanh(W_{x_c}x_t + W_{h_c}h_{t-1} + b_c)
+
+            o_t & = \sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o)
+
+            h_t & = o_t \tanh(c_t)
+
+    The inputs of the lstm unit include :math:`x_t`, :math:`h_{t-1}` and
+    :math:`c_{t-1}`. The implementation separates the linear transformation
+    from the non-linear transformation. Here, we take :math:`i_t` as an
+    example. The linear transformation is applied by calling an `fc` layer and
+    the equation is:
+
+        .. math::
+
+            L_{i_t} = W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i
+
+    The non-linear transformation is applied by calling `lstm_unit_op` and the
+    equation is:
+
+        .. math::
+
+            i_t = \sigma(L_{i_t})
+
+    This layer has two outputs: :math:`h_t` and :math:`c_t`.
+
+    Args:
+        x_t (Variable): The input value of the current step.
+        hidden_t_prev (Variable): The hidden value of the lstm unit.
+        cell_t_prev (Variable): The cell value of the lstm unit.
+        forget_bias (float): The forget bias of the lstm unit.
+        param_attr (ParamAttr): The attributes of the parameter weights, used
+            to set the initializer, name, etc.
+        bias_attr (ParamAttr): The attributes of the bias weights. If not
+            False, bias weights will be created and set to default values.
+
+    Returns:
+        tuple: The hidden value and cell value of the lstm unit.
+
+    Raises:
+        ValueError: If the rank of **x_t**, **hidden_t_prev** or
+            **cell_t_prev** is not 2, or if the first dimensions of **x_t**,
+            **hidden_t_prev** and **cell_t_prev** are not the same.
+
+    Examples:
+
+        .. code-block:: python
+
+            x_t = fluid.layers.fc(input=x_t_data, size=10)
+            prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=20)
+            prev_cell = fluid.layers.fc(input=prev_cell_data, size=30)
+            hidden_value, cell_value = fluid.layers.lstm_unit(x_t=x_t,
+                                                              hidden_t_prev=prev_hidden,
+                                                              cell_t_prev=prev_cell)
+    """
+    helper = LayerHelper('lstm_unit', **locals())
+
+    if len(x_t.shape) != 2:
+        raise ValueError("Rank of x_t must be 2.")
+
+    if len(hidden_t_prev.shape) != 2:
+        raise ValueError("Rank of hidden_t_prev must be 2.")
+
+    if len(cell_t_prev.shape) != 2:
+        raise ValueError("Rank of cell_t_prev must be 2.")
+
+    if x_t.shape[0] != hidden_t_prev.shape[0] or x_t.shape[0] != cell_t_prev.shape[0]:
+        raise ValueError("The 1st dimension of x_t, hidden_t_prev and "
+                         "cell_t_prev must be the same.")

Review comment (on the shape checks above): These should be removed; this will be checked by infershape.

Review comment: Please remove all the shape checks; if needed, they can be added into the infershape of the operators.

Author reply: To make the exception more accurate, I think shape checking is also necessary here.

+    if bias_attr is None:
+        bias_attr = ParamAttr()
+
+    size = cell_t_prev.shape[1]
+    concat_out = concat(input=[x_t, hidden_t_prev], axis=1)
+    fc_out = fc(input=concat_out,
+                size=4 * size,
+                param_attr=param_attr,
+                bias_attr=bias_attr)
+    dtype = x_t.dtype
+    c = helper.create_tmp_variable(dtype)
+    h = helper.create_tmp_variable(dtype)
+
+    helper.append_op(
+        type='lstm_unit',
+        inputs={"X": fc_out,
+                "C_prev": cell_t_prev},
+        outputs={"C": c,
+                 "H": h},
+        attrs={"forget_bias": forget_bias})
+
+    return h, c
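To make the fused computation concrete, here is a minimal NumPy sketch of the arithmetic this wrapper sets up: a single `fc` over the concatenated `[x_t, hidden_t_prev]` produces all four gate pre-activations at once, and the element-wise `lstm_unit` op applies the recurrence. The `(i, f, c, o)` gate ordering and the placement of `forget_bias` are assumptions made to mirror the docstring equations, not a statement about the op's internal layout; note also that the fc only sees `x_t` and `h_prev`, so the peephole terms :math:`W_{c}c_{t-1}` from the docstring equations do not appear here.

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    batch, x_dim, size = 4, 10, 30
    rng = np.random.RandomState(0)

    x_t = rng.randn(batch, x_dim).astype('float32')
    h_prev = rng.randn(batch, size).astype('float32')
    c_prev = rng.randn(batch, size).astype('float32')

    # One fc over the concatenated inputs: weights for all four gates side by side.
    W = rng.randn(x_dim + size, 4 * size).astype('float32')
    b = np.zeros(4 * size, dtype='float32')

    # Linear transformation (the `fc` call in the wrapper).
    fc_out = np.concatenate([x_t, h_prev], axis=1).dot(W) + b

    # Non-linear transformation (the lstm_unit op); gate order assumed (i, f, c~, o).
    i, f, c_tilde, o = np.split(fc_out, 4, axis=1)

    forget_bias = 0.0
    c = sigmoid(f + forget_bias) * c_prev + sigmoid(i) * np.tanh(c_tilde)
    h = sigmoid(o) * np.tanh(c)

    print(h.shape, c.shape)  # (4, 30) (4, 30)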
Review comment: add ParamAttr for fc's weight and bias.

Author reply: Done.
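Following up on that thread, here is a hedged sketch of how explicit attributes for the fused fc's weight and bias might be passed through `lstm_unit`. It reuses the placeholder inputs from the docstring example; the parameter names, initializer choices, and the `fluid` import path are illustrative assumptions only:

    import paddle.v2.fluid as fluid  # assuming the fluid module path of this era
    from paddle.v2.fluid.param_attr import ParamAttr
    from paddle.v2.fluid.initializer import Normal, Constant

    x_t = fluid.layers.fc(input=x_t_data, size=10)
    prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=30)
    prev_cell = fluid.layers.fc(input=prev_cell_data, size=30)

    hidden, cell = fluid.layers.lstm_unit(
        x_t=x_t,
        hidden_t_prev=prev_hidden,
        cell_t_prev=prev_cell,
        forget_bias=1.0,
        # Both attrs configure the single fc that produces all four gate values.
        param_attr=ParamAttr(name='lstm_fc_w', initializer=Normal(scale=0.01)),
        bias_attr=ParamAttr(name='lstm_fc_b', initializer=Constant(value=0.0)))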