From 8e89dd36f1c9f11ad7ca66cd384f7d46d66fdae9 Mon Sep 17 00:00:00 2001 From: Lu Qi <61354321+MarioLulab@users.noreply.github.com> Date: Fri, 22 Sep 2023 10:21:19 +0800 Subject: [PATCH] [GLCC]Part-3: Support jit.save and jit.load for pylayer op (#57066) * complete static_pylayer op * finish static_pylayer op context manager * finish single test * append import path * maybe modify test/ir/inference * percept static_pylayer op in dy2st --- paddle/fluid/framework/prune.cc | 181 +++++++-- python/paddle/jit/dy2static/py_layer.py | 15 +- python/paddle/static/io.py | 14 + python/paddle/static/nn/static_pylayer.py | 35 +- test/dygraph_to_static/test_pylayer.py | 344 ++++++++++++++++- test/legacy_test/test_jit_save_load.py | 4 +- .../test_program_prune_backward.py | 89 +++++ test/legacy_test/test_prune.py | 365 ++++++++++-------- test/legacy_test/test_static_pylayer.py | 252 +++++++++++- 9 files changed, 1078 insertions(+), 221 deletions(-) diff --git a/paddle/fluid/framework/prune.cc b/paddle/fluid/framework/prune.cc index d4c2021d5f6e16..93467b549d6e9a 100644 --- a/paddle/fluid/framework/prune.cc +++ b/paddle/fluid/framework/prune.cc @@ -30,6 +30,8 @@ const char kRecurrent[] = "recurrent"; // NOLINT const char kStates[] = "states"; // NOLINT const char kExStates[] = "ex_states"; // NOLINT +const char kPyLayer[] = "pylayer"; // NOLINT + bool HasDependentInputVar( const proto::OpDesc& op_desc, const std::unordered_set& dependent_vars) { @@ -86,6 +88,23 @@ int GetSubBlockIndex(const proto::OpDesc& op_desc) { return -1; } +void GetSubBlocksIndices(const proto::OpDesc& op_desc, + std::vector* indices) { + for (auto& attr : op_desc.attrs()) { + if (attr.type() == proto::AttrType::BLOCKS) { + PADDLE_ENFORCE_GT( + attr.blocks_idx_size(), + 0, + platform::errors::NotFound( + "Attribute blocks is not found in operator %s", op_desc.type())); + indices->resize(attr.blocks_idx_size()); + for (int i = 0; i < attr.blocks_idx_size(); i++) { + (*indices)[i] = attr.blocks_idx(i); + } + } + } +} + void SetSubBlockIndex(proto::OpDesc* op_desc, int sub_idx) { for (auto& attr : *op_desc->mutable_attrs()) { if (attr.type() == proto::AttrType::BLOCK) { @@ -99,10 +118,43 @@ void SetSubBlockIndex(proto::OpDesc* op_desc, int sub_idx) { } } +void SetSubBlocksIndices(proto::OpDesc* op_desc, + const std::vector& sub_indices) { + for (auto& attr : *op_desc->mutable_attrs()) { + if (attr.type() == proto::AttrType::BLOCKS) { + PADDLE_ENFORCE_GT( + attr.blocks_idx_size(), + 0, + platform::errors::NotFound( + "Attribute blocks is not found in operator %s", op_desc->type())); + attr.clear_blocks_idx(); + for (auto idx : sub_indices) { + attr.add_blocks_idx(idx); + } + } + } +} + bool HasSubBlock(const proto::OpDesc& op_desc) { return GetSubBlockIndex(op_desc) > 0; } +bool HasSubBlocks(const proto::OpDesc& op_desc) { + // ``blocks_idx_size() == 0`` indicates no sub blocks. + for (auto& attr : op_desc.attrs()) { + if (attr.type() == proto::AttrType::BLOCKS) { + PADDLE_ENFORCE_GT( + attr.blocks_idx_size(), + 0, + platform::errors::NotFound( + "Attribute blocks is not found in operator %s", op_desc.type())); + return true; + } + } + + return false; +} + int GetOpRole(const proto::OpDesc& op_desc) { for (auto& attr : op_desc.attrs()) { if (attr.name() == OpProtoAndCheckerMaker::OpRoleAttrName()) { @@ -150,14 +202,15 @@ int FindMapByValue(const std::map& m, int val) { } // In other two cases, the op that has feed vars as output vars is dependent: -// 1. op has subblock, like while/for/ifelse/recurrent +// 1. 
op has subblock, like while/for/ifelse/recurrent/pylayer // 2. op is in subblock bool IsSubBlockDependent(const proto::OpDesc& op_desc, const std::set& feed_vars, int parent_block_id) { for (auto& var : op_desc.outputs()) { for (auto& argu : var.arguments()) { - if ((HasSubBlock(op_desc) || parent_block_id != -1) && + if ((HasSubBlock(op_desc) || HasSubBlocks(op_desc) || + parent_block_id != -1) && feed_vars.count(argu) != 0) { return true; } @@ -289,7 +342,7 @@ void prune_impl(const proto::ProgramDesc& input, if (should_run[i]) { auto* op = op_field->Add(); *op = input.blocks(block_id).ops(static_cast(i)); - if (HasSubBlock(*op)) { + if (HasSubBlock(*op) || HasSubBlocks(*op)) { VLOG(2) << "Pruning op which has sub block: " << op->type(); // create sub_block_dependent_vars here to help prune the sub block std::unordered_set sub_block_dependent_vars; @@ -321,15 +374,41 @@ void prune_impl(const proto::ProgramDesc& input, } } } - // GetSubBlockIndex(*op) is the idx of the sub_block in the input desc - // output_block_id is the idx of the current block in the output desc - prune_impl(input, - output, - GetSubBlockIndex(*op), - output_block_id, - &sub_block_dependent_vars, - feed_var_names, - pruned_origin_block_id_map); + if (HasSubBlock(*op)) { + // GetSubBlockIndex(*op) is the idx of the sub_block in the input desc + // output_block_id is the idx of the current block in the output desc + prune_impl(input, + output, + GetSubBlockIndex(*op), + output_block_id, + &sub_block_dependent_vars, + feed_var_names, + pruned_origin_block_id_map); + } else if (HasSubBlocks(*op)) { + // GetSubBlocksIndices(*op) are the indices of the sub_blocks in the + // input desc output_block_id is the idx of the current block in the + // output desc + std::vector sub_indices; + GetSubBlocksIndices(*op, &sub_indices); + for (auto& sub_index : sub_indices) { + // create a copy of dependent_vars to avoid being overwrited by the + // other sub_block + std::unordered_set dependent_vars_copy = + sub_block_dependent_vars; + prune_impl(input, + output, + sub_index, + output_block_id, + &dependent_vars_copy, + feed_var_names, + pruned_origin_block_id_map); + } + } else { + PADDLE_ENFORCE(false, + platform::errors::PreconditionNotMet( + "Attr Block or Blocks must exist when recursively " + "calling prune_impl")); + } } } } @@ -402,12 +481,29 @@ std::map Prune(const proto::ProgramDesc& input, int origin_sub_idx = GetSubBlockIndex(op_desc); auto sub_idx = FindMapByValue(pruned_origin_block_id_map, origin_sub_idx); - PADDLE_ENFORCE_NE(sub_idx, - -1, - platform::errors::NotFound( - "The origin sub block id should be found in " - "pruned_progin_block_id_map")); + PADDLE_ENFORCE_NE( + sub_idx, + -1, + platform::errors::NotFound( + "The origin sub block id should be found in " + "pruned_progin_block_id_map when the op has sub_block")); SetSubBlockIndex(&op_desc, sub_idx); + } else if (HasSubBlocks(op_desc)) { + std::vector origin_sub_indices; + GetSubBlocksIndices(op_desc, &origin_sub_indices); + std::vector sub_indices; + for (int index : origin_sub_indices) { + auto sub_idx = FindMapByValue(pruned_origin_block_id_map, index); + PADDLE_ENFORCE_NE( + sub_idx, + -1, + platform::errors::NotFound( + "The origin sub block id should be found in " + "pruned_progin_block_id_map when the op has sub_blocks")); + sub_indices.push_back(sub_idx); + } + + SetSubBlocksIndices(&op_desc, sub_indices); } } } @@ -441,6 +537,19 @@ void PruneBackwardImpl(proto::BlockDesc* origin, proto::BlockDesc* pruned) { AppendOpInputVarNames(op_desc, 
&op_input_vars); AppendOpOutputVarNames(op_desc, &op_output_vars); *op = op_desc; + + // if the type of op is "pylayer", we need to update the ``blocks`` + // attribute because the backward block will be pruned + if (op->type() == kPyLayer && HasSubBlocks(*op)) { + std::vector sub_indices; + GetSubBlocksIndices(*op, &sub_indices); + if (sub_indices.size() > 1) { + // sub_indices contains both forward block id and backward block id + std::vector new_sub_indices(sub_indices.begin(), + sub_indices.end() - 1); + SetSubBlocksIndices(op, new_sub_indices); + } + } } } @@ -471,9 +580,10 @@ std::tuple> PruneBackward( // Copy original ProgramDesc, origin can't be change framework::ProgramDesc origin_clone(origin); - // Step 1. check if the program contains grad loss operator. - // If not, the program need no pruning. + // Step 1. check if the program contains grad loss operator or pylayer + // operator. If not, the program need no pruning. bool has_loss_grad_op = false; + bool has_pylayer_op = false; std::queue block_contains_loss; std::queue block_contains_loss_grad; for (size_t i = 0; i < origin_clone.Size(); i++) { @@ -485,13 +595,15 @@ std::tuple> PruneBackward( static_cast(OpRole::kLoss))) { op->SetIsTarget(false); has_loss_grad_op = true; - break; + } + if (op->Type() == kPyLayer) { + has_pylayer_op = true; } } } std::map pruned_progin_block_id_map; - if (!has_loss_grad_op) { + if (!has_loss_grad_op && !has_pylayer_op) { // No pruning, fast return a copy of the origin ProgramDesc with an empty // map, means default mapped, i.e.{0:0, 1:1, ..., n:n}. return std::make_tuple(framework::ProgramDesc(origin_clone), @@ -544,12 +656,29 @@ std::tuple> PruneBackward( int origin_sub_idx = GetSubBlockIndex(op_desc); auto sub_idx = FindMapByValue(pruned_progin_block_id_map, origin_sub_idx); - PADDLE_ENFORCE_NE(sub_idx, - -1, - platform::errors::NotFound( - "The origin sub block id is not found in " - "pruned_progin_block_id_map")); + PADDLE_ENFORCE_NE( + sub_idx, + -1, + platform::errors::NotFound( + "The origin sub block id is not found in " + "pruned_progin_block_id_map when the op has sub_block")); SetSubBlockIndex(&op_desc, sub_idx); + } else if (HasSubBlocks(op_desc)) { + std::vector origin_sub_indices; + GetSubBlocksIndices(op_desc, &origin_sub_indices); + std::vector sub_indices; + for (int index : origin_sub_indices) { + auto sub_idx = FindMapByValue(pruned_progin_block_id_map, index); + PADDLE_ENFORCE_NE( + sub_idx, + -1, + platform::errors::NotFound( + "The origin sub block id should be found in " + "pruned_progin_block_id_map when the op has sub_blocks")); + sub_indices.push_back(sub_idx); + } + + SetSubBlocksIndices(&op_desc, sub_indices); } } } diff --git a/python/paddle/jit/dy2static/py_layer.py b/python/paddle/jit/dy2static/py_layer.py index 1d238e667c6535..b32397b0aa3ee1 100644 --- a/python/paddle/jit/dy2static/py_layer.py +++ b/python/paddle/jit/dy2static/py_layer.py @@ -13,6 +13,7 @@ # limitations under the License. 
import functools +import inspect from paddle.base.framework import Variable from paddle.common_ops_import import LayerHelper @@ -73,9 +74,19 @@ def __init__(self, dyfunc_self): ) # NOTE: only support position args and Variables Now - def apply(self, *args): + def apply(self, *args, **kwargs): + # rearrange `position-args + keyword-args` into `position-args` + dyfunc_sig = inspect.signature(self.dyfunc_self.forward) + bound_args = dyfunc_sig.bind(self.dyfunc_self, *args, **kwargs) + bound_args.apply_defaults() + input_args = [ + item + for i, item in enumerate(bound_args.arguments.values()) + if i > 0 + ] # index 0 indicate `dyfunc_self` which shouldn't be put into `input_args` + return static_pylayer( forward_fn=self.forward_fn_with_ctx, - inputs=list(args), + inputs=input_args, backward_fn=self.backward_fn_with_ctx, ) diff --git a/python/paddle/static/io.py b/python/paddle/static/io.py index 8f68f3f9e89bfd..943e8525ba466d 100644 --- a/python/paddle/static/io.py +++ b/python/paddle/static/io.py @@ -274,6 +274,20 @@ def normalize_program(program, feed_vars, fetch_vars, **kwargs): op.desc.set_is_target(False) if op.type == "feed" or op.type == "fetch": remove_op_idx.append(i) + + if op.type == "pylayer": + sub_blocks_ids = op._blocks_attr_ids("blocks") + if len(sub_blocks_ids) > 1: + # pylayer op ``blocks`` attr contains forward block id and backward block id + backward_block_id = sub_blocks_ids[-1] + # remove backward block + copy_program.blocks.pop(backward_block_id) + # update attrs ``blocks`` + reserverd_blocks = [] + for block_id in sub_blocks_ids[:-1]: + reserverd_blocks.append(copy_program.block(block_id)) + op._update_desc_attr("blocks", reserverd_blocks) + for idx in remove_op_idx[::-1]: global_block._remove_op(idx) copy_program.desc.flush() diff --git a/python/paddle/static/nn/static_pylayer.py b/python/paddle/static/nn/static_pylayer.py index 91d0f9d2351ffe..3dcf35e50e54b0 100644 --- a/python/paddle/static/nn/static_pylayer.py +++ b/python/paddle/static/nn/static_pylayer.py @@ -45,11 +45,13 @@ def __exit__(self, exc_type, exc_val, exc_tb): class StaticPyLayerBlock: def __init__(self, inputs, name=None, pylayer_context=None): - for each_input in inputs: - check_type(each_input, "input", Variable, "StaticPyLayerBlock") + # used to specify the Variable type `Input` to `pylayer` op + self.fwd_inputs = [ + each_input + for each_input in inputs + if isinstance(each_input, Variable) + ] # filter non-Variable inputs - # used to specify the `Input` to `pylayer` op - self.fwd_inputs = inputs # used to specify the `Out` to `pylayer` op self.fwd_outputs = [] @@ -105,7 +107,7 @@ def complete_backward_block(self): parent_block = self.helper.main_program.block(inside_block.parent_idx) self._backward_block_id = inside_block.idx - # set OpRole to `backward` + # Set OpRole to `backward`. The operators marked as `backward` are expected to be pruned in PruneBackward. 
for op in inside_block.ops: op_role_attr_name = ( core.op_proto_and_checker_maker.kOpRoleAttrName() @@ -234,8 +236,6 @@ def copy_var_from_parent_block(parent_block_var, layer_helper): return current_block_var -# TODO(MarioLulab): -# Need to support non-Variable in ``inputs`` def static_pylayer(forward_fn, inputs, backward_fn=None, name=None): """ This API returns ``forward_fn(inputs)``, and two sub-block are created based on @@ -344,7 +344,9 @@ def static_pylayer(forward_fn, inputs, backward_fn=None, name=None): origin_output = forward_fn(*inputs) if origin_output is not None: output = map_structure(copy_to_parent_func, origin_output) - mgr.fwd_outputs = flatten(output) + mgr.fwd_outputs = [ + x for x in flatten(output) if isinstance(x, Variable) + ] else: mgr.fwd_outputs = [] @@ -358,7 +360,7 @@ def static_pylayer(forward_fn, inputs, backward_fn=None, name=None): # **Create the backward input** from the output of the op to build the # backward block, and then delete it. grad_var_ins = [] - for fwd_var in flatten(output): + for fwd_var in pylayer_block_manager.fwd_outputs: fwd_var_name = fwd_var.name bwd_var_name = _append_grad_suffix_(fwd_var_name) if not current_block.desc.has_var_recursive(fwd_var_name.encode()): @@ -405,7 +407,7 @@ def static_pylayer(forward_fn, inputs, backward_fn=None, name=None): but got {len(forward_input_names)} and {len(flat_grad_origin)}" # Step4. Rename var name with suffix of "@GRAD" - for bwd_output_name, fwd_input_name in zip( + for bwd_output, fwd_input_name in zip( flat_grad_origin, forward_input_names ): # NOTE(MarioLulab): Because `flat_grad_origin` are the Variables inside the backward block, which one by one corresponds @@ -428,12 +430,13 @@ def static_pylayer(forward_fn, inputs, backward_fn=None, name=None): # TODO(MarioLulab): We will validate the assumption above is whether a strong hypothesis or not. # attach old var name into new - bwd_out_new = _append_grad_suffix_( - fwd_input_name - ) # "X" => "X@GRAD" - mgr.var_old_to_new[ - bwd_output_name.name - ] = bwd_out_new # e.g. "tmp_0.mean_0": "X@GRAD" + if isinstance(bwd_output, Variable): + bwd_out_new = _append_grad_suffix_( + fwd_input_name + ) # "X" => "X@GRAD" + mgr.var_old_to_new[ + bwd_output.name + ] = bwd_out_new # e.g. "tmp_0.mean_0": "X@GRAD" # **Delete the backward input** for bwd_var in grad_var_ins: diff --git a/test/dygraph_to_static/test_pylayer.py b/test/dygraph_to_static/test_pylayer.py index 88558e3d628fb4..ee2d1248e5f634 100644 --- a/test/dygraph_to_static/test_pylayer.py +++ b/test/dygraph_to_static/test_pylayer.py @@ -15,9 +15,12 @@ """Tests for PyLayer of Dynamic-to-Static. 
Only test simple cases here.""" +import os +import tempfile import unittest import numpy as np +from legacy_test.test_jit_save_load import train import paddle from paddle.autograd.py_layer import PyLayer @@ -51,7 +54,7 @@ def backward(ctx, dy): class scaled_layer_2(PyLayer): @staticmethod def forward(ctx, x1, x2): - y = x1 * x2 + y = 3 * x1 + x2 / 5 return y @staticmethod @@ -75,6 +78,78 @@ def backward(ctx, dy): return grad +class cus_tanh_2(PyLayer): + @staticmethod + def forward(ctx, x, func1, func2=paddle.square): + ctx.func = func2 + y = func1(x) + ctx.save_for_backward(y) + return y + + @staticmethod + def backward(ctx, dy): + (y,) = ctx.saved_tensor() + grad = dy * (1 - ctx.func(y)) + return grad + + +class cus_tanh_3(PyLayer): + @staticmethod + def forward(ctx, x1, x2, func1, func2=paddle.square): + ctx.func = func2 + y1 = func1(x1) + y2 = func1(x2) + ctx.save_for_backward(y1, y2) + return 1, None, y1, y2, '' + + @staticmethod + def backward(ctx, dy1, dy2): + y1, y2 = ctx.saved_tensor() + re1 = dy1 * (1 - ctx.func(y1)) + re2 = dy2 * (1 - paddle.square(y2)) + return re1, None + + +def user_defined_tanh(x): + y = paddle.tanh(x) + return y + + +def user_defined_square(x): + y = paddle.square(x) + return y + + +class cus_tanh_4(PyLayer): + @staticmethod + def forward(ctx, x, func, name="cus_tanh_4"): + ctx.func = func + y = user_defined_tanh(x) + ctx.save_for_backward(y) + return y + + @staticmethod + def backward(ctx, dy): + (y,) = ctx.saved_tensor() + grad = dy * (1 - ctx.func(y)) + return grad + + +class cus_sigmoid(PyLayer): + @staticmethod + def forward(ctx, x, func1, func2): + ctx.func = func2 + y = 1 / (1 + func1(-x)) + ctx.save_for_backward(x) + return y + + @staticmethod + def backward(ctx, dy): + (x,) = ctx.saved_tensor() + grad = dy * ctx.func(x) * (1 - ctx.func(x)) + return grad + + class nested_layer(PyLayer): @staticmethod def forward(ctx, x1, x2): @@ -92,9 +167,9 @@ def backward(ctx, dy): class SimpleNet_1(paddle.nn.Layer): - def __init__(self): + def __init__(self, in_size, out_size): super().__init__() - self.linear = paddle.nn.Linear(4, 8) + self.linear = paddle.nn.Linear(in_size, out_size) @paddle.jit.to_static def forward(self, data): @@ -103,6 +178,30 @@ def forward(self, data): return z +class SimpleNet_2(paddle.nn.Layer): + def __init__(self, in_size, out_size): + super().__init__() + self.linear = paddle.nn.Linear(in_size, out_size) + + def forward(self, x): + y = self.linear(x) + out = cus_tanh_2.apply(y, func1=paddle.tanh) + return out + + +class SimpleNet_3(paddle.nn.Layer): + def __init__(self, in_size, out_size): + super().__init__() + self.linear = paddle.nn.Linear(in_size, out_size) + + def forward(self, x): + y = self.linear(x) + out = cus_sigmoid.apply( + y, func1=paddle.exp, func2=paddle.nn.functional.sigmoid + ) + return out + + class SimpleNetInplace(paddle.nn.Layer): def __init__(self): super().__init__() @@ -115,6 +214,48 @@ def forward(self, data): return z +class SimplePyLayerNet(paddle.nn.Layer): + def __init__(self, in_size, out_size): + super().__init__() + self.linear = paddle.nn.Linear(in_size, out_size) + + @paddle.jit.to_static + def forward(self, x): + y = self.linear(x) + out = cus_tanh_2.apply(y, func1=paddle.tanh) + out = paddle.mean(out) + return out + + +class SimplePyLayerNetMultiIn(paddle.nn.Layer): + def __init__(self, in_size, out_size): + super().__init__() + self.linear1 = paddle.nn.Linear(in_size, out_size) + self.linear2 = paddle.nn.Linear(in_size, out_size) + + @paddle.jit.to_static + def forward(self, x1, x2): + y1 = 
self.linear1(x1) + y2 = self.linear1(x2) + out = cus_tanh_2.apply(y1, func1=paddle.tanh) + out = out + y2 + out = paddle.mean(out) + return out + + +class SimplePyLayerNetStopGrad(paddle.nn.Layer): + def __init__(self, in_size, out_size): + super().__init__() + self.linear = paddle.nn.Linear(in_size, out_size) + + @paddle.jit.to_static + def forward(self, x): + y = self.linear(x) + y.stop_gradient = True + out = cus_tanh_2.apply(y, func1=paddle.tanh) + return out + + class TestPyLayerBase(unittest.TestCase): def setUp(self): self.place = "gpu" if paddle.is_compiled_with_cuda() else "cpu" @@ -269,10 +410,69 @@ def test_func(x1, x2): self._run_and_compare(input1, input2) + def test_apply_kwargs_pylayer(self): + @paddle.jit.to_static + def test_func(x1, x2): + y = scaled_layer_2.apply(x1=x2, x2=x1) + return y + + self.dygraph_func = test_func + + input1 = paddle.randn([2, 3]).astype("float32") + input2 = paddle.randn([2, 3]).astype("float32") + input1.stop_gradient = False + input2.stop_gradient = False + + self._run_and_compare(input1, input2) + + def test_non_variable_inputs(self): + @paddle.jit.to_static + def test_func(x): + y = cus_tanh_2.apply(x, func1=paddle.tanh) + return y + + self.dygraph_func = test_func + + input1 = paddle.randn([2, 3]).astype("float32") + input1.stop_gradient = False + + self._run_and_compare(input1) + + def test_simple_pylayer_return_none_with_no_grad(self): + @paddle.jit.to_static + def test_func(input1, input2): + z = cus_tanh_3.apply(input1, input2, paddle.tanh, paddle.square) + z = z[2] + z[3] + return z + + self.dygraph_func = test_func + + input1 = paddle.randn([2, 3]).astype("float32") + input2 = paddle.randn([2, 3]).astype("float32") + input1.stop_gradient = False + input2.stop_gradient = True + + self._run_and_compare(input1, input2) + + def test_non_variable_inputs_and_userdefined_call(self): + @paddle.jit.to_static + def test_func(input1): + y = cus_tanh_4.apply( + input1, func=user_defined_square, name="cus_tanh_test" + ) + return y + + self.dygraph_func = test_func + + input1 = paddle.randn([2, 3]).astype("float32") + input1.stop_gradient = False + + self._run_and_compare(input1) + class TestPyLayerInsideNet(TestPyLayerBase): def test_single_in_single_out(self): - simple_net = SimpleNet_1() + simple_net = SimpleNet_1(in_size=4, out_size=8) self.dygraph_func = simple_net input1 = paddle.randn([3, 4]).astype("float32") @@ -287,6 +487,142 @@ def test_inplace(self): input1.stop_gradient = False self._run_and_compare(input1) + def test_non_variable_args_pylayernet(self): + simple_net = SimplePyLayerNet(in_size=4, out_size=8) + self.dygraph_func = simple_net + + input1 = paddle.randn([3, 4]).astype("float32") + input1.stop_gradient = False + self._run_and_compare(input1) + + def test_pylayer_net_with_no_grad(self): + simple_net = SimplePyLayerNetMultiIn(in_size=4, out_size=8) + self.dygraph_func = simple_net + + input1 = paddle.randn([3, 4]).astype("float32") + input2 = paddle.randn([3, 4]).astype("float32") + input1.stop_gradient = False + input2.stop_gradient = True + self._run_and_compare(input1, input2) + + +class PyLayerTrainHelper(unittest.TestCase): + def setUp(self): + self.place = "gpu" if paddle.is_compiled_with_cuda() else "cpu" + + def _run_train(self, to_static, layer_builder, build_strategy=None): + """ + Tests model decorated by `dygraph_to_static_output` in static graph mode. For users, the model is defined in dygraph mode and trained in static graph mode. 
+ """ + paddle.jit.enable_to_static(to_static) + + paddle.set_device(self.place) + np.random.seed(SEED) + paddle.seed(SEED) + paddle.framework.random._manual_program_seed(SEED) + + # net = self.build_layer() + net = layer_builder() + if to_static: + net = paddle.jit.to_static(net, build_strategy=build_strategy) + + _, _, avg_loss = train(net) + return avg_loss.numpy() + + +class TestTrainingPyLayer(PyLayerTrainHelper): + def test_tanh_pylayer(self): + build_layer = lambda: SimpleNet_2(784, 20) + + static_loss = self._run_train(to_static=True, layer_builder=build_layer) + dygraph_loss = self._run_train( + to_static=False, layer_builder=build_layer + ) + + np.testing.assert_allclose( + static_loss, + dygraph_loss, + rtol=1e-05, + err_msg=f'static_loss: {static_loss} \n dygraph_loss: {dygraph_loss}', + ) + + def test_sigmoid_pylayer(self): + build_layer = lambda: SimpleNet_3(784, 20) + + static_loss = self._run_train(to_static=True, layer_builder=build_layer) + dygraph_loss = self._run_train( + to_static=False, layer_builder=build_layer + ) + + np.testing.assert_allclose( + static_loss, + dygraph_loss, + rtol=1e-05, + err_msg=f'static_loss: {static_loss} \n dygraph_loss: {dygraph_loss}', + ) + + def test_pylayer_net_no_grad(self): + build_layer = lambda: SimplePyLayerNetStopGrad(784, 20) + + static_loss = self._run_train(to_static=True, layer_builder=build_layer) + dygraph_loss = self._run_train( + to_static=False, layer_builder=build_layer + ) + + np.testing.assert_allclose( + static_loss, + dygraph_loss, + rtol=1e-05, + err_msg=f'static_loss: {static_loss} \n dygraph_loss: {dygraph_loss}', + ) + + +class TestPyLayerJitSaveLoad(unittest.TestCase): + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + self.model_path = os.path.join( + self.temp_dir.name, "test_pylayer/jit_save_model" + ) + # enable dygraph mode + paddle.base.enable_dygraph() + # config seed + paddle.seed(SEED) + paddle.framework.random._manual_program_seed(SEED) + + def tearDown(self): + self.temp_dir.cleanup() + + def train_and_save_model(self, model_path=None): + layer = SimpleNet_1(784, 20) + example_inputs, layer, _ = train(layer) + final_model_path = model_path if model_path else self.model_path + orig_input_types = [type(x) for x in example_inputs] + paddle.jit.save( + layer=layer, path=final_model_path, input_spec=example_inputs + ) + new_input_types = [type(x) for x in example_inputs] + self.assertEqual(orig_input_types, new_input_types) + return layer + + def test_save_load(self): + # train and save model + train_layer = self.train_and_save_model() + # load model + loaded_layer = paddle.jit.load(self.model_path) + self.load_and_inference(train_layer, loaded_layer) + + def load_and_inference(self, train_layer, infer_layer): + train_layer.eval() + infer_layer.eval() + # inference & compare + x = paddle.base.dygraph.to_variable( + np.random.random((1, 784)).astype('float32') + ) + train_layer_result = train_layer(x).numpy() + infer_layer_result = infer_layer(x).numpy() + + np.testing.assert_array_equal(train_layer_result, infer_layer_result) + if __name__ == "__main__": unittest.main() diff --git a/test/legacy_test/test_jit_save_load.py b/test/legacy_test/test_jit_save_load.py index e2df76f4751946..71c5c06a716b24 100644 --- a/test/legacy_test/test_jit_save_load.py +++ b/test/legacy_test/test_jit_save_load.py @@ -301,7 +301,7 @@ def forward_general(self, x): def train(layer, input_size=784, label_size=1): # create optimizer sgd = paddle.optimizer.SGD( - learning_rate=0.01, parameter_list=layer.parameters() 
+ learning_rate=0.01, parameters=layer.parameters() ) # create data loader train_loader = base.io.DataLoader.from_generator(capacity=5) @@ -316,7 +316,7 @@ def train(layer, input_size=784, label_size=1): cost = layer(img) loss = paddle.nn.functional.cross_entropy( - cost, label, reduction='none', use_softmax=False + cost, label, reduction='none', use_softmax=True ) avg_loss = paddle.mean(loss) diff --git a/test/legacy_test/test_program_prune_backward.py b/test/legacy_test/test_program_prune_backward.py index 237684e3b0bd97..581635d5a68ada 100755 --- a/test/legacy_test/test_program_prune_backward.py +++ b/test/legacy_test/test_program_prune_backward.py @@ -81,6 +81,27 @@ def loss2(pred, label): return avg_loss +def pylayer_net(use_feed=None): + x = paddle.static.data(name="x", shape=[-1, 4], dtype='float32') + label = paddle.static.data('label', shape=[-1, 1], dtype='int64') + + def forward_fn(x): + y = 3 * x + return y + + def backward_fn(dy): + grad = paddle.exp(dy) + return grad + + y = paddle.static.nn.static_pylayer(forward_fn, [x], backward_fn) + hidden = paddle.static.nn.fc(x=[y], size=4, activation="softmax") + loss = paddle.nn.functional.cross_entropy( + input=hidden, label=label, reduction='none', use_softmax=False + ) + loss = paddle.mean(loss, name='mean_softmax_loss') + return loss + + def optimization_in_cond_net(with_optimize=False): x = paddle.static.data(name="x", shape=[-1, 4], dtype='float32') label = paddle.static.data('label', shape=[-1, 1], dtype='int64') @@ -115,6 +136,31 @@ def loss2(opt, pred, label, with_optimize): return avg_loss +def optimization_in_pylayer_net(with_optimize=False): + x = paddle.static.data(name="x", shape=[-1, 4], dtype='float32') + label = paddle.static.data('label', shape=[-1, 1], dtype='int64') + + def forward_fn(x): + y = 3 * x + return y + + def backward_fn(dy): + grad = paddle.exp(dy) + return grad + + y = paddle.static.nn.static_pylayer(forward_fn, [x], backward_fn) + hidden = 3 * y + loss = paddle.nn.functional.softmax_with_cross_entropy( + logits=hidden, label=label + ) + loss = paddle.mean(loss, name='mean_softmax_loss') + sgd = paddle.optimizer.SGD(learning_rate=0.1) + if with_optimize: + sgd.minimize(loss) + + return loss + + class TestProgramPruneBackward(unittest.TestCase): def program_compare(self, program_a, program_b): assert isinstance( @@ -249,6 +295,19 @@ def optimizer(): method=cond_net, feed_dict=feed_dict, optimizer=optimizer ) + def test_pylayer(self): + def optimizer(): + optimizer = paddle.optimizer.SGD(learning_rate=0.01) + return optimizer + + with self.program_scope_guard(): + x_in = np.random.random(size=(10, 4)).astype('float32') + label_in = np.random.randint(1, size=(10, 1)).astype('int64') + feed_dict = {'x': x_in, 'label': label_in} + self.check_prune_correctness( + method=pylayer_net, feed_dict=feed_dict, optimizer=optimizer + ) + def test_optimization_in_cond(self): x_in = np.random.random(size=(10, 4)).astype('float32') label_in = np.random.randint(1, size=(10, 1)).astype('int64') @@ -279,6 +338,36 @@ def test_optimization_in_cond(self): self.program_compare(test_prog_orig, test_prog_prune) self.assertEqual(loss_data_orig, loss_data_prune) + def test_optimization_in_pylayer(self): + x_in = np.random.random(size=(10, 4)).astype('float32') + label_in = np.random.randint(1, size=(10, 1)).astype('int64') + feed_dict = {'x': x_in, 'label': label_in} + with self.program_scope_guard(): + loss = optimization_in_pylayer_net(False) + main_program = base.default_main_program() + test_prog_orig = 
main_program.clone(for_test=True) + place = core.CPUPlace() + exe = base.Executor(place) + exe.run(base.default_startup_program()) + (loss_data_orig,) = exe.run( + test_prog_orig, feed=feed_dict, fetch_list=[loss.name] + ) + + with self.program_scope_guard(): + loss = optimization_in_pylayer_net(True) + main_program = base.default_main_program() + test_prog_prune = main_program.clone(for_test=True) + + place = core.CPUPlace() + exe = base.Executor(place) + exe.run(base.default_startup_program()) + (loss_data_prune,) = exe.run( + test_prog_prune, feed=feed_dict, fetch_list=[loss.name] + ) + + self.program_compare(test_prog_orig, test_prog_prune) + self.assertEqual(loss_data_orig, loss_data_prune) + @contextlib.contextmanager def program_scope_guard(self): prog = base.Program() diff --git a/test/legacy_test/test_prune.py b/test/legacy_test/test_prune.py index 00b96074ab5c2e..91314d3c86b800 100644 --- a/test/legacy_test/test_prune.py +++ b/test/legacy_test/test_prune.py @@ -22,121 +22,82 @@ from paddle.base import framework -class TestPrune(unittest.TestCase): - def net(self): - x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') - x.desc.set_need_check_feed(False) - label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") - label.desc.set_need_check_feed(False) - y = paddle.static.nn.fc(x=[x], size=2, activation="softmax") - loss = paddle.nn.functional.cross_entropy( - input=y, label=label, reduction='none', use_softmax=False - ) - loss = paddle.mean(x=loss) - return x, y, label, loss - - def test_prune_with_input(self): +class TestPruneBase(unittest.TestCase): + def run_net(self, net): program = framework.Program() startup_program = framework.Program() - block = program.global_block() with base.program_guard(program, startup_program): - (x, y, label, loss) = self.net() - self.assertEqual(len(block.ops), 5) + ret = net() + + return ret, program + + def check_prune_with_input( + self, + program, + feeded_var_names, + targets, + ops_before_pruned, + ops_after_pruned, + ): + block = program.global_block() + self.assertEqual(len(block.ops), len(ops_before_pruned)) self.assertEqual( [op.type for op in block.ops], - [ - "mul", - "elementwise_add", - "softmax", - "softmax_with_cross_entropy", - "reduce_mean", - ], + ops_before_pruned, ) pruned_program = program._prune_with_input( - feeded_var_names=[y.name, label.name], targets=[loss] + feeded_var_names=feeded_var_names, targets=targets + ) + self.assertEqual( + len(pruned_program.global_block().ops), len(ops_after_pruned) ) - self.assertEqual(len(pruned_program.global_block().ops), 2) self.assertEqual( [op.type for op in pruned_program.global_block().ops], - ["softmax_with_cross_entropy", "reduce_mean"], + ops_after_pruned, ) - def test_prune(self): - program = framework.Program() - startup_program = framework.Program() + def check_prune( + self, program, targets, ops_before_pruned, ops_after_pruned + ): block = program.global_block() - with base.program_guard(program, startup_program): - (x, y, label, loss) = self.net() - self.assertEqual(len(block.ops), 5) + self.assertEqual(len(block.ops), len(ops_before_pruned)) self.assertEqual( [op.type for op in block.ops], - [ - "mul", - "elementwise_add", - "softmax", - "softmax_with_cross_entropy", - "reduce_mean", - ], + ops_before_pruned, + ) + pruned_program = program._prune(targets=targets) + self.assertEqual( + len(pruned_program.global_block().ops), len(ops_after_pruned) ) - pruned_program = program._prune(targets=[loss]) - 
self.assertEqual(len(pruned_program.global_block().ops), 5) self.assertEqual( [op.type for op in pruned_program.global_block().ops], - [ - "mul", - "elementwise_add", - "softmax", - "softmax_with_cross_entropy", - "reduce_mean", - ], + ops_after_pruned, ) - def test_prune_target_not_list(self): - program = framework.Program() - startup_program = framework.Program() + def check_prune_target_not_list( + self, program, targets, ops_before_pruned, ops_after_pruned + ): block = program.global_block() - with base.program_guard(program, startup_program): - (x, y, label, loss) = self.net() - self.assertEqual(len(block.ops), 5) + self.assertEqual(len(block.ops), len(ops_before_pruned)) self.assertEqual( [op.type for op in block.ops], - [ - "mul", - "elementwise_add", - "softmax", - "softmax_with_cross_entropy", - "reduce_mean", - ], + ops_before_pruned, + ) + pruned_program = program._prune(targets=targets) + self.assertEqual( + len(pruned_program.global_block().ops), len(ops_after_pruned) ) - pruned_program = program._prune(targets=loss) - self.assertEqual(len(pruned_program.global_block().ops), 5) self.assertEqual( [op.type for op in pruned_program.global_block().ops], - [ - "mul", - "elementwise_add", - "softmax", - "softmax_with_cross_entropy", - "reduce_mean", - ], + ops_after_pruned, ) - def test_prune_target_none(self): - program = framework.Program() - startup_program = framework.Program() + def check_prune_target_none(self, program, ops_before_pruned): block = program.global_block() - with base.program_guard(program, startup_program): - (x, y, label, loss) = self.net() - self.assertEqual(len(block.ops), 5) + self.assertEqual(len(block.ops), len(ops_before_pruned)) self.assertEqual( [op.type for op in block.ops], - [ - "mul", - "elementwise_add", - "softmax", - "softmax_with_cross_entropy", - "reduce_mean", - ], + ops_before_pruned, ) try: pruned_program = program._prune(targets=None) @@ -147,6 +108,96 @@ def test_prune_target_none(self): ) +class TestPrune(TestPruneBase): + def net(self): + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + x.desc.set_need_check_feed(False) + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") + label.desc.set_need_check_feed(False) + y = paddle.static.nn.fc(x=[x], size=2, activation="softmax") + loss = paddle.nn.functional.cross_entropy( + input=y, label=label, reduction='none', use_softmax=False + ) + loss = paddle.mean(x=loss) + return x, y, label, loss + + def test_prune_with_input(self): + ops_before_pruned = [ + "mul", + "elementwise_add", + "softmax", + "softmax_with_cross_entropy", + "reduce_mean", + ] + + ops_after_pruned = ["softmax_with_cross_entropy", "reduce_mean"] + (x, y, label, loss), program = self.run_net(self.net) + + self.check_prune_with_input( + program, + [y.name, label.name], + [loss], + ops_before_pruned, + ops_after_pruned, + ) + + def test_prune(self): + ops_before_pruned = [ + "mul", + "elementwise_add", + "softmax", + "softmax_with_cross_entropy", + "reduce_mean", + ] + + ops_after_pruned = [ + "mul", + "elementwise_add", + "softmax", + "softmax_with_cross_entropy", + "reduce_mean", + ] + + (x, y, label, loss), program = self.run_net(self.net) + + self.check_prune(program, [loss], ops_before_pruned, ops_after_pruned) + + def test_prune_target_not_list(self): + ops_before_pruned = [ + "mul", + "elementwise_add", + "softmax", + "softmax_with_cross_entropy", + "reduce_mean", + ] + + ops_after_pruned = [ + "mul", + "elementwise_add", + "softmax", + "softmax_with_cross_entropy", + "reduce_mean", + 
] + + (x, y, label, loss), program = self.run_net(self.net) + + self.check_prune_target_not_list( + program, loss, ops_before_pruned, ops_after_pruned + ) + + def test_prune_target_none(self): + ops_before_pruned = [ + "mul", + "elementwise_add", + "softmax", + "softmax_with_cross_entropy", + "reduce_mean", + ] + + (x, y, label, loss), program = self.run_net(self.net) + self.check_prune_target_none(program, ops_before_pruned) + + def mock(self, program, feed, fetch, optimize_ops): self.prune_called_times += 1 return program @@ -160,77 +211,83 @@ def _mock_guard(mock): base.Executor._prune_program = original +def net1(): + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + x.desc.set_need_check_feed(False) + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") + label.desc.set_need_check_feed(False) + w_param_attrs = base.ParamAttr( + name="fc_weight", + learning_rate=0.5, + initializer=paddle.nn.initializer.Constant(1.0), + trainable=True, + ) + y = paddle.static.nn.fc( + x=[x], size=2, activation="softmax", weight_attr=w_param_attrs + ) + loss1 = paddle.nn.functional.cross_entropy( + input=y, label=label, reduction='none', use_softmax=False + ) + loss1 = paddle.mean(x=loss1) + loss2 = paddle.nn.functional.cross_entropy( + input=y, label=label, reduction='none', use_softmax=False + ) + loss2 = paddle.mean(x=loss2) + loss1.persistable = True + loss2.persistable = True + return x, y, label, loss1, loss2, w_param_attrs + + +def net2(): + x1 = paddle.static.data(name='x1', shape=[-1, 2], dtype='float32') + x1.desc.set_need_check_feed(False) + x2 = paddle.static.data(name='x2', shape=[-1, 2], dtype='float32') + x2.desc.set_need_check_feed(False) + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") + label.desc.set_need_check_feed(False) + w1_param_attrs = base.ParamAttr( + name="fc_weight1", + learning_rate=0.5, + initializer=paddle.nn.initializer.Constant(1.0), + trainable=True, + ) + w2_param_attrs = base.ParamAttr( + name="fc_weight2", + learning_rate=0.5, + initializer=paddle.nn.initializer.Constant(1.0), + trainable=True, + ) + y1 = paddle.static.nn.fc( + x=[x1], size=2, activation="softmax", weight_attr=w1_param_attrs + ) + y2 = paddle.static.nn.fc( + x=[x2], size=2, activation="softmax", weight_attr=w2_param_attrs + ) + loss1 = paddle.nn.functional.cross_entropy( + input=y1, label=label, reduction='none', use_softmax=False + ) + loss1 = paddle.mean(x=loss1) + loss2 = paddle.nn.functional.cross_entropy( + input=y2, label=label, reduction='none', use_softmax=False + ) + loss2 = paddle.mean(x=loss2) + return ( + x1, + x2, + y1, + y2, + label, + loss1, + loss2, + w1_param_attrs, + w2_param_attrs, + ) + + class TestExecutorRunAutoPrune(unittest.TestCase): - def net1(self): - x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') - x.desc.set_need_check_feed(False) - label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") - label.desc.set_need_check_feed(False) - w_param_attrs = base.ParamAttr( - name="fc_weight", - learning_rate=0.5, - initializer=paddle.nn.initializer.Constant(1.0), - trainable=True, - ) - y = paddle.static.nn.fc( - x=[x], size=2, activation="softmax", weight_attr=w_param_attrs - ) - loss1 = paddle.nn.functional.cross_entropy( - input=y, label=label, reduction='none', use_softmax=False - ) - loss1 = paddle.mean(x=loss1) - loss2 = paddle.nn.functional.cross_entropy( - input=y, label=label, reduction='none', use_softmax=False - ) - loss2 = paddle.mean(x=loss2) - loss1.persistable = True - 
loss2.persistable = True - return x, y, label, loss1, loss2, w_param_attrs - - def net2(self): - x1 = paddle.static.data(name='x1', shape=[-1, 2], dtype='float32') - x1.desc.set_need_check_feed(False) - x2 = paddle.static.data(name='x2', shape=[-1, 2], dtype='float32') - x2.desc.set_need_check_feed(False) - label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") - label.desc.set_need_check_feed(False) - w1_param_attrs = base.ParamAttr( - name="fc_weight1", - learning_rate=0.5, - initializer=paddle.nn.initializer.Constant(1.0), - trainable=True, - ) - w2_param_attrs = base.ParamAttr( - name="fc_weight2", - learning_rate=0.5, - initializer=paddle.nn.initializer.Constant(1.0), - trainable=True, - ) - y1 = paddle.static.nn.fc( - x=[x1], size=2, activation="softmax", weight_attr=w1_param_attrs - ) - y2 = paddle.static.nn.fc( - x=[x2], size=2, activation="softmax", weight_attr=w2_param_attrs - ) - loss1 = paddle.nn.functional.cross_entropy( - input=y1, label=label, reduction='none', use_softmax=False - ) - loss1 = paddle.mean(x=loss1) - loss2 = paddle.nn.functional.cross_entropy( - input=y2, label=label, reduction='none', use_softmax=False - ) - loss2 = paddle.mean(x=loss2) - return ( - x1, - x2, - y1, - y2, - label, - loss1, - loss2, - w1_param_attrs, - w2_param_attrs, - ) + def setUp(self): + self.net1 = net1 + self.net2 = net2 def test_not_prune(self): """ diff --git a/test/legacy_test/test_static_pylayer.py b/test/legacy_test/test_static_pylayer.py index 3a1634e92bf58b..8b193d6e087be7 100644 --- a/test/legacy_test/test_static_pylayer.py +++ b/test/legacy_test/test_static_pylayer.py @@ -16,6 +16,7 @@ import unittest import numpy as np +from legacy_test.test_prune import TestExecutorRunAutoPrune, TestPruneBase import paddle from paddle import base @@ -27,6 +28,9 @@ class TestStaticPyLayerInputOutput(unittest.TestCase): + def setUp(self): + paddle.enable_static() + def test_return_single_var(self): """ pseudocode: @@ -34,8 +38,6 @@ def test_return_single_var(self): y = 3 * x """ - paddle.enable_static() - def forward_fn(x): return 3 * x @@ -65,8 +67,6 @@ def test_return_0d_tensor(self): y = 3 * x """ - paddle.enable_static() - def forward_fn(x): return 3 * x @@ -96,8 +96,6 @@ def test_0d_tensor_backward(self): dx = -5 * dy ''' - paddle.enable_static() - def forward_fn(x): return 3 * x @@ -132,8 +130,6 @@ def backward_fn(dy): self.assertEqual(x_grad.shape, ()) def test_return_var_typle(self): - paddle.enable_static() - def forward_fn(a, b): return 3 * a, -2 * b @@ -168,8 +164,6 @@ def forward_fn(a, b): ) def test_return_forward_none(self): - paddle.enable_static() - input_shape = (1, 3) def forward_fn(x): @@ -198,8 +192,6 @@ def test_wrong_structure_exception(self): wrong number of inputs and outputs returned by ``forward_fn`` and ``backward_fn`` """ - paddle.enable_static() - def forward_fn(a, b): return 3 * a, -b, paddle.mean(b) @@ -232,6 +224,9 @@ def backward_fn(daout, dbout): class TestControlFlowNestedStaticPyLayer(unittest.TestCase): + def setUp(self): + paddle.enable_static() + def test_cond_inside_static_pylayer(self): """ forward propagation: @@ -256,8 +251,6 @@ def backward_fn(diout, daout): return daout_scaled, daout * daout """ - paddle.enable_static() - def forward_fn(i, a): return i, paddle.static.nn.cond( i < 5.0, lambda: paddle.add(a, a), lambda: paddle.subtract(a, a) @@ -343,9 +336,10 @@ def backward_fn(diout, daout): class TestStaticPyLayerBackward(unittest.TestCase): - def test_identity_backward(self): + def setUp(self): paddle.enable_static() + def 
test_identity_backward(self): def forward_fn(x): return x @@ -405,8 +399,6 @@ def test_static_pylayer_backward(self): dx = tanh(dy) ''' - paddle.enable_static() - def forward_fn(x): return 3 * x @@ -455,5 +447,231 @@ def backward_fn(dy): ) +class TestStaticPyLayerPrune(TestPruneBase): + def setUp(self): + paddle.enable_static() + + def net(self): + def forward_fn(x): + y = 3 * x + return y + + def backward_fn(dy): + grad = paddle.exp(dy) + return grad + + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + x.desc.set_need_check_feed(False) + hidden = paddle.static.nn.fc(x=[x], size=4, activation="softmax") + y = paddle.static.nn.static_pylayer(forward_fn, [hidden], backward_fn) + loss = paddle.mean(y) + return x, hidden, y, loss + + def net_with_weight(self): + def forward_fn(x): + y = 3 * x + return y + + def backward_fn(dy): + grad = paddle.exp(dy) + return grad + + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + x.desc.set_need_check_feed(False) + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") + label.desc.set_need_check_feed(False) + w_param_attrs = base.ParamAttr( + name="fc_weight", + learning_rate=0.5, + initializer=paddle.nn.initializer.Constant(1.0), + trainable=True, + ) + + y = paddle.static.nn.static_pylayer(forward_fn, [x], backward_fn) + hidden = paddle.static.nn.fc( + x=[y], size=4, activation="softmax", weight_attr=w_param_attrs + ) + loss1 = paddle.nn.functional.cross_entropy( + input=hidden, label=label, reduction='none', use_softmax=False + ) + loss1 = paddle.mean(x=loss1) + loss2 = paddle.nn.functional.cross_entropy( + input=hidden, label=label, reduction='none', use_softmax=False + ) + loss2 = paddle.mean(x=loss2) + loss1.persistable = True + loss2.persistable = True + + return x, hidden, label, loss1, loss2, w_param_attrs + + def test_prune_with_input(self): + ops_before_pruned = [ + "mul", + "elementwise_add", + "softmax", + "pylayer", + "reduce_mean", + ] + + ops_after_pruned = ["pylayer", "reduce_mean"] + + (x, hidden, y, loss), program = self.run_net(self.net) + + self.check_prune_with_input( + program, [hidden.name], [loss], ops_before_pruned, ops_after_pruned + ) + + def test_prune(self): + ops_before_pruned = [ + "mul", + "elementwise_add", + "softmax", + "pylayer", + "reduce_mean", + ] + + ops_after_pruned = [ + "mul", + "elementwise_add", + "softmax", + "pylayer", + "reduce_mean", + ] + + (x, hidden, y, loss), program = self.run_net(self.net) + + self.check_prune(program, [loss], ops_before_pruned, ops_after_pruned) + + def test_prune_target_not_list(self): + ops_before_pruned = [ + "mul", + "elementwise_add", + "softmax", + "pylayer", + "reduce_mean", + ] + + ops_after_pruned = [ + "mul", + "elementwise_add", + "softmax", + "pylayer", + "reduce_mean", + ] + + (x, hidden, y, loss), program = self.run_net(self.net) + self.check_prune_target_not_list( + program, loss, ops_before_pruned, ops_after_pruned + ) + + def test_prune_target_none(self): + ops_before_pruned = [ + "mul", + "elementwise_add", + "softmax", + "pylayer", + "reduce_mean", + ] + + (x, hidden, y, loss), program = self.run_net(self.net) + self.check_prune_target_none(program, ops_before_pruned) + + +def net_with_weight1(): + def forward_fn(x): + y = 3 * x + return y + + def backward_fn(dy): + grad = paddle.exp(dy) + return grad + + x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32') + x.desc.set_need_check_feed(False) + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") + 
label.desc.set_need_check_feed(False) + w_param_attrs = base.ParamAttr( + name="fc_weight", + learning_rate=0.5, + initializer=paddle.nn.initializer.Constant(1.0), + trainable=True, + ) + + y = paddle.static.nn.static_pylayer(forward_fn, [x], backward_fn) + hidden = paddle.static.nn.fc( + x=[y], size=4, activation="softmax", weight_attr=w_param_attrs + ) + loss1 = paddle.nn.functional.cross_entropy( + input=hidden, label=label, reduction='none', use_softmax=False + ) + loss1 = paddle.mean(x=loss1) + loss2 = paddle.nn.functional.cross_entropy( + input=hidden, label=label, reduction='none', use_softmax=False + ) + loss2 = paddle.mean(x=loss2) + loss1.persistable = True + loss2.persistable = True + + return x, hidden, label, loss1, loss2, w_param_attrs + + +def net_with_weight2(): + def forward_fn(x): + y = 3 * x + return y + + def backward_fn(dy): + grad = paddle.exp(dy) + return grad + + x1 = paddle.static.data(name='x1', shape=[-1, 2], dtype='float32') + x1.desc.set_need_check_feed(False) + x2 = paddle.static.data(name='x2', shape=[-1, 2], dtype='float32') + x2.desc.set_need_check_feed(False) + label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") + label.desc.set_need_check_feed(False) + w1_param_attrs = base.ParamAttr( + name="fc_weight1", + learning_rate=0.5, + initializer=paddle.nn.initializer.Constant(1.0), + trainable=True, + ) + w2_param_attrs = base.ParamAttr( + name="fc_weight2", + learning_rate=0.5, + initializer=paddle.nn.initializer.Constant(1.0), + trainable=True, + ) + + y1 = paddle.static.nn.static_pylayer(forward_fn, [x1], backward_fn) + hidden1 = paddle.static.nn.fc( + x=[y1], size=4, activation="softmax", weight_attr=w1_param_attrs + ) + y2 = paddle.static.nn.static_pylayer(forward_fn, [x2], backward_fn) + hidden2 = paddle.static.nn.fc( + x=[y2], size=4, activation="softmax", weight_attr=w2_param_attrs + ) + + loss1 = paddle.nn.functional.cross_entropy( + input=hidden1, label=label, reduction='none', use_softmax=False + ) + loss1 = paddle.mean(x=loss1) + loss2 = paddle.nn.functional.cross_entropy( + input=hidden2, label=label, reduction='none', use_softmax=False + ) + loss2 = paddle.mean(x=loss2) + loss1.persistable = True + loss2.persistable = True + + return x1, x2, y1, y2, label, loss1, loss2, w1_param_attrs, w2_param_attrs + + +class TestStaticPyLayerExecutorAutoPrune(TestExecutorRunAutoPrune): + def setUp(self): + paddle.enable_static() + self.net1 = net_with_weight1 + self.net2 = net_with_weight2 + + if __name__ == '__main__': unittest.main()
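
A minimal sketch of the workflow this patch targets, assuming the public paddle.jit / PyLayer APIs already used in the tests above; the CusTanh and Net classes, the save path, and the tensor shapes below are illustrative only, not part of the patch:

import paddle
from paddle.autograd import PyLayer
from paddle.static import InputSpec


class CusTanh(PyLayer):
    @staticmethod
    def forward(ctx, x):
        # Save the activation so backward can reuse it.
        y = paddle.tanh(x)
        ctx.save_for_backward(y)
        return y

    @staticmethod
    def backward(ctx, dy):
        (y,) = ctx.saved_tensor()
        return dy * (1 - paddle.square(y))


class Net(paddle.nn.Layer):
    def __init__(self):
        super().__init__()
        self.linear = paddle.nn.Linear(4, 8)

    def forward(self, x):
        return CusTanh.apply(self.linear(x))


# Convert to static graph; the PyLayer call is lowered to a pylayer op.
net = paddle.jit.to_static(Net(), input_spec=[InputSpec([None, 4], 'float32')])
paddle.jit.save(net, "./pylayer_demo/net")  # illustrative path

# Reload for inference.
loaded = paddle.jit.load("./pylayer_demo/net")
loaded.eval()
out = loaded(paddle.randn([2, 4], dtype='float32'))
print(out.shape)  # [2, 8]

When the saved program is normalized for inference, the pylayer op keeps only its forward block (the backward block is pruned, as done in normalize_program above), so paddle.jit.load restores a model whose forward pass matches the dygraph PyLayer.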