From bce9b3ba83fa0a49ef41f8121c8cb9c58f6c54ab Mon Sep 17 00:00:00 2001
From: wangruting
Date: Wed, 30 Aug 2023 01:44:07 +0000
Subject: [PATCH 1/7] tmp

---
 .../paddle_dialect/ir/pd_manual_op_vjp.cc | 91 +++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op_vjp.cc

diff --git a/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op_vjp.cc b/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op_vjp.cc
new file mode 100644
index 0000000000000..c13ed627c42cd
--- /dev/null
+++ b/paddle/fluid/ir/dialect/paddle_dialect/ir/pd_manual_op_vjp.cc
@@ -0,0 +1,91 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_attribute.h"
+#include "paddle/fluid/ir/dialect/paddle_dialect/ir/pd_op.h"
+#include "paddle/fluid/primitive/rule/vjp/vjp.h"
+#include "paddle/fluid/primitive/type/lazy_tensor.h"
+#include "paddle/ir/core/builtin_op.h"
+#include "paddle/ir/core/op_base.h"
+#include "paddle/phi/common/int_array.h"
+
+// TODO(wanghao107)
+// this file will be generated in pd_op.cc
+
+namespace paddle {
+namespace dialect {
+using IntArray = paddle::experimental::IntArray;
+
+std::vector<std::vector<ir::OpResult>> SumOp::Vjp(
+    ir::Operation* op,
+    const std::vector<std::vector<ir::OpResult>>& out_grads,
+    const std::vector<std::vector<bool>>& stop_gradients) {
+  SumOp op_obj = op->dyn_cast<SumOp>();
+  Tensor x(std::make_shared<primitive::LazyTensor>(op_obj.x()));
+  Tensor out_grad(std::make_shared<primitive::LazyTensor>(out_grads[0][0]));
+
+  IntArray axis = op_obj.axis()
+                      .GetDefiningOp()
+                      ->attribute("value")
+                      .dyn_cast<paddle::dialect::IntArrayAttribute>()
+                      .data();
+  bool keepdim = op->attribute("keepdim").dyn_cast<ir::BoolAttribute>().data();
+  bool reduce_all = false;
+  std::vector<std::vector<Tensor>> tensor_res = primitive::sum_vjp(
+      x, out_grad, axis, keepdim, reduce_all, stop_gradients);
+  std::vector<std::vector<ir::OpResult>> res(2, std::vector<ir::OpResult>(1));
+  if (tensor_res[0][0].defined()) {
+    res[0][0] =
+        std::static_pointer_cast<primitive::LazyTensor>(tensor_res[0][0].impl())
+            ->getValue()
+            .dyn_cast<ir::OpResult>();
+  }
+  return res;
+}
+
+// std::vector<std::vector<ir::OpResult>> SplitOp::Vjp(
+//     ir::Operation* op,
+//     const std::vector<std::vector<ir::OpResult>>& out_grads,
+//     const std::vector<std::vector<bool>>& stop_gradients) {
+//   SplitOp op_obj = op->dyn_cast<SplitOp>();
+
+//   Tensor axis(std::make_shared<primitive::LazyTensor>(op_obj.axis()));
+//   std::vector<Tensor> out_grads_;
+//   for (size_t idx = 0; idx < out_grads[0].size(); idx++) {
+//     out_grads_.emplace_back(
+//         std::make_shared<primitive::LazyTensor>(out_grads[0][idx]));
+//   }
+
+//   std::vector<std::vector<Tensor>> tensor_res =
+//       primitive::split_vjp(out_grads_, axis, stop_gradients);
+
+//   std::vector<std::vector<ir::OpResult>> res(tensor_res.size(),
+//                                              std::vector<ir::OpResult>());
+
+//   for (uint64_t i = 0; i < tensor_res.size(); i++) {
+//     res[i].resize(tensor_res[i].size());
+//     for (uint64_t j = 0; j < tensor_res[i].size(); j++) {
+//       if (tensor_res[i][j].defined()) {
+//         res[i][j] = std::static_pointer_cast<primitive::LazyTensor>(
+//                         tensor_res[i][j].impl())
+//                         ->getValue()
+//                         .dyn_cast<ir::OpResult>();
+//       }
+//     }
+//   }
+//   return res;
+// }
+
+}  // namespace dialect
+}  // namespace paddle

From d52fe877d705700471ecec174763dc68e1c8e12f Mon Sep 17 00:00:00 2001
From: 0x45f
Date: Sun, 8 Oct 2023 09:31:48 +0000
Subject: [PATCH 2/7] [PIR]Migrate maximum into pir

---
 python/paddle/pir_utils.py          | 15 +++++++++++++++
 python/paddle/tensor/math.py        |  2 +-
 test/legacy_test/test_maximum_op.py |  2 ++
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/python/paddle/pir_utils.py b/python/paddle/pir_utils.py
index 28d261b0155fc..8d8657f524e8b 100644
--- a/python/paddle/pir_utils.py
+++ b/python/paddle/pir_utils.py
@@ -13,6 +13,8 @@
 # limitations under the License.
 
+from functools import wraps
+
 import paddle
 
 
@@ -95,3 +97,16 @@ def _switch_to_old_ir(self):
                 "IrGuard._switch_to_old_ir only work when paddle.framework.in_pir_mode() is false, \
                 please set FLAGS_enable_pir_api = false"
             )
+
+
+def test_with_pir_api():
+    def decorator(func):
+        @wraps(func)
+        def impl(*args, **kwargs):
+            func(*args, **kwargs)
+            with IrGuard():
+                func(*args, **kwargs)
+
+        return impl
+
+    return decorator
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index d56bb8ab0768b..ec1cd6376dd5b 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -1226,7 +1226,7 @@ def maximum(x, y, name=None):
             Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
             [5. , 3. , inf.])
     """
-    if in_dynamic_mode():
+    if in_dynamic_or_pir_mode():
         return _C_ops.maximum(x, y)
     else:
         return _elementwise_op(LayerHelper('elementwise_max', **locals()))
diff --git a/test/legacy_test/test_maximum_op.py b/test/legacy_test/test_maximum_op.py
index 818bdb65fee68..3cd8f39a57f11 100644
--- a/test/legacy_test/test_maximum_op.py
+++ b/test/legacy_test/test_maximum_op.py
@@ -18,6 +18,7 @@
 
 import paddle
 from paddle.base import core
+from paddle.pir_utils import test_with_pir_api
 
 
 class ApiMaximumTest(unittest.TestCase):
@@ -39,6 +40,7 @@ def setUp(self):
         self.np_expected3 = np.maximum(self.input_a, self.input_c)
         self.np_expected4 = np.maximum(self.input_b, self.input_c)
 
+    @test_with_pir_api
     def test_static_api(self):
         paddle.enable_static()
         with paddle.static.program_guard(

From 9e5a0b1437d22295dbf37250cc6f98642576ae18 Mon Sep 17 00:00:00 2001
From: 0x45f
Date: Mon, 9 Oct 2023 02:48:38 +0000
Subject: [PATCH 3/7] Polish code

---
 python/paddle/pir_utils.py          | 15 ++++++---------
 test/legacy_test/test_maximum_op.py |  4 ++++
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/python/paddle/pir_utils.py b/python/paddle/pir_utils.py
index 8d8657f524e8b..f16d411262a22 100644
--- a/python/paddle/pir_utils.py
+++ b/python/paddle/pir_utils.py
@@ -99,14 +99,11 @@ def _switch_to_old_ir(self):
             )
 
 
-def test_with_pir_api():
-    def decorator(func):
-        @wraps(func)
-        def impl(*args, **kwargs):
+def test_with_pir_api(func):
+    @wraps(func)
+    def impl(*args, **kwargs):
+        func(*args, **kwargs)
+        with IrGuard():
             func(*args, **kwargs)
-            with IrGuard():
-                func(*args, **kwargs)
 
-        return impl
-
-    return decorator
+    return impl
diff --git a/test/legacy_test/test_maximum_op.py b/test/legacy_test/test_maximum_op.py
index 3cd8f39a57f11..a0e660112bd03 100644
--- a/test/legacy_test/test_maximum_op.py
+++ b/test/legacy_test/test_maximum_op.py
@@ -121,3 +121,7 @@ def test_dynamic_api(self):
         res = paddle.maximum(b, c)
         res = res.numpy()
         np.testing.assert_allclose(res, self.np_expected4, rtol=1e-05)
+
+
+if __name__ == '__main__':
+    unittest.main()

From 2218be272f341471e15a03e9138f1598af72e6ec Mon Sep 17 00:00:00 2001
From: wangruting
Date: Mon, 9 Oct 2023 07:58:25 +0000
Subject: [PATCH 4/7] add ir_grad of static_gradient

---
 python/paddle/base/backward.py | 58 +++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 4 deletions(-)
diff --git a/python/paddle/base/backward.py b/python/paddle/base/backward.py
index 6d30823d4bf4a..2190eca8ff9d3 100755
--- a/python/paddle/base/backward.py
+++ b/python/paddle/base/backward.py
@@ -2060,11 +2060,11 @@ def append_backward(
             block, [loss], [], block_no_grad_set, op_path_dict
         )
 
-        no_grad_vars = _find_no_grad_vars(
+        no_grad_set = _find_no_grad_vars(
             block, op_path, [loss], block_no_grad_set
         )
 
-        block_no_grad_set.update(no_grad_vars)
+        block_no_grad_set.update(no_grad_set)
         no_grad_dict[block_idx].update(
             list(map(_append_grad_suffix_, block_no_grad_set))
         )
@@ -2510,10 +2510,10 @@ def calc_gradient_helper(
     block.program._sync_with_cpp()
 
     # find no grad var by op_path
-    no_grad_vars = _find_no_grad_vars(
+    no_grad_set = _find_no_grad_vars(
         block, op_path, tmp_targets, block_no_grad_set
     )
-    block_no_grad_set.update(no_grad_vars)
+    block_no_grad_set.update(no_grad_set)
     no_grad_dict[0].update(list(map(_append_grad_suffix_, block_no_grad_set)))
 
     grad_to_var = dict()
@@ -2636,6 +2636,56 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None):
         >>> print(z)
         [var x@GRAD : LOD_TENSOR.shape(-1, 2, 8, 8).dtype(float32).stop_gradient(False)]
     """
+    if framework.in_pir_mode():
+        check_type(
+            outputs,
+            'outputs',
+            ((paddle.pir.Value, paddle.pir.OpResult), list, tuple),
+            'paddle.autograd.ir_backward.grad',
+        )
+        check_type(
+            inputs,
+            'inputs',
+            ((paddle.pir.Value, paddle.pir.OpResult), list, tuple),
+            'paddle.autograd.ir_backward.grad',
+        )
+        check_type(
+            grad_outputs,
+            'grad_outputs',
+            ((paddle.pir.Value, paddle.pir.OpResult), list, tuple, type(None)),
+            'paddle.autograd.ir_backward.grad',
+        )
+
+        check_type(
+            no_grad_set,
+            'no_grad_set',
+            (
+                (paddle.pir.Value, paddle.pir.OpResult),
+                list,
+                tuple,
+                set,
+                type(None),
+            ),
+            'paddle.autograd.ir_backward.grad',
+        )
+        outputs = _as_list(outputs)
+        inputs = _as_list(inputs)
+        grad_outputs = _as_list(grad_outputs)
+        if no_grad_set is None:
+            no_grad_set = set()
+        elif no_grad_set is not set:
+            no_grad_set = set(no_grad_set)
+        else:
+            no_grad_set = no_grad_set
+        from paddle.autograd.ir_backward import (
+            calc_gradient as pir_calc_gradient,
+        )
+
+        input_grad = pir_calc_gradient(
+            outputs, inputs, grad_outputs, no_grad_set
+        )
+        return input_grad
+
     check_type(
         targets,
         'targets',

From 02040b1b3d163907cd20a67be2accde392abee47 Mon Sep 17 00:00:00 2001
From: wangruting
Date: Mon, 9 Oct 2023 08:48:20 +0000
Subject: [PATCH 5/7] add test

---
 test/legacy_test/test_calc_gradient.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/test/legacy_test/test_calc_gradient.py b/test/legacy_test/test_calc_gradient.py
index 945acf18bb932..7a1754f8f0ccd 100644
--- a/test/legacy_test/test_calc_gradient.py
+++ b/test/legacy_test/test_calc_gradient.py
@@ -85,7 +85,11 @@ def test2(self):
         self.assertEqual(12, out[0])
 
 
+from paddle.pir_utils import test_with_pir_api
+
+
 class TestGradientWithPrune(unittest.TestCase):
+    @test_with_pir_api
     def test_prune(self):
         with paddle.base.scope_guard(paddle.static.Scope()):
             x = paddle.static.data(name='x', shape=[3], dtype='float32')

From 615c4876f1a3149e4196f1f6bf1e0ee118b956bc Mon Sep 17 00:00:00 2001
From: wangruting
Date: Mon, 9 Oct 2023 11:57:02 +0000
Subject: [PATCH 6/7] modify bug

---
 python/paddle/base/backward.py | 14 +++++++-------
 python/paddle/pir_utils.py     | 17 +++++++++++++++--
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/python/paddle/base/backward.py b/python/paddle/base/backward.py
index 2190eca8ff9d3..4c3c158924de2 100755
--- a/python/paddle/base/backward.py
+++ b/python/paddle/base/backward.py
@@ -2638,8 +2638,8 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None):
     """
     if framework.in_pir_mode():
         check_type(
-            outputs,
-            'outputs',
+            targets,
+            'targets',
             ((paddle.pir.Value, paddle.pir.OpResult), list, tuple),
             'paddle.autograd.ir_backward.grad',
         )
@@ -2650,8 +2650,8 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None):
             'paddle.autograd.ir_backward.grad',
         )
         check_type(
-            grad_outputs,
-            'grad_outputs',
+            target_gradients,
+            'target_gradients',
             ((paddle.pir.Value, paddle.pir.OpResult), list, tuple, type(None)),
             'paddle.autograd.ir_backward.grad',
         )
@@ -2668,9 +2668,9 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None):
             ),
             'paddle.autograd.ir_backward.grad',
         )
-        outputs = _as_list(outputs)
+        targets = _as_list(targets)
         inputs = _as_list(inputs)
-        grad_outputs = _as_list(grad_outputs)
+        target_gradients = _as_list(target_gradients)
         if no_grad_set is None:
             no_grad_set = set()
         elif no_grad_set is not set:
@@ -2682,7 +2682,7 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None):
         )
 
         input_grad = pir_calc_gradient(
-            outputs, inputs, grad_outputs, no_grad_set
+            targets, inputs, target_gradients, no_grad_set
         )
         return input_grad
 
diff --git a/python/paddle/pir_utils.py b/python/paddle/pir_utils.py
index f16d411262a22..2c3eef3197c23 100644
--- a/python/paddle/pir_utils.py
+++ b/python/paddle/pir_utils.py
@@ -66,9 +66,17 @@ def _switch_to_pir(self):
                 {"FLAGS_enable_new_ir_in_executor": True}
             )
             paddle.pir.register_paddle_dialect()
-            paddle.static.Program = paddle.pir.Program
+            paddle.base.Program = paddle.pir.Program
             paddle.base.program_guard = paddle.pir.core.program_guard
+            paddle.base.default_main_program = (
+                paddle.pir.core.default_main_program
+            )
+            paddle.base.default_startup_program = (
+                paddle.pir.core.default_startup_program
+            )
+
+            paddle.static.Program = paddle.pir.Program
             paddle.static.program_guard = paddle.pir.core.program_guard
             paddle.static.default_main_program = (
                 paddle.pir.core.default_main_program
@@ -84,9 +92,14 @@ def _switch_to_old_ir(self):
             paddle.framework.set_flags(
                 {"FLAGS_enable_new_ir_in_executor": False}
            )
-            paddle.static.Program = self.old_Program
+            paddle.base.Program = self.old_Program
             paddle.base.program_guard = self.old_program_guard
+            paddle.base.default_main_program = self.old_default_main_program
+            paddle.base.default_startup_program = (
+                self.old_default_startup_program
+            )
+
+            paddle.static.Program = self.old_Program
             paddle.static.program_guard = self.old_program_guard
             paddle.static.default_main_program = self.old_default_main_program
             paddle.static.default_startup_program = (

From b48c16349edd9458e588d9ef52b9e36647bf5adb Mon Sep 17 00:00:00 2001
From: wangruting
Date: Tue, 10 Oct 2023 01:48:31 +0000
Subject: [PATCH 7/7] modify

---
 python/paddle/pir_utils.py             | 21 ++++++++++-----------
 test/legacy_test/test_calc_gradient.py |  4 ++--
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/python/paddle/pir_utils.py b/python/paddle/pir_utils.py
index 2c3eef3197c23..a2b5244cad7c5 100644
--- a/python/paddle/pir_utils.py
+++ b/python/paddle/pir_utils.py
@@ -69,13 +69,12 @@ def _switch_to_pir(self):
             paddle.base.Program = paddle.pir.Program
             paddle.base.program_guard = paddle.pir.core.program_guard
-            paddle.base.default_main_program = (
-                paddle.pir.core.default_main_program
-            )
-            paddle.base.default_startup_program = (
-                paddle.pir.core.default_startup_program
-            )
-
+            # paddle.base.default_main_program = (
+            #     paddle.pir.core.default_main_program
+            # )
+            # paddle.base.default_startup_program = (
+            #     paddle.pir.core.default_startup_program
+            # )
             paddle.static.Program = paddle.pir.Program
             paddle.static.program_guard = paddle.pir.core.program_guard
             paddle.static.default_main_program = (
                 paddle.pir.core.default_main_program
@@ -95,10 +94,10 @@ def _switch_to_old_ir(self):
             paddle.base.Program = self.old_Program
             paddle.base.program_guard = self.old_program_guard
-            paddle.base.default_main_program = self.old_default_main_program
-            paddle.base.default_startup_program = (
-                self.old_default_startup_program
-            )
+            # paddle.base.default_main_program = self.old_default_main_program
+            # paddle.base.default_startup_program = (
+            #     self.old_default_startup_program
+            # )
             paddle.static.Program = self.old_Program
             paddle.static.program_guard = self.old_program_guard
             paddle.static.default_main_program = self.old_default_main_program
             paddle.static.default_startup_program = (
diff --git a/test/legacy_test/test_calc_gradient.py b/test/legacy_test/test_calc_gradient.py
index 7a1754f8f0ccd..41f3772260c77 100644
--- a/test/legacy_test/test_calc_gradient.py
+++ b/test/legacy_test/test_calc_gradient.py
@@ -99,8 +99,8 @@ def test_prune(self):
             x1_grad = base.gradients(y, x)
 
             exe = base.Executor(base.CPUPlace())
-            main = base.default_main_program()
-            exe.run(base.default_startup_program())
+            main = paddle.static.default_main_program()
+            exe.run(paddle.static.default_startup_program())
             out = exe.run(
                 main,
                 feed={'x': np.ones([3]).astype('float32')},
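
Usage sketch (illustrative only, not part of the patch series): the test below shows how a legacy static-graph unit test is expected to opt into PIR through the test_with_pir_api decorator added in PATCH 2/7 and simplified in PATCH 3/7, exercising paddle.maximum, which PATCH 2/7 routes through _C_ops when PIR mode is active. The test class, tensor values, and program wiring here are hypothetical and assume the decorator and IrGuard behave as defined in the patches above.

# Hypothetical example; not taken from the PaddlePaddle repository.
import unittest

import numpy as np

import paddle
from paddle.pir_utils import test_with_pir_api


class ExampleMaximumTest(unittest.TestCase):
    @test_with_pir_api
    def test_static_maximum(self):
        # The decorated body runs twice: once under the legacy IR, then once
        # inside IrGuard(), which swaps paddle.static.Program and
        # program_guard for their PIR counterparts.
        paddle.enable_static()
        main = paddle.static.Program()
        with paddle.static.program_guard(main):
            x = paddle.static.data(name='x', shape=[3], dtype='float32')
            y = paddle.static.data(name='y', shape=[3], dtype='float32')
            out = paddle.maximum(x, y)
            exe = paddle.static.Executor(paddle.CPUPlace())
            (res,) = exe.run(
                main,
                feed={
                    'x': np.array([1.0, 5.0, 3.0], dtype='float32'),
                    'y': np.array([4.0, 2.0, 6.0], dtype='float32'),
                },
                fetch_list=[out],
            )
        np.testing.assert_allclose(
            res, np.array([4.0, 5.0, 6.0], dtype='float32'), rtol=1e-05
        )


if __name__ == '__main__':
    unittest.main()

Because the decorator simply calls the test body twice (once as-is and once inside IrGuard()), any program state the test relies on should be created inside the body, which is why the Program is constructed within test_static_maximum rather than in setUp.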