From 87a25fbda08c67cb23d15f22d820f2024e2f2d8c Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Mon, 26 Sep 2022 14:40:51 +0800
Subject: [PATCH] Enable eager mode on xpu (#46227)

* enable eager mode on xpu, test=kunlun

* add numpy support to xpu

* fix tensor using error

* fix error, test=kunlun

* fix failed tests, test=kunlun
---
 paddle/fluid/pybind/eager_method.cc           | 27 +++++++++++++++++++
 paddle/phi/api/include/tensor.h               |  8 ++++++
 paddle/phi/api/lib/tensor.cc                  |  2 ++
 python/paddle/fluid/framework.py              | 10 +++----
 .../unittests/xpu/test_dropout_op_xpu.py      | 18 ++++++-----
 .../xpu/test_fused_gemm_epilogue_op_xpu.py    | 19 ++++++-------
 .../test_fused_resnet_basic_block_op_xpu.py   |  1 +
 python/paddle/incubate/xpu/resnet_block.py    |  4 +--
 8 files changed, 66 insertions(+), 23 deletions(-)

diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index 04820bdb30e7d..3521a9d5399e4 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -215,6 +215,33 @@ static PyObject* tensor_method_numpy(TensorObject* self,
           kind);
     }
 #endif
+#if defined(PADDLE_WITH_XPU)
+  } else if (self->tensor.is_xpu()) {
+    platform::CPUPlace place;
+    if (self->tensor.is_selected_rows()) {
+      VLOG(6) << "Getting SelectedRows's numpy value";
+      auto* selected_rows =
+          static_cast<phi::SelectedRows*>(self->tensor.impl().get());
+      auto* dense_tensor = static_cast<phi::DenseTensor*>(
+          selected_rows->mutable_value());
+      paddle::memory::Copy(
+          place,
+          reinterpret_cast<void*>(pybind11::detail::array_proxy(array)->data),
+          dense_tensor->place(),
+          dense_tensor->data(),
+          sizeof_dtype * numel);
+    } else {
+      VLOG(6) << "Getting DenseTensor's numpy value";
+      auto dense_tensor =
+          std::dynamic_pointer_cast<phi::DenseTensor>(self->tensor.impl());
+      paddle::memory::Copy(
+          place,
+          reinterpret_cast<void*>(pybind11::detail::array_proxy(array)->data),
+          dense_tensor->place(),
+          dense_tensor->data(),
+          sizeof_dtype * numel);
+    }
+#endif
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
   } else if (self->tensor.is_custom_device()) {
     if (self->tensor.is_selected_rows()) {
diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h
index 67cedaf6710ab..87ab05a2dcc90 100644
--- a/paddle/phi/api/include/tensor.h
+++ b/paddle/phi/api/include/tensor.h
@@ -286,6 +286,14 @@ class PADDLE_API Tensor final {
    */
   bool is_gpu_pinned() const;
 
+  /**
+   * @brief Determine whether the tensor device is XPU
+   *
+   * @return true
+   * @return false
+   */
+  bool is_xpu() const;
+
   /**
   * @brief Determine whether the tensor device is CustomDevice
   *
diff --git a/paddle/phi/api/lib/tensor.cc b/paddle/phi/api/lib/tensor.cc
index 70ee28bc2561e..dfc5c427df7ec 100644
--- a/paddle/phi/api/lib/tensor.cc
+++ b/paddle/phi/api/lib/tensor.cc
@@ -157,6 +157,8 @@ bool Tensor::is_gpu_pinned() const {
   return paddle::platform::is_cuda_pinned_place(place());
 }
 
+bool Tensor::is_xpu() const { return paddle::platform::is_xpu_place(place()); }
+
 bool Tensor::is_custom_device() const {
   return paddle::platform::is_custom_place(place());
 }
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index d4882905d57ba..1bb77f38372e6 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -181,9 +181,9 @@ def _fallback_legacy_dygraph():
     global _in_eager_mode_
     global _is_first_import_
     need_fallback = False
-    # Only enable eager on CPU/GPU
-    is_not_support = core.is_compiled_with_xpu() or core.is_compiled_with_npu(
-    ) or core.is_compiled_with_ipu() or core.is_compiled_with_mlu()
+    # Only enable eager on CPU/GPU/XPU
+    is_not_support = core.is_compiled_with_npu() or core.is_compiled_with_ipu(
+    ) or core.is_compiled_with_mlu()
 
     if _in_eager_mode_ and is_not_support:
         # switch into legacy dygraph mode
@@ -245,8 +245,8 @@ def _non_static_mode():
 
 @signature_safe_contextmanager
 def _test_eager_guard(place=None):
-    # FIXME(dev): We haven't fully verified eager mode on XPU/NPU et.al but
-    # only GPU/CPU. Remove this after we improve this feature.
+    # FIXME(dev): We haven't fully verified eager mode on NPU et.al but
+    # only GPU/CPU/XPU. Remove this after we improve this feature.
     already_fallback = _fallback_legacy_dygraph()
     if not already_fallback:
         _disable_legacy_dygraph()
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py
index 3227d76a64222..6a1f7ba2161a4 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_dropout_op_xpu.py
@@ -19,6 +19,7 @@
 import unittest
 import numpy as np
 import paddle.fluid.core as core
+from paddle import _legacy_C_ops
 from op_test import OpTest, skip_check_grad_ci
 import paddle
 import paddle.fluid as fluid
@@ -185,7 +186,8 @@ def test_backward_downscale_in_infer(self):
             input = paddle.uniform([40, 40], dtype=self.in_type)
             input.stop_gradient = False
 
-            out, mask = core.ops.dropout(input, 'dropout_prob', 0.5)
+            out, mask = _legacy_C_ops.dropout(input, 'dropout_prob',
+                                              0.5)
             out.backward()
 
             np.testing.assert_allclose(
@@ -199,9 +201,10 @@ def test_backward_upscale_train(self):
             prob = 0.5
             input = paddle.uniform([40, 40], dtype=self.in_type)
             input.stop_gradient = False
-            out, mask = core.ops.dropout(input, 'dropout_prob', prob,
-                                         "dropout_implementation",
-                                         "upscale_in_train")
+            out, mask = _legacy_C_ops.dropout(input, 'dropout_prob',
+                                              prob,
+                                              "dropout_implementation",
+                                              "upscale_in_train")
             out.backward()
 
             np.testing.assert_allclose(
@@ -215,9 +218,10 @@ def test_backward_upscale_train_2(self):
             prob = 0.3
             input = paddle.uniform([40, 40], dtype=self.in_type)
             input.stop_gradient = False
-            out, mask = core.ops.dropout(input, 'dropout_prob', prob,
-                                         "dropout_implementation",
-                                         "upscale_in_train")
+            out, mask = _legacy_C_ops.dropout(input, 'dropout_prob',
+                                              prob,
+                                              "dropout_implementation",
+                                              "upscale_in_train")
             out.backward()
 
             np.testing.assert_allclose(
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_op_xpu.py
index 2e1d5848e6c6c..35b2b57372760 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_fused_gemm_epilogue_op_xpu.py
@@ -21,6 +21,7 @@
 import numpy as np
 import paddle
 import paddle.fluid.core as core
+from paddle import _legacy_C_ops
 from op_test_xpu import XPUOpTest
 from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper
 
@@ -251,15 +252,15 @@ def test_case_act(self):
         x.stop_gradient = False
         y.stop_gradient = False
 
-        out1 = core.ops.fused_gemm_epilogue(x, y, bias, 'trans_x', False,
-                                            'trans_y', False, 'activation',
-                                            'none')
-        out2 = core.ops.fused_gemm_epilogue(x, y, bias, 'trans_x', False,
-                                            'trans_y', False, 'activation',
-                                            'relu')
-        out3 = core.ops.fused_gemm_epilogue(x, y, bias, 'trans_x', False,
-                                            'trans_y', False, 'activation',
-                                            'gelu')
+        out1 = _legacy_C_ops.fused_gemm_epilogue(x, y, bias, 'trans_x', False,
+                                                 'trans_y', False, 'activation',
+                                                 'none')
+        out2 = _legacy_C_ops.fused_gemm_epilogue(x, y, bias, 'trans_x', False,
+                                                 'trans_y', False, 'activation',
+                                                 'relu')
+        out3 = _legacy_C_ops.fused_gemm_epilogue(x, y, bias, 'trans_x', False,
+                                                 'trans_y', False, 'activation',
+                                                 'gelu')
 
         out_np1 = get_output(x_np, y_np, bias_np, 'none')
         out_np2 = get_output(x_np, y_np, bias_np, 'relu')
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py
index 4e70975a3622e..4afa3725f2f7d 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py
@@ -24,6 +24,7 @@
 import paddle.fluid as fluid
 import paddle.nn as nn
 from paddle.fluid import core
+from paddle import _legacy_C_ops
 from paddle.incubate.xpu.resnet_block import ResNetBasicBlock
 from paddle.fluid.framework import default_main_program
 from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper
diff --git a/python/paddle/incubate/xpu/resnet_block.py b/python/paddle/incubate/xpu/resnet_block.py
index 39b439730759c..2a93821c38c41 100644
--- a/python/paddle/incubate/xpu/resnet_block.py
+++ b/python/paddle/incubate/xpu/resnet_block.py
@@ -71,7 +71,7 @@ def resnet_basic_block(x,
                        trainable_statistics=False,
                        find_conv_max=True):
 
-    if fluid.framework.in_dygraph_mode():
+    if fluid.framework._non_static_mode():
         attrs = ('stride1', stride1, 'stride2', stride2, 'stride3', stride3,
                  'padding1', padding1, 'padding2', padding2, 'padding3',
                  padding3, 'dilation1', dilation1, 'dilation2', dilation2,
@@ -83,7 +83,7 @@ def resnet_basic_block(x,
                 find_conv_max)
 
         out, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _ = \
-            getattr(_C_ops, "resnet_basic_block")(x, filter1, scale1, bias1, mean1, var1, filter2, scale2, bias2, mean2, var2, \
+            _legacy_C_ops.resnet_basic_block(x, filter1, scale1, bias1, mean1, var1, filter2, scale2, bias2, mean2, var2, \
                 filter3, scale3, bias3, mean3, var3, mean1, var1, mean2, var2, mean3, var3, *attrs)
         return out
     helper = LayerHelper('resnet_basic_block', **locals())
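
Illustrative usage (a minimal sketch, not part of the patch above): the snippet below shows the
user-visible behaviour this change enables, assuming a PaddlePaddle build compiled with XPU
support and at least one XPU device visible; the device string, shapes and values are arbitrary
examples chosen for illustration.

    import numpy as np
    import paddle

    paddle.set_device('xpu')                  # place new tensors on the XPU device
    x = paddle.ones([2, 3], dtype='float32')  # eager-mode (dygraph) Tensor on XPU
    x.stop_gradient = False

    y = (x * 2.0).sum()
    y.backward()                              # eager autograd now runs on XPU instead of
                                              # falling back to legacy dygraph

    # Tensor.numpy() copies the XPU buffer back to host memory through the new
    # PADDLE_WITH_XPU branch of tensor_method_numpy.
    np.testing.assert_allclose(x.grad.numpy(),
                               np.full([2, 3], 2.0, dtype='float32'))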