diff --git a/paddle/fluid/distributed/collective/process_group.h b/paddle/fluid/distributed/collective/process_group.h
index 8767dfa60cf181..c8bb357739881b 100644
--- a/paddle/fluid/distributed/collective/process_group.h
+++ b/paddle/fluid/distributed/collective/process_group.h
@@ -20,12 +20,12 @@
 #include
 #include
+#include "paddle/common/errors.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/device_context.h"
 #include "paddle/phi/core/distributed/types.h"
 #include "paddle/phi/core/distributed/utils.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 
 constexpr auto kWaitTimeout = std::chrono::milliseconds(0);
diff --git a/paddle/fluid/distributed/collective/process_group_bkcl.cc b/paddle/fluid/distributed/collective/process_group_bkcl.cc
index 81f52bc97f3342..f38b3e525eefcb 100644
--- a/paddle/fluid/distributed/collective/process_group_bkcl.cc
+++ b/paddle/fluid/distributed/collective/process_group_bkcl.cc
@@ -14,6 +14,7 @@
 #include "paddle/fluid/distributed/collective/process_group_bkcl.h"
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/distributed/collective/bkcl_tools.h"
 #include "paddle/fluid/distributed/collective/common.h"
 #include "paddle/fluid/framework/convert_utils.h"
@@ -23,7 +24,6 @@
 #include "paddle/phi/core/device_context.h"
 #include "paddle/phi/core/distributed/check/static_check.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace distributed {
diff --git a/paddle/fluid/distributed/collective/process_group_with_stream.h b/paddle/fluid/distributed/collective/process_group_with_stream.h
index 0cea9bb3ed87e6..58d1a042fec3c8 100644
--- a/paddle/fluid/distributed/collective/process_group_with_stream.h
+++ b/paddle/fluid/distributed/collective/process_group_with_stream.h
@@ -14,9 +14,9 @@
 #pragma once
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/distributed/collective/process_group.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace distributed {
diff --git a/paddle/fluid/distributed/collective/process_group_without_stream.h b/paddle/fluid/distributed/collective/process_group_without_stream.h
index dd22c0f1e4cbdb..a3c103574cbc5a 100644
--- a/paddle/fluid/distributed/collective/process_group_without_stream.h
+++ b/paddle/fluid/distributed/collective/process_group_without_stream.h
@@ -14,9 +14,9 @@
 #pragma once
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/distributed/collective/process_group.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace distributed {
diff --git a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc
index 7817b9bc0e9dfe..4190019e0d1738 100644
--- a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc
+++ b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc
@@ -14,12 +14,12 @@
 #include "paddle/fluid/distributed/fleet_executor/compute_interceptor.h"
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/distributed/fleet_executor/carrier.h"
 #include "paddle/fluid/distributed/fleet_executor/task_node.h"
 #include "paddle/fluid/framework/executor_gc_helper.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/jit/serializer.h"
-#include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace distributed {
diff --git a/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc b/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc
index 2e3389af5feb59..704dd16400065c 100644
--- a/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc
+++ b/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc
@@ -14,13 +14,13 @@
 #include "paddle/fluid/distributed/fleet_executor/cond_interceptor.h"
 #include
+#include "paddle/common/errors.h"
 #include "paddle/fluid/distributed/fleet_executor/task_node.h"
 #include "paddle/fluid/framework/executor_gc_helper.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/platform/errors.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/core/dense_tensor.h"
-#include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace distributed {
diff --git a/paddle/fluid/distributed/fleet_executor/start_interceptor.cc b/paddle/fluid/distributed/fleet_executor/start_interceptor.cc
index 830f619ed3c00c..1fe4aaea15fc4d 100644
--- a/paddle/fluid/distributed/fleet_executor/start_interceptor.cc
+++ b/paddle/fluid/distributed/fleet_executor/start_interceptor.cc
@@ -14,9 +14,9 @@
 #include "paddle/fluid/distributed/fleet_executor/start_interceptor.h"
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/distributed/fleet_executor/task_node.h"
 #include "paddle/fluid/framework/operator.h"
-#include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace distributed {
diff --git a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
index d29ef0f9ad1fad..61080c52c94bac 100644
--- a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
+++ b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc
@@ -14,6 +14,7 @@
 #include "paddle/fluid/framework/ir/auto_mixed_precision_pass.h"
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/phi/common/bfloat16.h"
@@ -21,7 +22,6 @@
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
 #include "paddle/phi/backends/device_manager.h"
 #endif
diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc
index 08aafa4a60a0e7..a1f74d3423006b 100644
--- a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc
@@ -16,10 +16,10 @@
 #include
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/utils/string/pretty_log.h"
 
 namespace paddle {
diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc
index 697a34904c817e..f9e8722ccf3978 100644
--- a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc
@@ -19,11 +19,11 @@
 #include
 #include
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/utils/string/pretty_log.h"
 
 namespace paddle {
diff --git a/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc b/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc
index 5a086acd7cac2e..a59e1be1595036 100644
--- a/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc
+++ b/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc
@@ -18,13 +18,13 @@
 #include
 #include
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/data_layout_transform.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/node.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/common/layout.h"
-#include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/new_executor/interpreter/job.h b/paddle/fluid/framework/new_executor/interpreter/job.h
index 952702d6e2f0a5..1ff08d062d23c6 100644
--- a/paddle/fluid/framework/new_executor/interpreter/job.h
+++ b/paddle/fluid/framework/new_executor/interpreter/job.h
@@ -16,8 +16,8 @@
 #include
 #include
 
+#include "paddle/common/errors.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/phi/core/macros.h"
 
 namespace paddle {
diff --git a/paddle/fluid/imperative/layout_autotune.cc b/paddle/fluid/imperative/layout_autotune.cc
index 18baaf98fdf11c..7903c212ec90aa 100644
--- a/paddle/fluid/imperative/layout_autotune.cc
+++ b/paddle/fluid/imperative/layout_autotune.cc
@@ -14,12 +14,12 @@
 #include "paddle/fluid/imperative/layout_autotune.h"
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/imperative/layout_transformer.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace imperative {
diff --git a/paddle/fluid/imperative/layout_transformer.h b/paddle/fluid/imperative/layout_transformer.h
index 61bd4f9dfe2b8f..a18207df0260e0 100644
--- a/paddle/fluid/imperative/layout_transformer.h
+++ b/paddle/fluid/imperative/layout_transformer.h
@@ -13,12 +13,12 @@
 // limitations under the License.
 #pragma once
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/imperative/layout_autotune.h"
 #include "paddle/fluid/imperative/tracer.h"
 #include "paddle/fluid/imperative/var_helper.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/phi/core/tensor_utils.h"
 
 namespace paddle {
 namespace imperative {
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index d3e4ce93ca01e5..5e705b4fb9877a 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -22,13 +22,13 @@
 #include
 #include
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/inference/analysis/argument.h"
 #include "paddle/fluid/string/pretty_log.h"
 #include "paddle/phi/common/data_type.h"
-#include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace inference {
diff --git a/paddle/fluid/inference/api/resource_manager.cc b/paddle/fluid/inference/api/resource_manager.cc
index 2414aaee1b78b5..2806204f4b9406 100644
--- a/paddle/fluid/inference/api/resource_manager.cc
+++ b/paddle/fluid/inference/api/resource_manager.cc
@@ -20,6 +20,7 @@
 #include
 #include
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/memory/allocation/allocator_facade.h"
 #include "paddle/fluid/platform/device/gpu/gpu_types.h"
 #include "paddle/phi/backends/gpu/forwards.h"
@@ -28,7 +29,6 @@
 #include "paddle/phi/backends/gpu/gpu_resources.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/allocator.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/phi/core/generator.h"
 #include "unsupported/Eigen/CXX11/Tensor"
diff --git a/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc b/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc
index 9f14c8c1b64fb8..e811827a7296c1 100644
--- a/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc
+++ b/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc
@@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */
+#include "paddle/common/errors.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/helper.h"
 #include "paddle/fluid/inference/tensorrt/plugin/generic_plugin.h"
 #include "paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.h"
 #include "paddle/phi/api/ext/op_meta_info.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace inference {
diff --git a/paddle/fluid/jit/layer.cc b/paddle/fluid/jit/layer.cc
index d6986b51306ebd..0b2e20f77837a2 100644
--- a/paddle/fluid/jit/layer.cc
+++ b/paddle/fluid/jit/layer.cc
@@ -14,9 +14,9 @@
 #include "paddle/fluid/jit/layer.h"
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/framework/variable.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 
 #include "paddle/fluid/jit/compilation_unit.h"
 #include "paddle/fluid/jit/engine/base_engine.h"
diff --git a/paddle/fluid/jit/property.cc b/paddle/fluid/jit/property.cc
index 9b0c50a954624c..687468df83a3dc 100644
--- a/paddle/fluid/jit/property.cc
+++ b/paddle/fluid/jit/property.cc
@@ -18,10 +18,10 @@ limitations under the License. */
 
 #include "glog/logging.h"
+#include "paddle/common/errors.h"
 #include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/jit/property.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 
 namespace paddle {
 namespace jit {
diff --git a/paddle/fluid/operators/fused/fused_attention_utils.h b/paddle/fluid/operators/fused/fused_attention_utils.h
index 7d17041133bcd7..b198c4a5792912 100644
--- a/paddle/fluid/operators/fused/fused_attention_utils.h
+++ b/paddle/fluid/operators/fused/fused_attention_utils.h
@@ -23,8 +23,8 @@
 PHI_DECLARE_bool(dynamic_static_unified_comm);
 #endif
 
+#include "paddle/common/errors.h"
 #include "paddle/phi/core/distributed/comm_context_manager.h"
-#include "paddle/phi/core/errors.h"
 
 namespace phi {
 namespace fusion {
diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cu b/paddle/fluid/operators/fused/fused_feedforward_op.cu
index ee40633e4252b3..656f8ba6ad0acb 100644
--- a/paddle/fluid/operators/fused/fused_feedforward_op.cu
+++ b/paddle/fluid/operators/fused/fused_feedforward_op.cu
@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */
+#include "paddle/common/errors.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/fused/fused_attention_utils.h"
 #include "paddle/phi/api/include/tensor.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 #include "paddle/phi/kernels/funcs/broadcast_function.h"
 #include "paddle/phi/kernels/funcs/elementwise_functor.h"
diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
index 0d170eae31cfb1..b6fce494f5a740 100644
--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -23,6 +23,7 @@
 #include
 #include
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/framework/data_device_transform.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/op_registry.h"
@@ -39,7 +40,6 @@
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/phi/kernels/cast_kernel.h"
 #include "paddle/phi/kernels/funcs/data_type_transform.h"
 #include "paddle/utils/string/string_helper.h"
diff --git a/paddle/fluid/pir/transforms/transform_general_functions.h b/paddle/fluid/pir/transforms/transform_general_functions.h
index 77c790235b8329..ab279f0ab3a958 100644
--- a/paddle/fluid/pir/transforms/transform_general_functions.h
+++ b/paddle/fluid/pir/transforms/transform_general_functions.h
@@ -14,9 +14,9 @@
 #pragma once
 
+#include "paddle/common/errors.h"
 #include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/pir/core/operation.h"
 #include "paddle/pir/core/parameter.h"
 #include "paddle/pir/core/type.h"
diff --git a/paddle/fluid/platform/errors.h b/paddle/fluid/platform/errors.h
index 758af3e2d9137e..e3c307820f84bb 100644
--- a/paddle/fluid/platform/errors.h
+++ b/paddle/fluid/platform/errors.h
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
-#include "paddle/phi/core/errors.h"
+#include "paddle/common/errors.h"
 namespace paddle {
 namespace platform {
 namespace errors = ::phi::errors;
diff --git a/paddle/fluid/pybind/eval_frame_tools.cc b/paddle/fluid/pybind/eval_frame_tools.cc
index 3b8df99eb2a3f3..da09c2478c02cd 100644
--- a/paddle/fluid/pybind/eval_frame_tools.cc
+++ b/paddle/fluid/pybind/eval_frame_tools.cc
@@ -18,9 +18,9 @@
 #include
 
+#include "paddle/common/errors.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 
 /*============================ Dict Tree ================================*/
diff --git a/paddle/phi/api/lib/context_pool.cc b/paddle/phi/api/lib/context_pool.cc
index ee1e21a58e2f1b..1caa57770f54e2 100644
--- a/paddle/phi/api/lib/context_pool.cc
+++ b/paddle/phi/api/lib/context_pool.cc
@@ -14,10 +14,10 @@ limitations under the License.
 */
 #include "paddle/phi/api/include/context_pool.h"
 
+#include "paddle/common/enforce.h"
 #include "paddle/phi/backends/context_pool.h"
 #include "paddle/phi/common/memory_utils.h"
 #include "paddle/phi/core/allocator.h"
-#include "paddle/phi/core/enforce.h"
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 #include "paddle/phi/core/cuda_stream.h"
diff --git a/paddle/phi/api/lib/op_meta_info.cc b/paddle/phi/api/lib/op_meta_info.cc
index 14334aa7c42a6d..20cb6a142e2f03 100644
--- a/paddle/phi/api/lib/op_meta_info.cc
+++ b/paddle/phi/api/lib/op_meta_info.cc
@@ -20,9 +20,9 @@ limitations under the License. */
 #include
 
 #include "glog/logging.h"
+#include "paddle/common/enforce.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h"
-#include "paddle/phi/core/enforce.h"
 
 namespace paddle {
diff --git a/paddle/phi/api/lib/scalar.cc b/paddle/phi/api/lib/scalar.cc
index 75232adb9be45d..a217c4da021f0a 100644
--- a/paddle/phi/api/lib/scalar.cc
+++ b/paddle/phi/api/lib/scalar.cc
@@ -14,10 +14,10 @@ limitations under the License. */
 #include "paddle/common/scalar.h"
 
+#include "paddle/common/enforce.h"
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/api/lib/tensor_copy.h"
 #include "paddle/phi/common/place.h"
-#include "paddle/phi/core/enforce.h"
 
 namespace paddle {
 namespace experimental {
diff --git a/paddle/phi/api/lib/tensor.cc b/paddle/phi/api/lib/tensor.cc
index 206d5082e62dd1..edfc76ffe21ab6 100644
--- a/paddle/phi/api/lib/tensor.cc
+++ b/paddle/phi/api/lib/tensor.cc
@@ -20,14 +20,14 @@ limitations under the License. */
 
 #include "glog/logging.h"
+#include "paddle/common/ddim.h"
+#include "paddle/common/enforce.h"
 #include "paddle/phi/api/include/context_pool.h"
 #include "paddle/phi/api/lib/utils/allocator.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
-#include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h"
-#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/selected_rows.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/core/sparse_csr_tensor.h"
diff --git a/paddle/phi/api/profiler/common_event.h b/paddle/phi/api/profiler/common_event.h
index 76b9d5fa609b9b..d9e3ed74fd397a 100644
--- a/paddle/phi/api/profiler/common_event.h
+++ b/paddle/phi/api/profiler/common_event.h
@@ -18,10 +18,10 @@
 #include
 #include
 
+#include "paddle/common/ddim.h"
 #include "paddle/phi/api/profiler/event.h"  // import EventRole, TODO(TIEXING): remove later
 #include "paddle/phi/api/profiler/trace_event.h"
 #include "paddle/phi/core/attribute.h"
-#include "paddle/phi/core/ddim.h"
 
 namespace phi {
diff --git a/paddle/phi/api/profiler/device_tracer.cc b/paddle/phi/api/profiler/device_tracer.cc
index e294130da7bab8..8f5c1c79cb1a1a 100644
--- a/paddle/phi/api/profiler/device_tracer.cc
+++ b/paddle/phi/api/profiler/device_tracer.cc
@@ -22,7 +22,7 @@ limitations under the License. */
 #include  // NOLINT
 
 #include "glog/logging.h"
-#include "paddle/phi/core/enforce.h"
+#include "paddle/common/enforce.h"
 #include "paddle/utils/flags.h"
 
 PD_DECLARE_bool(enable_host_event_recorder_hook);
diff --git a/paddle/phi/api/profiler/profiler.cc b/paddle/phi/api/profiler/profiler.cc
index 6dc419658d3c27..7a0d819a257267 100644
--- a/paddle/phi/api/profiler/profiler.cc
+++ b/paddle/phi/api/profiler/profiler.cc
@@ -22,12 +22,12 @@ limitations under the License.
 */
 #include "glog/logging.h"
+#include "paddle/common/enforce.h"
 #include "paddle/phi/api/profiler/common_event.h"
 #include "paddle/phi/api/profiler/device_tracer.h"
 #include "paddle/phi/api/profiler/host_event_recorder.h"
 #include "paddle/phi/api/profiler/host_tracer.h"
 #include "paddle/phi/api/profiler/profiler_helper.h"
-#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/os_info.h"
 #ifdef PADDLE_WITH_CUDA
 #include "paddle/phi/backends/dynload/nvtx.h"
diff --git a/paddle/phi/api/profiler/supplement_tracing.h b/paddle/phi/api/profiler/supplement_tracing.h
index e93ad63b607ade..fc20f041ec02a7 100644
--- a/paddle/phi/api/profiler/supplement_tracing.h
+++ b/paddle/phi/api/profiler/supplement_tracing.h
@@ -18,8 +18,8 @@ limitations under the License. */
 #include
 #include
 
+#include "paddle/common/ddim.h"
 #include "paddle/phi/core/attribute.h"
-#include "paddle/phi/core/ddim.h"
 
 namespace phi {
diff --git a/paddle/phi/api/yaml/generator/tensor_operants_gen.py b/paddle/phi/api/yaml/generator/tensor_operants_gen.py
index 444cfb1ddd1c81..845cf8afae2199 100644
--- a/paddle/phi/api/yaml/generator/tensor_operants_gen.py
+++ b/paddle/phi/api/yaml/generator/tensor_operants_gen.py
@@ -443,8 +443,8 @@ class TEST_API OperantsManager {
 #include "paddle/phi/api/include/operants_manager.h"
 
 #include "glog/logging.h"
-#include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
+#include "paddle/common/enforce.h"
+#include "paddle/common/errors.h"
 #include "paddle/utils/flags.h"
 """
diff --git a/paddle/phi/backends/c_comm_lib.h b/paddle/phi/backends/c_comm_lib.h
index cc86d0cae00915..0b6cee136d2a58 100644
--- a/paddle/phi/backends/c_comm_lib.h
+++ b/paddle/phi/backends/c_comm_lib.h
@@ -15,11 +15,11 @@
 #pragma once
 #include
 
+#include "paddle/common/enforce.h"
+#include "paddle/common/errors.h"
 #include "paddle/common/macros.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/common/place.h"
-#include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 
 #include "paddle/phi/common/reduce_type.h"
diff --git a/paddle/phi/backends/callback_manager.cc b/paddle/phi/backends/callback_manager.cc
index 0d658258fa4c05..4b9ccd7c798e23 100644
--- a/paddle/phi/backends/callback_manager.cc
+++ b/paddle/phi/backends/callback_manager.cc
@@ -16,8 +16,8 @@
 #include
 
+#include "paddle/common/enforce.h"
 #include "paddle/phi/backends/device_guard.h"
-#include "paddle/phi/core/enforce.h"
 
 namespace phi {
diff --git a/paddle/phi/backends/context_pool.cc b/paddle/phi/backends/context_pool.cc
index 7824fc3b160b10..4f7fefa3dfa027 100644
--- a/paddle/phi/backends/context_pool.cc
+++ b/paddle/phi/backends/context_pool.cc
@@ -16,8 +16,8 @@ limitations under the License. */
 
 #include "glog/logging.h"
+#include "paddle/common/enforce.h"
 #include "paddle/phi/common/memory_utils.h"
-#include "paddle/phi/core/enforce.h"
 
 namespace phi {
diff --git a/paddle/phi/backends/cpu/cpu_context.cc b/paddle/phi/backends/cpu/cpu_context.cc
index 1a3ae7ae351d6b..ca7f93f3aea852 100644
--- a/paddle/phi/backends/cpu/cpu_context.cc
+++ b/paddle/phi/backends/cpu/cpu_context.cc
@@ -14,8 +14,8 @@
 #include "paddle/phi/backends/cpu/cpu_context.h"
 
+#include "paddle/common/enforce.h"
 #include "paddle/phi/common/place.h"
-#include "paddle/phi/core/enforce.h"
 
 // NOTE: The paddle framework should add WITH_EIGEN option to support compile
 // without eigen.
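
Note: the paddle/fluid/platform/errors.h hunk above is what makes this whole header
migration non-breaking: the old header now forwards to paddle/common/errors.h, and a
namespace alias keeps the old paddle::platform::errors::* spellings compiling. A minimal
sketch of that forwarding pattern (illustrative only, restating that hunk; not an
additional change in this patch):

    // Sketch of the forwarding-header pattern used by this patch, mirroring
    // the post-patch content of paddle/fluid/platform/errors.h.
    #pragma once
    #include "paddle/common/errors.h"  // new canonical home of the errors API
    namespace paddle {
    namespace platform {
    // Existing call sites such as platform::errors::InvalidArgument(...)
    // keep compiling against the relocated implementation.
    namespace errors = ::phi::errors;
    }  // namespace platform
    }  // namespace paddle
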
diff --git a/paddle/phi/backends/custom/enforce_custom.h b/paddle/phi/backends/custom/enforce_custom.h
index c98d4580d3cdb8..96a653e266e4fc 100644
--- a/paddle/phi/backends/custom/enforce_custom.h
+++ b/paddle/phi/backends/custom/enforce_custom.h
@@ -16,8 +16,8 @@ limitations under the License. */
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
 #include
 
+#include "paddle/common/enforce.h"
 #include "paddle/phi/backends/device_ext.h"
-#include "paddle/phi/core/enforce.h"
 namespace phi {
 template
diff --git a/paddle/phi/backends/device_base.cc b/paddle/phi/backends/device_base.cc
index 5b6b8fcfc2fe9a..d014cf59695328 100644
--- a/paddle/phi/backends/device_base.cc
+++ b/paddle/phi/backends/device_base.cc
@@ -15,7 +15,7 @@
 #include "paddle/phi/backends/device_base.h"
 
 #include "glog/logging.h"
-#include "paddle/phi/core/enforce.h"
+#include "paddle/common/enforce.h"
 #include "paddle/utils/flags.h"
 
 PD_DECLARE_double(fraction_of_gpu_memory_to_use);
diff --git a/paddle/phi/backends/device_code.h b/paddle/phi/backends/device_code.h
index 8debb4dc9c45ee..1ecdf5cb3bac41 100644
--- a/paddle/phi/backends/device_code.h
+++ b/paddle/phi/backends/device_code.h
@@ -20,8 +20,8 @@ limitations under the License. */
 #include
 #include
 
+#include "paddle/common/enforce.h"
 #include "paddle/phi/common/place.h"
-#include "paddle/phi/core/enforce.h"
 #ifdef PADDLE_WITH_CUDA
 #include "paddle/phi/backends/dynload/cuda_driver.h"
 #include "paddle/phi/backends/dynload/nvrtc.h"
diff --git a/paddle/phi/backends/device_memory_aligment.h b/paddle/phi/backends/device_memory_aligment.h
index 8508d5206558d2..2276f76dc8e4d0 100644
--- a/paddle/phi/backends/device_memory_aligment.h
+++ b/paddle/phi/backends/device_memory_aligment.h
@@ -15,10 +15,10 @@ limitations under the License. */
 #pragma once
 #include
 
+#include "paddle/common/enforce.h"
+#include "paddle/common/errors.h"
 #include "paddle/phi/backends/cpu/cpu_info.h"
 #include "paddle/phi/common/place.h"
-#include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/backends/xpu/xpu_info.h"
diff --git a/paddle/phi/backends/dynload/cublasLt.h b/paddle/phi/backends/dynload/cublasLt.h
index 95800e1f64aacd..6604374a22db7a 100644
--- a/paddle/phi/backends/dynload/cublasLt.h
+++ b/paddle/phi/backends/dynload/cublasLt.h
@@ -44,7 +44,7 @@ extern void *cublasLt_dso_handle;
       using cublasLt_func = \
           decltype(::__name(std::declval<Args>()...)) (*)(Args...); \
       std::call_once(cublasLt_dso_flag, []() { \
-        cublasLt_dso_handle = phi::dynload::GetCublasLtDsoHandle(); \
+        cublasLt_dso_handle = common::dynload::GetCublasLtDsoHandle(); \
       }); \
       static void *p_##__name = dlsym(cublasLt_dso_handle, #__name); \
       return reinterpret_cast<cublasLt_func>(p_##__name)(args...); \
diff --git a/paddle/phi/backends/dynload/cuda_driver.cc b/paddle/phi/backends/dynload/cuda_driver.cc
index d9fd89a0c65a6f..27000ccfe332e9 100644
--- a/paddle/phi/backends/dynload/cuda_driver.cc
+++ b/paddle/phi/backends/dynload/cuda_driver.cc
@@ -14,7 +14,7 @@ limitations under the License.
 */
 #include "paddle/phi/backends/dynload/cuda_driver.h"
 
-namespace phi {
+namespace common {
 namespace dynload {
 
 std::once_flag cuda_dso_flag;
@@ -34,4 +34,4 @@ bool HasCUDADriver() {
 }
 
 }  // namespace dynload
-}  // namespace phi
+}  // namespace common
diff --git a/paddle/phi/backends/dynload/cuda_driver.h b/paddle/phi/backends/dynload/cuda_driver.h
index 1a5f243c31257a..a949bcdb2b0f40 100644
--- a/paddle/phi/backends/dynload/cuda_driver.h
+++ b/paddle/phi/backends/dynload/cuda_driver.h
@@ -34,7 +34,7 @@ extern bool HasCUDADriver();
     auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
       using cuda_func = decltype(&::__name); \
       std::call_once(cuda_dso_flag, []() { \
-        cuda_dso_handle = phi::dynload::GetCUDADsoHandle(); \
+        cuda_dso_handle = common::dynload::GetCUDADsoHandle(); \
       }); \
       static void* p_##__name = dlsym(cuda_dso_handle, #__name); \
       return reinterpret_cast<cuda_func>(p_##__name)(args...); \
diff --git a/paddle/phi/backends/dynload/cudnn.cc b/paddle/phi/backends/dynload/cudnn.cc
index dbef1c002e8d93..9dcd8c69499703 100644
--- a/paddle/phi/backends/dynload/cudnn.cc
+++ b/paddle/phi/backends/dynload/cudnn.cc
@@ -14,7 +14,7 @@ limitations under the License. */
 #include "paddle/common/backends/dynload/cudnn.h"
 
-#include "paddle/phi/core/enforce.h"
+#include "paddle/common/enforce.h"
 
 namespace phi {
 namespace dynload {
diff --git a/paddle/phi/backends/dynload/cudnn_frontend.h b/paddle/phi/backends/dynload/cudnn_frontend.h
index 0ea9d7cf7adf9f..7bfb51c73a5a95 100644
--- a/paddle/phi/backends/dynload/cudnn_frontend.h
+++ b/paddle/phi/backends/dynload/cudnn_frontend.h
@@ -23,8 +23,8 @@ limitations under the License. */
 PD_DECLARE_bool(enable_cudnn_frontend);
 
 // Redirect the CUDNN APIs in the cudnn_frontend namespace to
-// the functions in phi::dynload
-#define CUDNN_FRONTEND_OVERRIDE_SYMBOL(__name) using phi::dynload::__name
+// the functions in common::dynload
+#define CUDNN_FRONTEND_OVERRIDE_SYMBOL(__name) using common::dynload::__name
 
 #define CUDNN_FRONTEND_APPLY_EACH(__macro) \
   __macro(cudnnBackendCreateDescriptor); \
diff --git a/paddle/phi/backends/dynload/cufft.cc b/paddle/phi/backends/dynload/cufft.cc
index a15969ecc3f87c..d9ac967208ba40 100644
--- a/paddle/phi/backends/dynload/cufft.cc
+++ b/paddle/phi/backends/dynload/cufft.cc
@@ -14,7 +14,7 @@ limitations under the License. */
 #include "paddle/phi/backends/dynload/cufft.h"
 
-#include "paddle/phi/core/enforce.h"
+#include "paddle/common/enforce.h"
 
 namespace phi {
 namespace dynload {
diff --git a/paddle/phi/backends/dynload/cufft.h b/paddle/phi/backends/dynload/cufft.h
index 30c9ec6e8f7dab..84cce45235c985 100644
--- a/paddle/phi/backends/dynload/cufft.h
+++ b/paddle/phi/backends/dynload/cufft.h
@@ -37,7 +37,7 @@ extern void EnforceCUFFTLoaded(const char* fn_name);
     auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
       using cufft_func = decltype(&::__name); \
       std::call_once(cufft_dso_flag, []() { \
-        cufft_dso_handle = phi::dynload::GetCUFFTDsoHandle(); \
+        cufft_dso_handle = common::dynload::GetCUFFTDsoHandle(); \
      }); \
       EnforceCUFFTLoaded(#__name); \
       static void* p_##__name = dlsym(cufft_dso_handle, #__name); \
diff --git a/paddle/phi/backends/dynload/cupti.h b/paddle/phi/backends/dynload/cupti.h
index 8e02009b547039..6195255df9d741 100644
--- a/paddle/phi/backends/dynload/cupti.h
+++ b/paddle/phi/backends/dynload/cupti.h
@@ -43,7 +43,7 @@ extern void *cupti_dso_handle;
     inline CUptiResult CUPTIAPI operator()(Args... args) { \
       using cuptiFunc = decltype(&::__name); \
       std::call_once(cupti_dso_flag, []() { \
-        cupti_dso_handle = phi::dynload::GetCUPTIDsoHandle(); \
+        cupti_dso_handle = common::dynload::GetCUPTIDsoHandle(); \
       }); \
       static void *p_##__name = dlsym(cupti_dso_handle, #__name); \
       return reinterpret_cast<cuptiFunc>(p_##__name)(args...); \
diff --git a/paddle/phi/backends/dynload/curand.cc b/paddle/phi/backends/dynload/curand.cc
index 6666b7f23962d8..36ca5b696b6c1e 100644
--- a/paddle/phi/backends/dynload/curand.cc
+++ b/paddle/phi/backends/dynload/curand.cc
@@ -14,7 +14,7 @@ limitations under the License. */
 #include "paddle/common/backends/dynload/curand.h"
 
-namespace phi {
+namespace common {
 namespace dynload {
 
 std::once_flag curand_dso_flag;
@@ -25,4 +25,4 @@ void *curand_dso_handle;
 CURAND_RAND_ROUTINE_EACH(DEFINE_WRAP);
 
 }  // namespace dynload
-}  // namespace phi
+}  // namespace common
diff --git a/paddle/phi/backends/dynload/cusparse.h b/paddle/phi/backends/dynload/cusparse.h
index ec8c80d6749b4b..f5484ee43a1a8a 100644
--- a/paddle/phi/backends/dynload/cusparse.h
+++ b/paddle/phi/backends/dynload/cusparse.h
@@ -26,18 +26,18 @@ namespace dynload {
 extern std::once_flag cusparse_dso_flag;
 extern void *cusparse_dso_handle;
 
-#define DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP(__name) \
-  struct DynLoad__##__name { \
-    template <typename... Args> \
-    cusparseStatus_t operator()(Args... args) { \
-      using Func = decltype(&::__name); \
-      std::call_once(cusparse_dso_flag, []() { \
-        cusparse_dso_handle = phi::dynload::GetCusparseDsoHandle(); \
-      }); \
-      static void *p_##__name = dlsym(cusparse_dso_handle, #__name); \
-      return reinterpret_cast<Func>(p_##__name)(args...); \
-    } \
-  }; \
+#define DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP(__name) \
+  struct DynLoad__##__name { \
+    template <typename... Args> \
+    cusparseStatus_t operator()(Args... args) { \
+      using Func = decltype(&::__name); \
+      std::call_once(cusparse_dso_flag, []() { \
+        cusparse_dso_handle = common::dynload::GetCusparseDsoHandle(); \
+      }); \
+      static void *p_##__name = dlsym(cusparse_dso_handle, #__name); \
+      return reinterpret_cast<Func>(p_##__name)(args...); \
+    } \
+  }; \
   extern DynLoad__##__name __name
 
 #if defined(PADDLE_WITH_CUDA)
diff --git a/paddle/phi/backends/dynload/cusparseLt.h b/paddle/phi/backends/dynload/cusparseLt.h
index bdaae044ee1d96..f293e9e82be8b3 100644
--- a/paddle/phi/backends/dynload/cusparseLt.h
+++ b/paddle/phi/backends/dynload/cusparseLt.h
@@ -34,18 +34,18 @@ extern void *cusparselt_dso_handle;
 *
 * note: default dynamic linked libs
 */
-#define DECLARE_DYNAMIC_LOAD_CUSPARSELT_WRAP(__name) \
-  struct DynLoad__##__name { \
-    template <typename... Args> \
-    cusparseStatus_t operator()(Args... args) { \
-      using cusparseltFunc = decltype(&::__name); \
-      std::call_once(cusparselt_dso_flag, []() { \
-        cusparselt_dso_handle = phi::dynload::GetCusparseLtDsoHandle(); \
-      }); \
-      static void *p_##__name = dlsym(cusparselt_dso_handle, #__name); \
-      return reinterpret_cast<cusparseltFunc>(p_##__name)(args...); \
-    } \
-  }; \
+#define DECLARE_DYNAMIC_LOAD_CUSPARSELT_WRAP(__name) \
+  struct DynLoad__##__name { \
+    template <typename... Args> \
+    cusparseStatus_t operator()(Args... args) { \
+      using cusparseltFunc = decltype(&::__name); \
+      std::call_once(cusparselt_dso_flag, []() { \
+        cusparselt_dso_handle = common::dynload::GetCusparseLtDsoHandle(); \
+      }); \
+      static void *p_##__name = dlsym(cusparselt_dso_handle, #__name); \
+      return reinterpret_cast<cusparseltFunc>(p_##__name)(args...); \
+    } \
+  }; \
   extern DynLoad__##__name __name
 #if defined(PADDLE_WITH_CUDA)
 #if CUDA_VERSION >= 11020
diff --git a/paddle/phi/backends/dynload/dynamic_loader.cc b/paddle/phi/backends/dynload/dynamic_loader.cc
index 3e38732fb0c066..e0d7da9ee24cce 100644
--- a/paddle/phi/backends/dynload/dynamic_loader.cc
+++ b/paddle/phi/backends/dynload/dynamic_loader.cc
@@ -18,8 +18,8 @@ limitations under the License. */
 #include
 
 #include "paddle/common/backends/dynload/port.h"
+#include "paddle/common/enforce.h"
 #include "paddle/phi/backends/dynload/cupti_lib_path.h"
-#include "paddle/phi/core/enforce.h"
 
 #if defined(_WIN32)
 #include
diff --git a/paddle/phi/backends/dynload/flashattn.h b/paddle/phi/backends/dynload/flashattn.h
index 799d31346e0606..e229f76f62843d 100644
--- a/paddle/phi/backends/dynload/flashattn.h
+++ b/paddle/phi/backends/dynload/flashattn.h
@@ -26,18 +26,18 @@ namespace dynload {
 extern std::once_flag flashattn_dso_flag;
 extern void* flashattn_dso_handle;
 
-#define DYNAMIC_LOAD_FLASHATTN_WRAP(__name) \
-  struct DynLoad__##__name { \
-    template <typename... Args> \
-    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
-      using flashattnFunc = decltype(&::__name); \
-      std::call_once(flashattn_dso_flag, []() { \
-        flashattn_dso_handle = phi::dynload::GetFlashAttnDsoHandle(); \
-      }); \
-      static void* p_##__name = dlsym(flashattn_dso_handle, #__name); \
-      return reinterpret_cast<flashattnFunc>(p_##__name)(args...); \
-    } \
-  }; \
+#define DYNAMIC_LOAD_FLASHATTN_WRAP(__name) \
+  struct DynLoad__##__name { \
+    template <typename... Args> \
+    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
+      using flashattnFunc = decltype(&::__name); \
+      std::call_once(flashattn_dso_flag, []() { \
+        flashattn_dso_handle = common::dynload::GetFlashAttnDsoHandle(); \
+      }); \
+      static void* p_##__name = dlsym(flashattn_dso_handle, #__name); \
+      return reinterpret_cast<flashattnFunc>(p_##__name)(args...); \
+    } \
+  }; \
   extern DynLoad__##__name __name
 
 #define DECLARE_DYNAMIC_LOAD_FLASHATTN_WRAP(__name) \
diff --git a/paddle/phi/backends/dynload/hiprtc.h b/paddle/phi/backends/dynload/hiprtc.h
index e4cf485dd22db1..66cb5d3ebec203 100644
--- a/paddle/phi/backends/dynload/hiprtc.h
+++ b/paddle/phi/backends/dynload/hiprtc.h
@@ -34,7 +34,7 @@ extern bool HasNVRTC();
     auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
       using hiprtc_func = decltype(&::__name); \
       std::call_once(hiprtc_dso_flag, []() { \
-        hiprtc_dso_handle = phi::dynload::GetNVRTCDsoHandle(); \
+        hiprtc_dso_handle = common::dynload::GetNVRTCDsoHandle(); \
       }); \
       static void* p_##__name = dlsym(hiprtc_dso_handle, #__name); \
       return reinterpret_cast<hiprtc_func>(p_##__name)(args...); \
diff --git a/paddle/phi/backends/dynload/lapack.h b/paddle/phi/backends/dynload/lapack.h
index b7ff843a1e273e..f010aa01f2e328 100644
--- a/paddle/phi/backends/dynload/lapack.h
+++ b/paddle/phi/backends/dynload/lapack.h
@@ -325,7 +325,7 @@ extern void *lapack_dso_handle;
     auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
       using lapackFunc = decltype(&::__name); \
       std::call_once(lapack_dso_flag, []() { \
-        lapack_dso_handle = phi::dynload::GetLAPACKDsoHandle(); \
+        lapack_dso_handle = common::dynload::GetLAPACKDsoHandle(); \
       }); \
       static void *p_##_name = dlsym(lapack_dso_handle, #__name); \
       return reinterpret_cast<lapackFunc>(p_##_name)(args...); \
diff --git a/paddle/phi/backends/dynload/miopen.cc b/paddle/phi/backends/dynload/miopen.cc
index b8f328b4aae34e..248d899e2477e3 100644
--- a/paddle/phi/backends/dynload/miopen.cc
+++ b/paddle/phi/backends/dynload/miopen.cc
@@ -14,7 +14,7 @@ limitations under the License. */
 #include "paddle/phi/backends/dynload/miopen.h"
 
-#include "paddle/phi/core/enforce.h"
+#include "paddle/common/enforce.h"
 
 namespace phi {
 namespace dynload {
diff --git a/paddle/phi/backends/dynload/mklrt.h b/paddle/phi/backends/dynload/mklrt.h
index 53d704b2b5c3ba..564cd95450a663 100644
--- a/paddle/phi/backends/dynload/mklrt.h
+++ b/paddle/phi/backends/dynload/mklrt.h
@@ -38,7 +38,7 @@ extern void* mklrt_dso_handle;
     auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
       using mklrtFunc = decltype(&::__name); \
       std::call_once(mklrt_dso_flag, []() { \
-        mklrt_dso_handle = phi::dynload::GetMKLRTDsoHandle(); \
+        mklrt_dso_handle = common::dynload::GetMKLRTDsoHandle(); \
       }); \
       static void* p_##__name = dlsym(mklrt_dso_handle, #__name); \
       return reinterpret_cast<mklrtFunc>(p_##__name)(args...); \
diff --git a/paddle/phi/backends/dynload/nvjpeg.h b/paddle/phi/backends/dynload/nvjpeg.h
index b2257dd7e5d159..1018e04d5ca0d8 100644
--- a/paddle/phi/backends/dynload/nvjpeg.h
+++ b/paddle/phi/backends/dynload/nvjpeg.h
@@ -29,7 +29,7 @@ extern void *nvjpeg_dso_handle;
     nvjpegStatus_t operator()(Args... args) { \
       using nvjpegFunc = decltype(&::__name); \
       std::call_once(nvjpeg_dso_flag, []() { \
-        nvjpeg_dso_handle = phi::dynload::GetNvjpegDsoHandle(); \
+        nvjpeg_dso_handle = common::dynload::GetNvjpegDsoHandle(); \
       }); \
       static void *p_##__name = dlsym(nvjpeg_dso_handle, #__name); \
       return reinterpret_cast<nvjpegFunc>(p_##__name)(args...); \
diff --git a/paddle/phi/backends/dynload/nvrtc.cc b/paddle/phi/backends/dynload/nvrtc.cc
index 0ed370801c6acd..cb12021f6b81fb 100644
--- a/paddle/phi/backends/dynload/nvrtc.cc
+++ b/paddle/phi/backends/dynload/nvrtc.cc
@@ -14,7 +14,7 @@ limitations under the License. */
 #include "paddle/phi/backends/dynload/nvrtc.h"
 
-namespace phi {
+namespace common {
 namespace dynload {
 
 std::once_flag nvrtc_dso_flag;
@@ -31,4 +31,4 @@ bool HasNVRTC() {
 }
 
 }  // namespace dynload
-}  // namespace phi
+}  // namespace common
diff --git a/paddle/phi/backends/dynload/nvrtc.h b/paddle/phi/backends/dynload/nvrtc.h
index ce5be605cdf50f..5275032464b774 100644
--- a/paddle/phi/backends/dynload/nvrtc.h
+++ b/paddle/phi/backends/dynload/nvrtc.h
@@ -21,7 +21,7 @@ limitations under the License. */
 #include "paddle/common/backends/dynload/dynamic_loader.h"
 #include "paddle/common/backends/dynload/port.h"
 
-namespace phi {
+namespace common {
 namespace dynload {
 
 extern std::once_flag nvrtc_dso_flag;
@@ -34,7 +34,7 @@ extern bool HasNVRTC();
     auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
       using nvrtc_func = decltype(&::__name); \
       std::call_once(nvrtc_dso_flag, []() { \
-        nvrtc_dso_handle = phi::dynload::GetNVRTCDsoHandle(); \
+        nvrtc_dso_handle = common::dynload::GetNVRTCDsoHandle(); \
       }); \
       static void* p_##__name = dlsym(nvrtc_dso_handle, #__name); \
       return reinterpret_cast<nvrtc_func>(p_##__name)(args...); \
@@ -61,4 +61,4 @@ NVRTC_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_NVRTC_WRAP);
 #undef DECLARE_DYNAMIC_LOAD_NVRTC_WRAP
 
 }  // namespace dynload
-}  // namespace phi
+}  // namespace common
diff --git a/paddle/phi/backends/dynload/nvtx.h b/paddle/phi/backends/dynload/nvtx.h
index c2817764d036a2..ef7ee636d8b935 100644
--- a/paddle/phi/backends/dynload/nvtx.h
+++ b/paddle/phi/backends/dynload/nvtx.h
@@ -32,7 +32,7 @@ extern void *nvtx_dso_handle;
     int operator()(Args... args) { \
       using nvtxFunc = decltype(&::__name); \
       std::call_once(nvtx_dso_flag, []() { \
-        nvtx_dso_handle = phi::dynload::GetNvtxDsoHandle(); \
+        nvtx_dso_handle = common::dynload::GetNvtxDsoHandle(); \
       }); \
       static void *p_##__name = dlsym(nvtx_dso_handle, #__name); \
       return reinterpret_cast<nvtxFunc>(p_##__name)(args...); \
diff --git a/paddle/phi/backends/dynload/rocblas.h b/paddle/phi/backends/dynload/rocblas.h
index da36aeeaf885e1..866a83d60486e3 100644
--- a/paddle/phi/backends/dynload/rocblas.h
+++ b/paddle/phi/backends/dynload/rocblas.h
@@ -42,7 +42,7 @@ extern void *rocblas_dso_handle;
     rocblas_status operator()(Args... args) { \
       using rocblas_func = decltype(&::__name); \
       std::call_once(rocblas_dso_flag, []() { \
-        rocblas_dso_handle = phi::dynload::GetCublasDsoHandle(); \
+        rocblas_dso_handle = common::dynload::GetCublasDsoHandle(); \
       }); \
       static void *p_##__name = dlsym(rocblas_dso_handle, #__name); \
       return reinterpret_cast<rocblas_func>(p_##__name)(args...); \
diff --git a/paddle/phi/backends/dynload/rocm_driver.h b/paddle/phi/backends/dynload/rocm_driver.h
index c1dd53caeea281..cff4023dfb3372 100644
--- a/paddle/phi/backends/dynload/rocm_driver.h
+++ b/paddle/phi/backends/dynload/rocm_driver.h
@@ -34,7 +34,7 @@ extern bool HasCUDADriver();
    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
       using rocm_func = decltype(&::__name); \
       std::call_once(rocm_dso_flag, []() { \
-        rocm_dso_handle = phi::dynload::GetCUDADsoHandle(); \
+        rocm_dso_handle = common::dynload::GetCUDADsoHandle(); \
       }); \
       static void* p_##__name = dlsym(rocm_dso_handle, #__name); \
       return reinterpret_cast<rocm_func>(p_##__name)(args...); \
diff --git a/paddle/phi/backends/dynload/rocsparse.h b/paddle/phi/backends/dynload/rocsparse.h
index 0c24e03bff0258..f24d3d79c5f4f2 100644
--- a/paddle/phi/backends/dynload/rocsparse.h
+++ b/paddle/phi/backends/dynload/rocsparse.h
@@ -35,18 +35,18 @@ extern void *rocsparse_dso_handle;
 *
 * note: default dynamic linked libs
 */
-#define DECLARE_DYNAMIC_LOAD_ROCSPARSE_WRAP(__name) \
-  struct DynLoad__##__name { \
-    template <typename... Args> \
-    rocsparse_status operator()(Args... args) { \
-      using rocsparse_func = decltype(&::__name); \
-      std::call_once(rocsparse_dso_flag, []() { \
-        rocsparse_dso_handle = phi::dynload::GetCusparseDsoHandle(); \
-      }); \
-      static void *p_##__name = dlsym(rocsparse_dso_handle, #__name); \
-      return reinterpret_cast<rocsparse_func>(p_##__name)(args...); \
-    } \
-  }; \
+#define DECLARE_DYNAMIC_LOAD_ROCSPARSE_WRAP(__name) \
+  struct DynLoad__##__name { \
+    template <typename... Args> \
+    rocsparse_status operator()(Args... args) { \
+      using rocsparse_func = decltype(&::__name); \
+      std::call_once(rocsparse_dso_flag, []() { \
+        rocsparse_dso_handle = common::dynload::GetCusparseDsoHandle(); \
+      }); \
+      static void *p_##__name = dlsym(rocsparse_dso_handle, #__name); \
+      return reinterpret_cast<rocsparse_func>(p_##__name)(args...); \
+    } \
+  }; \
   extern DynLoad__##__name __name
 
 #if defined(PADDLE_WITH_HIP)
diff --git a/paddle/phi/backends/dynload/tensorrt.h b/paddle/phi/backends/dynload/tensorrt.h
index 7a74f93358b281..2001a427db7d4e 100644
--- a/paddle/phi/backends/dynload/tensorrt.h
+++ b/paddle/phi/backends/dynload/tensorrt.h
@@ -22,7 +22,7 @@ limitations under the License. */
 #include  // NOLINT
 
 #include "paddle/common/backends/dynload/dynamic_loader.h"
-#include "paddle/phi/core/enforce.h"
+#include "paddle/common/enforce.h"
 
 namespace phi {
 namespace dynload {
@@ -41,7 +41,7 @@ extern void* tensorrt_plugin_dso_handle;
     template <typename... Args> \
     void* operator()(Args... args) { \
       std::call_once(tensorrt_dso_flag, []() { \
-        tensorrt_dso_handle = phi::dynload::GetTensorRtHandle(); \
+        tensorrt_dso_handle = common::dynload::GetTensorRtHandle(); \
       }); \
       static void* p_##__name = dlsym(tensorrt_dso_handle, #__name); \
       if (p_##__name == nullptr) { \
@@ -59,7 +59,7 @@ extern void* tensorrt_plugin_dso_handle;
     template <typename... Args> \
     auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
       std::call_once(tensorrt_dso_flag, []() { \
-        tensorrt_dso_handle = phi::dynload::GetTensorRtHandle(); \
+        tensorrt_dso_handle = common::dynload::GetTensorRtHandle(); \
       }); \
       static void* p_##__name = dlsym(tensorrt_dso_handle, #__name); \
       PADDLE_ENFORCE_NOT_NULL( \
@@ -76,7 +76,8 @@ extern void* tensorrt_plugin_dso_handle;
     template <typename... Args> \
     auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
       std::call_once(tensorrt_plugin_dso_flag, []() { \
-        tensorrt_plugin_dso_handle = phi::dynload::GetTensorRtPluginHandle(); \
+        tensorrt_plugin_dso_handle = \
+            common::dynload::GetTensorRtPluginHandle(); \
       }); \
       static void* p_##__name = dlsym(tensorrt_plugin_dso_handle, #__name); \
       PADDLE_ENFORCE_NOT_NULL(p_##__name, \
diff --git a/paddle/phi/backends/dynload/warpctc.h b/paddle/phi/backends/dynload/warpctc.h
index a767d785f79ee6..a91d760ca28bd7 100644
--- a/paddle/phi/backends/dynload/warpctc.h
+++ b/paddle/phi/backends/dynload/warpctc.h
@@ -37,7 +37,7 @@ extern void* warpctc_dso_handle;
     auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
       using warpctcFunc = decltype(&::__name); \
       std::call_once(warpctc_dso_flag, []() { \
-        warpctc_dso_handle = phi::dynload::GetWarpCTCDsoHandle(); \
+        warpctc_dso_handle = common::dynload::GetWarpCTCDsoHandle(); \
       }); \
       static void* p_##__name = dlsym(warpctc_dso_handle, #__name); \
       return reinterpret_cast<warpctcFunc>(p_##__name)(args...); \
diff --git a/paddle/phi/backends/dynload/warprnnt.h b/paddle/phi/backends/dynload/warprnnt.h
index 5c9315bf23b757..01c7cb5f835e0a 100644
--- a/paddle/phi/backends/dynload/warprnnt.h
+++ b/paddle/phi/backends/dynload/warprnnt.h
@@ -31,18 +31,18 @@ extern void* warprnnt_dso_handle;
 * (for each function) to dynamic load warprnnt routine
 * via operator overloading.
 */
-#define DYNAMIC_LOAD_WARPRNNT_WRAP(__name) \
-  struct DynLoad__##__name { \
-    template <typename... Args> \
-    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
-      using warprnntFunc = decltype(&::__name); \
-      std::call_once(warprnnt_dso_flag, []() { \
-        warprnnt_dso_handle = phi::dynload::GetWarpRNNTDsoHandle(); \
-      }); \
-      static void* p_##__name = dlsym(warprnnt_dso_handle, #__name); \
-      return reinterpret_cast<warprnntFunc>(p_##__name)(args...); \
-    } \
-  }; \
+#define DYNAMIC_LOAD_WARPRNNT_WRAP(__name) \
+  struct DynLoad__##__name { \
+    template <typename... Args> \
+    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
+      using warprnntFunc = decltype(&::__name); \
+      std::call_once(warprnnt_dso_flag, []() { \
+        warprnnt_dso_handle = common::dynload::GetWarpRNNTDsoHandle(); \
+      }); \
+      static void* p_##__name = dlsym(warprnnt_dso_handle, #__name); \
+      return reinterpret_cast<warprnntFunc>(p_##__name)(args...); \
+    } \
+  }; \
   extern DynLoad__##__name __name
 
 #define DECLARE_DYNAMIC_LOAD_WARPRNNT_WRAP(__name) \
diff --git a/paddle/phi/backends/dynload/xpti.h b/paddle/phi/backends/dynload/xpti.h
index 9c65d606ba5bd0..d84b20640ad958 100644
--- a/paddle/phi/backends/dynload/xpti.h
+++ b/paddle/phi/backends/dynload/xpti.h
@@ -34,7 +34,7 @@ extern void *xpti_dso_handle;
     XPTIResult operator()(Args... args) { \
       using xptiFunc = decltype(&::__name); \
       std::call_once(xpti_dso_flag, []() { \
-        xpti_dso_handle = phi::dynload::GetXPTIDsoHandle(); \
+        xpti_dso_handle = common::dynload::GetXPTIDsoHandle(); \
       }); \
       static void *p_##__name = dlsym(xpti_dso_handle, #__name); \
       return reinterpret_cast<xptiFunc>(p_##__name)(args...); \
diff --git a/paddle/phi/backends/gpu/cuda/cuda_device_function.h b/paddle/phi/backends/gpu/cuda/cuda_device_function.h
index 3c2b347776edbe..409e96006624da 100644
--- a/paddle/phi/backends/gpu/cuda/cuda_device_function.h
+++ b/paddle/phi/backends/gpu/cuda/cuda_device_function.h
@@ -18,8 +18,8 @@ limitations under the License. */
 #define PADDLE_CUDA_FP16
 #include "paddle/common/bfloat16.h"
 #include "paddle/common/complex.h"
+#include "paddle/common/enforce.h"
 #include "paddle/common/float16.h"
-#include "paddle/phi/core/enforce.h"
 
 namespace phi {
 namespace backends {
diff --git a/paddle/phi/backends/gpu/cuda/cuda_graph.h b/paddle/phi/backends/gpu/cuda/cuda_graph.h
index cbf66a945a6ec3..38f9718494c712 100644
--- a/paddle/phi/backends/gpu/cuda/cuda_graph.h
+++ b/paddle/phi/backends/gpu/cuda/cuda_graph.h
@@ -26,14 +26,14 @@
 
 #include "glog/logging.h"
 
+#include "paddle/common/enforce.h"
+#include "paddle/common/errors.h"
 #include "paddle/common/macros.h"
 #include "paddle/phi/backends/context_pool.h"
 #include "paddle/phi/backends/device_code.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/common/memory_utils.h"
 #include "paddle/phi/common/place.h"
-#include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/utils/optional.h"
 
 #if CUDA_VERSION < 11000
diff --git a/paddle/phi/backends/gpu/cuda/cuda_helper.h b/paddle/phi/backends/gpu/cuda/cuda_helper.h
index 61ed6fe65c0e70..b380e86ba62372 100644
--- a/paddle/phi/backends/gpu/cuda/cuda_helper.h
+++ b/paddle/phi/backends/gpu/cuda/cuda_helper.h
@@ -18,9 +18,9 @@
 #include  // NOLINT
 
 #include "paddle/common/bfloat16.h"
+#include "paddle/common/enforce.h"
 #include "paddle/common/float16.h"
 #include "paddle/phi/common/data_type.h"
-#include "paddle/phi/core/enforce.h"
 
 namespace phi {
 namespace backends {
diff --git a/paddle/phi/backends/gpu/cuda/cuda_info.cc b/paddle/phi/backends/gpu/cuda/cuda_info.cc
index 0af1beb782fcf0..14df0645a6d5cd 100644
--- a/paddle/phi/backends/gpu/cuda/cuda_info.cc
+++ b/paddle/phi/backends/gpu/cuda/cuda_info.cc
@@ -16,7 +16,7 @@
 #include "glog/logging.h"
-#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" static std::once_flag g_device_props_size_init_flag; static std::vector> g_device_props_init_flags; diff --git a/paddle/phi/backends/gpu/cuda/cudnn_desc.h b/paddle/phi/backends/gpu/cuda/cudnn_desc.h index d4fb6930bcc550..fdbc28ffc23d8b 100644 --- a/paddle/phi/backends/gpu/cuda/cudnn_desc.h +++ b/paddle/phi/backends/gpu/cuda/cudnn_desc.h @@ -23,8 +23,8 @@ #include #include +#include "paddle/common/data_type.h" #include "paddle/phi/backends/gpu/cuda/cudnn_helper.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { namespace backends { @@ -87,7 +87,7 @@ class ActivationDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyActivationDescriptor(t)); + common::dynload::cudnnDestroyActivationDescriptor(t)); t = nullptr; } } @@ -95,12 +95,12 @@ class ActivationDescriptor { ActivationDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateActivationDescriptor(&raw_ptr)); + common::dynload::cudnnCreateActivationDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } template void set(cudnnActivationMode_t mode, const T& coef) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetActivationDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetActivationDescriptor( desc_.get(), mode, CUDNN_NOT_PROPAGATE_NAN, static_cast(coef))); } @@ -118,7 +118,7 @@ class TensorDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyTensorDescriptor(t)); + common::dynload::cudnnDestroyTensorDescriptor(t)); t = nullptr; } } @@ -126,7 +126,7 @@ class TensorDescriptor { TensorDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateTensorDescriptor(&raw_ptr)); + common::dynload::cudnnCreateTensorDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -142,7 +142,7 @@ class TensorDescriptor { if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; } - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetTensorNdDescriptor( desc_.get(), ToCudnnDataType(tensor.dtype()), dims_with_group.size(), @@ -160,11 +160,11 @@ class TensorDescriptor { transformed_dims = dims; } PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSetTensorNdDescriptorEx(desc_.get(), - format, - dtype, - transformed_dims.size(), - transformed_dims.data())); + common::dynload::cudnnSetTensorNdDescriptorEx(desc_.get(), + format, + dtype, + transformed_dims.size(), + transformed_dims.data())); } void set(const phi::DenseTensor& tensor, const cudnnTensorFormat_t format) { @@ -184,7 +184,7 @@ class FilterDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyFilterDescriptor(t)); + common::dynload::cudnnDestroyFilterDescriptor(t)); t = nullptr; } } @@ -192,7 +192,7 @@ class FilterDescriptor { FilterDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateFilterDescriptor(&raw_ptr)); + common::dynload::cudnnCreateFilterDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -212,11 +212,11 @@ class FilterDescriptor { transformed_dims[1] = transformed_dims[1] / groups; } PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSetFilterNdDescriptor(desc_.get(), - dtype, - format, - transformed_dims.size(), - transformed_dims.data())); + common::dynload::cudnnSetFilterNdDescriptor(desc_.get(), + dtype, + 
format, + transformed_dims.size(), + transformed_dims.data())); } void set(const phi::DenseTensor& tensor, @@ -238,7 +238,7 @@ class ConvolutionDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyConvolutionDescriptor(t)); + common::dynload::cudnnDestroyConvolutionDescriptor(t)); t = nullptr; } } @@ -246,7 +246,7 @@ class ConvolutionDescriptor { ConvolutionDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateConvolutionDescriptor(&raw_ptr)); + common::dynload::cudnnCreateConvolutionDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -262,32 +262,32 @@ class ConvolutionDescriptor { cudnnDataType_t compute_type = (dtype == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT; T* desc = desc_.get(); - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSetConvolutionNdDescriptor(desc, - pads.size(), - pads.data(), - strides.data(), - dilations.data(), - CUDNN_CROSS_CORRELATION, - compute_type)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetConvolutionNdDescriptor( + desc, + pads.size(), + pads.data(), + strides.data(), + dilations.data(), + CUDNN_CROSS_CORRELATION, + compute_type)); #if CUDNN_VERSION_MIN(7, 0, 1) PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSetConvolutionGroupCount(desc, groups)); + common::dynload::cudnnSetConvolutionGroupCount(desc, groups)); #if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1) PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSetConvolutionMathType(desc, CUDNN_DEFAULT_MATH)); + common::dynload::cudnnSetConvolutionMathType(desc, CUDNN_DEFAULT_MATH)); if (dtype == CUDNN_DATA_HALF) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetConvolutionMathType( desc, CUDNN_TENSOR_OP_MATH)); #if CUDA_VERSION >= 11000 #if CUDNN_VERSION_MIN(8, 1, 0) } else if (dtype == CUDNN_DATA_BFLOAT16) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetConvolutionMathType( desc, CUDNN_TENSOR_OP_MATH)); #endif // CUDNN_VERSION_MIN(8,1,0) } else if (dtype == CUDNN_DATA_FLOAT && !allow_tf32) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSetConvolutionMathType(desc, CUDNN_FMA_MATH)); + common::dynload::cudnnSetConvolutionMathType(desc, CUDNN_FMA_MATH)); #endif // CUDA_VERSION >= 11000 } #endif diff --git a/paddle/phi/backends/gpu/cuda/cudnn_helper.h b/paddle/phi/backends/gpu/cuda/cudnn_helper.h index 5acd3f32075ca8..f527211f83c420 100644 --- a/paddle/phi/backends/gpu/cuda/cudnn_helper.h +++ b/paddle/phi/backends/gpu/cuda/cudnn_helper.h @@ -19,12 +19,12 @@ limitations under the License. 
*/ #include "paddle/common/backends/dynload/cudnn.h" #include "paddle/common/bfloat16.h" +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" #include "paddle/common/float16.h" #include "paddle/common/macros.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/utils/flags.h" PD_DECLARE_bool(cudnn_deterministic); @@ -195,11 +195,11 @@ class ScopedTensorDescriptor { public: ScopedTensorDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateTensorDescriptor(&desc_)); + common::dynload::cudnnCreateTensorDescriptor(&desc_)); } ~ScopedTensorDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyTensorDescriptor(desc_)); + common::dynload::cudnnDestroyTensorDescriptor(desc_)); } inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format, @@ -222,26 +222,27 @@ class ScopedTensorDescriptor { if (dims.size() == 4) { if (format == CUDNN_TENSOR_NCHW) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSetTensorNdDescriptor(desc_, - type, - dims_with_group.size(), - dims_with_group.data(), - strides.data())); + common::dynload::cudnnSetTensorNdDescriptor(desc_, + type, + dims_with_group.size(), + dims_with_group.data(), + strides.data())); } else { // CUDNN_TENSOR_NHWC - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensor4dDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetTensor4dDescriptor( desc_, format, type, dims[0], dims[3], dims[1], dims[2])); } } else if (dims.size() == 5) { if (format == CUDNN_TENSOR_NCHW) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSetTensorNdDescriptor(desc_, - type, - dims_with_group.size(), - dims_with_group.data(), - strides.data())); + common::dynload::cudnnSetTensorNdDescriptor(desc_, + type, + dims_with_group.size(), + dims_with_group.data(), + strides.data())); } else { // CUDNN_TENSOR_NHWC - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptorEx( - desc_, format, type, dims.size(), dims.data())); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cudnnSetTensorNdDescriptorEx( + desc_, format, type, dims.size(), dims.data())); } } return desc_; @@ -258,7 +259,7 @@ class ScopedTensorDescriptor { inline cudnnTensorDescriptor_t descriptor(const cudnnDataType_t cudnn_type, const std::vector& dim, const std::vector& stride) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetTensorNdDescriptor( desc_, cudnn_type, dim.size(), dim.data(), stride.data())); return desc_; } @@ -281,12 +282,12 @@ class ScopedRNNTensorDescriptor { public: ScopedRNNTensorDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateRNNDataDescriptor(&desc_)); + common::dynload::cudnnCreateRNNDataDescriptor(&desc_)); } ~ScopedRNNTensorDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyRNNDataDescriptor(desc_)); + common::dynload::cudnnDestroyRNNDataDescriptor(desc_)); } inline cudnnRNNDataDescriptor_t descriptor( @@ -305,7 +306,7 @@ class ScopedRNNTensorDescriptor { layout = CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED; } - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetRNNDataDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetRNNDataDescriptor( desc_, cudnn_type, layout, @@ -345,11 +346,11 @@ class ScopedDropoutDescriptor { public: ScopedDropoutDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - 
phi::dynload::cudnnCreateDropoutDescriptor(&desc_)); + common::dynload::cudnnCreateDropoutDescriptor(&desc_)); } ~ScopedDropoutDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyDropoutDescriptor(desc_)); + common::dynload::cudnnDestroyDropoutDescriptor(desc_)); } inline cudnnDropoutDescriptor_t descriptor(const cudnnHandle_t& handle, @@ -361,22 +362,22 @@ class ScopedDropoutDescriptor { size_t state_size) { if (dropout_state_ == nullptr) { // for no dropout or test PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSetDropoutDescriptor(desc_, - handle, - 0 /* dropout */, - nullptr, - 0 /* state_size */, - 0 /* seed */)); + common::dynload::cudnnSetDropoutDescriptor(desc_, + handle, + 0 /* dropout */, + nullptr, + 0 /* state_size */, + 0 /* seed */)); return desc_; } auto* dropout_state_data = dropout_state_->data(); if (!initialized) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetDropoutDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetDropoutDescriptor( desc_, handle, dropout_prob_, dropout_state_data, state_size, seed)); } else { auto dropout_state_dims = phi::vectorize(dropout_state_->dims()); state_size = dropout_state_dims[0]; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRestoreDropoutDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRestoreDropoutDescriptor( desc_, handle, dropout_prob_, dropout_state_data, state_size, 0)); } return desc_; @@ -391,10 +392,12 @@ class ScopedDropoutDescriptor { class ScopedRNNDescriptor { public: ScopedRNNDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnCreateRNNDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cudnnCreateRNNDescriptor(&desc_)); } ~ScopedRNNDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDestroyRNNDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cudnnDestroyRNNDescriptor(desc_)); } inline cudnnRNNDescriptor_t desc() { return desc_; } @@ -408,11 +411,11 @@ class ScopedFilterDescriptor { public: ScopedFilterDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateFilterDescriptor(&desc_)); + common::dynload::cudnnCreateFilterDescriptor(&desc_)); } ~ScopedFilterDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyFilterDescriptor(desc_)); + common::dynload::cudnnDestroyFilterDescriptor(desc_)); } inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format, @@ -429,11 +432,11 @@ class ScopedFilterDescriptor { // NOTE: input filter(C) of the filter is already asserted to be C/groups. } PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSetFilterNdDescriptor(desc_, - type, - format, - kernel_with_group.size(), - kernel_with_group.data())); + common::dynload::cudnnSetFilterNdDescriptor(desc_, + type, + format, + kernel_with_group.size(), + kernel_with_group.data())); return desc_; } @@ -456,11 +459,11 @@ class ScopedConvolutionDescriptor { public: ScopedConvolutionDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateConvolutionDescriptor(&desc_)); + common::dynload::cudnnCreateConvolutionDescriptor(&desc_)); } ~ScopedConvolutionDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyConvolutionDescriptor(desc_)); + common::dynload::cudnnDestroyConvolutionDescriptor(desc_)); } inline cudnnConvolutionDescriptor_t descriptor( @@ -486,14 +489,14 @@ class ScopedConvolutionDescriptor { cudnnDataType_t compute_type = (type == CUDNN_DATA_DOUBLE) ? 
CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT; - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSetConvolutionNdDescriptor(desc_, - pads.size(), - pads.data(), - strides.data(), - dilations.data(), - CUDNN_CROSS_CORRELATION, - compute_type)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetConvolutionNdDescriptor( + desc_, + pads.size(), + pads.data(), + strides.data(), + dilations.data(), + CUDNN_CROSS_CORRELATION, + compute_type)); return desc_; } @@ -514,11 +517,11 @@ class ScopedPoolingDescriptor { public: ScopedPoolingDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreatePoolingDescriptor(&desc_)); + common::dynload::cudnnCreatePoolingDescriptor(&desc_)); } ~ScopedPoolingDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyPoolingDescriptor(desc_)); + common::dynload::cudnnDestroyPoolingDescriptor(desc_)); } inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode, @@ -540,7 +543,7 @@ class ScopedPoolingDescriptor { "received size of kernel is %d, size of strides is %d.", kernel.size(), strides.size())); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetPoolingNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetPoolingNdDescriptor( desc_, (GetPoolingMode(mode)), CUDNN_PROPAGATE_NAN, // Always propagate nans. @@ -560,18 +563,18 @@ class ScopedSpatialTransformerDescriptor { public: ScopedSpatialTransformerDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateSpatialTransformerDescriptor(&desc_)); + common::dynload::cudnnCreateSpatialTransformerDescriptor(&desc_)); } ~ScopedSpatialTransformerDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroySpatialTransformerDescriptor(desc_)); + common::dynload::cudnnDestroySpatialTransformerDescriptor(desc_)); } template inline cudnnSpatialTransformerDescriptor_t descriptor(const int nbDims, const int dimA[]) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSetSpatialTransformerNdDescriptor( + common::dynload::cudnnSetSpatialTransformerNdDescriptor( desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType::type, @@ -589,11 +592,11 @@ class ScopedActivationDescriptor { public: ScopedActivationDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateActivationDescriptor(&desc_)); + common::dynload::cudnnCreateActivationDescriptor(&desc_)); } ~ScopedActivationDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyActivationDescriptor(desc_)); + common::dynload::cudnnDestroyActivationDescriptor(desc_)); } template @@ -630,7 +633,7 @@ class ScopedActivationDescriptor { "Unrecognized CUDNN activation mode: %d.", static_cast(activation_mode))); } - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetActivationDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetActivationDescriptor( desc_, mode, CUDNN_NOT_PROPAGATE_NAN, relu_ceiling)); return desc_; } @@ -645,17 +648,17 @@ class ScopedCTCLossDescriptor { public: ScopedCTCLossDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateCTCLossDescriptor(&desc_)); + common::dynload::cudnnCreateCTCLossDescriptor(&desc_)); } ~ScopedCTCLossDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyCTCLossDescriptor(desc_)); + common::dynload::cudnnDestroyCTCLossDescriptor(desc_)); } template inline cudnnCTCLossDescriptor_t descriptor() { - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType::type)); + 
PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetCTCLossDescriptor( + desc_, CudnnDataType::type)); return desc_; } diff --git a/paddle/phi/backends/gpu/gpu_context.cc b/paddle/phi/backends/gpu/gpu_context.cc index f4598583466851..ccb4dc5126f3e3 100644 --- a/paddle/phi/backends/gpu/gpu_context.cc +++ b/paddle/phi/backends/gpu/gpu_context.cc @@ -55,7 +55,7 @@ limitations under the License. */ // without eigen. #include "unsupported/Eigen/CXX11/Tensor" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" namespace phi { @@ -381,7 +381,7 @@ struct GPUContext::Impl { } else { blas_tensor_core_handle_ = blas_tensor_core_handle_creator_(); } - PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cublasSetMathMode( + PADDLE_RETRY_CUDA_SUCCESS(common::dynload::cublasSetMathMode( blas_tensor_core_handle_, CUBLAS_TENSOR_OP_MATH)); } #endif @@ -393,7 +393,7 @@ struct GPUContext::Impl { blas_tf32_tensor_core_handle_ = blas_tf32_tensor_core_handle_creator_(); } - PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cublasSetMathMode( + PADDLE_RETRY_CUDA_SUCCESS(common::dynload::cublasSetMathMode( blas_tf32_tensor_core_handle_, CUBLAS_TF32_TENSOR_OP_MATH)); } #endif @@ -461,12 +461,12 @@ struct GPUContext::Impl { void DestroyInternalDnnHandle() { #ifdef PADDLE_WITH_HIP if (owned_ && dnn_handle_ != nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenDestroy(dnn_handle_)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenDestroy(dnn_handle_)); dnn_handle_ = nullptr; } #else if (owned_ && dnn_handle_ != nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDestroy(dnn_handle_)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnDestroy(dnn_handle_)); dnn_handle_ = nullptr; } #endif // PADDLE_WITH_HIP @@ -583,7 +583,7 @@ struct GPUContext::Impl { } else { blas_tensor_core_handle_ = blas_tensor_core_handle_creator_(); } - PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cublasSetMathMode( + PADDLE_RETRY_CUDA_SUCCESS(common::dynload::cublasSetMathMode( blas_tensor_core_handle_, CUBLAS_TENSOR_OP_MATH)); } #endif @@ -595,7 +595,7 @@ struct GPUContext::Impl { blas_tf32_tensor_core_handle_ = blas_tf32_tensor_core_handle_creator_(); } - PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cublasSetMathMode( + PADDLE_RETRY_CUDA_SUCCESS(common::dynload::cublasSetMathMode( blas_tf32_tensor_core_handle_, CUBLAS_TF32_TENSOR_OP_MATH)); } #endif @@ -628,7 +628,7 @@ struct GPUContext::Impl { } else { blas_tensor_core_handle_ = blas_tensor_core_handle_creator_(); } - PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cublasSetMathMode( + PADDLE_RETRY_CUDA_SUCCESS(common::dynload::cublasSetMathMode( blas_tensor_core_handle_, CUBLAS_TENSOR_OP_MATH)); } #endif @@ -640,7 +640,7 @@ struct GPUContext::Impl { blas_tf32_tensor_core_handle_ = blas_tf32_tensor_core_handle_creator_(); } - PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cublasSetMathMode( + PADDLE_RETRY_CUDA_SUCCESS(common::dynload::cublasSetMathMode( blas_tf32_tensor_core_handle_, CUBLAS_TF32_TENSOR_OP_MATH)); } #endif diff --git a/paddle/phi/backends/gpu/gpu_info.h b/paddle/phi/backends/gpu/gpu_info.h index 132493f2c62cd2..f2348082d07dda 100644 --- a/paddle/phi/backends/gpu/gpu_info.h +++ b/paddle/phi/backends/gpu/gpu_info.h @@ -22,7 +22,7 @@ limitations under the License. 
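Note: the gpu_context.cc hunks above configure cuBLAS math modes through PADDLE_RETRY_CUDA_SUCCESS on dedicated handles (CUBLAS_TENSOR_OP_MATH for the Tensor Core handle, CUBLAS_TF32_TENSOR_OP_MATH for the TF32 handle). The same calls against the raw cuBLAS API look as follows — an illustrative sketch with a plain status check standing in for the retry macro; MakeTensorCoreHandle is hypothetical.

#include <cublas_v2.h>
#include <cstdio>

// Create a handle dedicated to Tensor Core GEMMs, mirroring what the hunks
// above do for blas_tensor_core_handle_. Returns nullptr on failure.
cublasHandle_t MakeTensorCoreHandle(cudaStream_t stream) {
  cublasHandle_t handle = nullptr;
  if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) return nullptr;
  if (cublasSetStream(handle, stream) != CUBLAS_STATUS_SUCCESS ||
      cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH) !=
          CUBLAS_STATUS_SUCCESS) {
    std::fprintf(stderr, "cuBLAS handle setup failed\n");
    cublasDestroy(handle);
    return nullptr;
  }
  return handle;
}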
*/ #include "paddle/common/backends/gpu/gpu_types.h" -namespace phi { +namespace common { namespace backends { namespace gpu { @@ -142,6 +142,6 @@ class GPUDeviceGuard { } // namespace gpu } // namespace backends -} // namespace phi +} // namespace common #endif diff --git a/paddle/phi/backends/gpu/gpu_launch_config.h b/paddle/phi/backends/gpu/gpu_launch_config.h index fd712baf754803..87c98db8427058 100644 --- a/paddle/phi/backends/gpu/gpu_launch_config.h +++ b/paddle/phi/backends/gpu/gpu_launch_config.h @@ -31,8 +31,8 @@ #include #include "glog/logging.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/enforce.h" // CUDA performs better when thread_per_block is between [64, 512] #define PREDEFINED_BLOCK_SIZE 512 diff --git a/paddle/phi/backends/gpu/gpu_resources.cc b/paddle/phi/backends/gpu/gpu_resources.cc index bf611705ed59b0..8f60db495655a5 100644 --- a/paddle/phi/backends/gpu/gpu_resources.cc +++ b/paddle/phi/backends/gpu/gpu_resources.cc @@ -40,7 +40,7 @@ #include "glog/logging.h" #include "unsupported/Eigen/CXX11/Tensor" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" namespace phi { @@ -229,24 +229,24 @@ void DestoryStream(gpuStream_t stream) { void InitBlasHandle(blasHandle_t* blas_handle, gpuStream_t stream) { #ifdef PADDLE_WITH_HIP - phi::dynload::rocblas_create_handle(blas_handle); - phi::dynload::rocblas_set_stream(*blas_handle, stream); + common::dynload::rocblas_create_handle(blas_handle); + common::dynload::rocblas_set_stream(*blas_handle, stream); #else // PADDLE_WITH_CUDA - PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cublasCreate(blas_handle)); + PADDLE_RETRY_CUDA_SUCCESS(common::dynload::cublasCreate(blas_handle)); PADDLE_RETRY_CUDA_SUCCESS( - phi::dynload::cublasSetStream(*blas_handle, stream)); + common::dynload::cublasSetStream(*blas_handle, stream)); #endif // PADDLE_WITH_HIP } void DestroyBlasHandle(blasHandle_t handle) { #ifdef PADDLE_WITH_HIP if (handle != nullptr) { - phi::dynload::rocblas_destroy_handle(handle); + common::dynload::rocblas_destroy_handle(handle); handle = nullptr; } #else if (handle != nullptr) { - phi::dynload::cublasDestroy(handle); + common::dynload::cublasDestroy(handle); handle = nullptr; } #endif // PADDLE_WITH_HIP @@ -254,21 +254,21 @@ void DestroyBlasHandle(blasHandle_t handle) { void InitBlasLtHandle(blasLtHandle_t* blaslt_handle) { #if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 11060 - phi::dynload::cublasLtCreate(blaslt_handle); + common::dynload::cublasLtCreate(blaslt_handle); #endif } void DestroyBlasLtHandle(blasLtHandle_t handle) { #if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 11060 if (handle != nullptr) { - phi::dynload::cublasLtDestroy(handle); + common::dynload::cublasLtDestroy(handle); handle = nullptr; } #endif } void InitDnnHandle(dnnHandle_t* handle, gpuStream_t stream, Place place) { - if (phi::dynload::HasCUDNN()) { + if (common::dynload::HasCUDNN()) { #ifdef PADDLE_WITH_HIP size_t miopen_major, miopen_minor, miopen_patch; PADDLE_ENFORCE_GPU_SUCCESS( @@ -290,7 +290,7 @@ void InitDnnHandle(dnnHandle_t* handle, gpuStream_t stream, Place place) { PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenCreate(handle)); PADDLE_ENFORCE_GPU_SUCCESS(dynload::miopenSetStream(*handle, stream)); #else - auto version = phi::dynload::cudnnGetVersion(); + auto version = common::dynload::cudnnGetVersion(); auto local_cudnn_major = (version < 9000) ? 
version / 1000 : version / 10000; auto local_cudnn_minor = @@ -305,8 +305,8 @@ void InitDnnHandle(dnnHandle_t* handle, gpuStream_t stream, Place place) { << "Please recompile or reinstall Paddle with compatible CUDNN " "version."; } - PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cudnnCreate(handle)); - PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cudnnSetStream(*handle, stream)); + PADDLE_RETRY_CUDA_SUCCESS(common::dynload::cudnnCreate(handle)); + PADDLE_RETRY_CUDA_SUCCESS(common::dynload::cudnnSetStream(*handle, stream)); #endif } else { *handle = nullptr; @@ -316,12 +316,12 @@ void InitDnnHandle(dnnHandle_t* handle, gpuStream_t stream, Place place) { void DestroyDnnHandle(dnnHandle_t handle) { #ifdef PADDLE_WITH_HIP if (handle != nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenDestroy(handle)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenDestroy(handle)); handle = nullptr; } #else if (handle != nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDestroy(handle)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnDestroy(handle)); handle = nullptr; } #endif // PADDLE_WITH_HIP @@ -329,15 +329,17 @@ void DestroyDnnHandle(dnnHandle_t handle) { void InitSolverHandle(solverHandle_t* handle, gpuStream_t stream) { #ifndef PADDLE_WITH_HIP - PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cusolverDnCreate(handle)); - PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cusolverDnSetStream(*handle, stream)); + PADDLE_RETRY_CUDA_SUCCESS(common::dynload::cusolverDnCreate(handle)); + PADDLE_RETRY_CUDA_SUCCESS( + common::dynload::cusolverDnSetStream(*handle, stream)); #endif } void DestroySolverHandle(solverHandle_t solver_handle) { #ifndef PADDLE_WITH_HIP if (solver_handle != nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnDestroy(solver_handle)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cusolverDnDestroy(solver_handle)); solver_handle = nullptr; } #endif @@ -352,8 +354,8 @@ void InitSparseHandle(sparseHandle_t* handle, gpuStream_t stream) { PADDLE_RETRY_CUDA_SUCCESS(dynload::cusparseSetStream(*handle, stream)); #endif #elif defined(PADDLE_WITH_HIP) - phi::dynload::rocsparse_create_handle(handle); - phi::dynload::rocsparse_set_stream(*handle, stream); + common::dynload::rocsparse_create_handle(handle); + common::dynload::rocsparse_set_stream(*handle, stream); #endif } @@ -367,7 +369,7 @@ void DestroySparseHandle(sparseHandle_t handle) { #endif #elif defined(PADDLE_WITH_HIP) if (handle != nullptr) { - phi::dynload::rocsparse_destroy_handle(handle); + common::dynload::rocsparse_destroy_handle(handle); handle = nullptr; } #endif diff --git a/paddle/phi/backends/gpu/gpu_utils.h b/paddle/phi/backends/gpu/gpu_utils.h index 0bb0aef7be1f13..c598c488807dea 100644 --- a/paddle/phi/backends/gpu/gpu_utils.h +++ b/paddle/phi/backends/gpu/gpu_utils.h @@ -18,7 +18,7 @@ #include -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "unsupported/Eigen/CXX11/Tensor" namespace phi { diff --git a/paddle/phi/backends/gpu/rocm/miopen_desc.h b/paddle/phi/backends/gpu/rocm/miopen_desc.h index ae0e274ca650ef..f921b338cacca2 100644 --- a/paddle/phi/backends/gpu/rocm/miopen_desc.h +++ b/paddle/phi/backends/gpu/rocm/miopen_desc.h @@ -23,8 +23,8 @@ #include #include +#include "paddle/common/data_type.h" #include "paddle/phi/backends/gpu/rocm/miopen_helper.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { namespace backends { @@ -75,7 +75,7 @@ class ActivationDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - 
phi::dynload::miopenDestroyActivationDescriptor(t)); + common::dynload::miopenDestroyActivationDescriptor(t)); t = nullptr; } } @@ -83,12 +83,12 @@ class ActivationDescriptor { ActivationDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenCreateActivationDescriptor(&raw_ptr)); + common::dynload::miopenCreateActivationDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } template void set(miopenActivationMode_t mode, const T& coef) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetActivationDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetActivationDescriptor( desc_.get(), mode, static_cast(coef), 0.0, 0.0)); } @@ -106,7 +106,7 @@ class TensorDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenDestroyTensorDescriptor(t)); + common::dynload::miopenDestroyTensorDescriptor(t)); t = nullptr; } } @@ -114,7 +114,7 @@ class TensorDescriptor { TensorDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenCreateTensorDescriptor(&raw_ptr)); + common::dynload::miopenCreateTensorDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -131,7 +131,7 @@ class TensorDescriptor { if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; } - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetTensorDescriptor( (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.dtype()), static_cast(dims_with_group.size()), @@ -155,7 +155,7 @@ class TensorDescriptor { if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; } - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetTensorDescriptor( (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.dtype()), static_cast(dims_with_group.size()), @@ -174,7 +174,7 @@ class FilterDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenDestroyTensorDescriptor(t)); + common::dynload::miopenDestroyTensorDescriptor(t)); t = nullptr; } } @@ -182,7 +182,7 @@ class FilterDescriptor { FilterDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenCreateTensorDescriptor(&raw_ptr)); + common::dynload::miopenCreateTensorDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -205,7 +205,7 @@ class FilterDescriptor { if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; } - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetTensorDescriptor( (miopenTensorDescriptor_t)(desc_.get()), ToCudnnDataType(tensor.dtype()), static_cast(dims_with_group.size()), @@ -224,7 +224,7 @@ class ConvolutionDescriptor { void operator()(T* t) { if (t != nullptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenDestroyConvolutionDescriptor(t)); + common::dynload::miopenDestroyConvolutionDescriptor(t)); t = nullptr; } } @@ -232,7 +232,7 @@ class ConvolutionDescriptor { ConvolutionDescriptor() { T* raw_ptr; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenCreateConvolutionDescriptor(&raw_ptr)); + common::dynload::miopenCreateConvolutionDescriptor(&raw_ptr)); desc_.reset(raw_ptr); } T* desc() { return desc_.get(); } @@ -244,14 +244,15 @@ class ConvolutionDescriptor { const std::vector& dilations, bool allow_tf32, const int groups = 1) { - 
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenInitConvolutionNdDescriptor( - (miopenConvolutionDescriptor_t)desc_.get(), - static_cast(pads.size()), - const_cast(pads.data()), - const_cast(strides.data()), - const_cast(dilations.data()), - miopenConvolution)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetConvolutionGroupCount( + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::miopenInitConvolutionNdDescriptor( + (miopenConvolutionDescriptor_t)desc_.get(), + static_cast(pads.size()), + const_cast(pads.data()), + const_cast(strides.data()), + const_cast(dilations.data()), + miopenConvolution)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetConvolutionGroupCount( (miopenConvolutionDescriptor_t)desc_.get(), groups)); } diff --git a/paddle/phi/backends/gpu/rocm/miopen_helper.h b/paddle/phi/backends/gpu/rocm/miopen_helper.h index 61dab08f5db583..b27bec1aebc1ee 100644 --- a/paddle/phi/backends/gpu/rocm/miopen_helper.h +++ b/paddle/phi/backends/gpu/rocm/miopen_helper.h @@ -20,13 +20,13 @@ limitations under the License. */ #include "paddle/utils/flags.h" #include "paddle/common/bfloat16.h" +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" #include "paddle/common/float16.h" #include "paddle/common/macros.h" #include "paddle/phi/backends/dynload/miopen.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" // MIOPEN do not have epslion definition #define CUDNN_BN_MIN_EPSILON 1e-05 @@ -204,11 +204,11 @@ class ScopedTensorDescriptor { public: ScopedTensorDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenCreateTensorDescriptor(&desc_)); + common::dynload::miopenCreateTensorDescriptor(&desc_)); } ~ScopedTensorDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenDestroyTensorDescriptor(desc_)); + common::dynload::miopenDestroyTensorDescriptor(desc_)); } inline miopenTensorDescriptor_t descriptor(const miopenTensorFormat_t format, @@ -234,14 +234,14 @@ class ScopedTensorDescriptor { MIOPEN_TENSOR_NCHW, phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN.")); if (dims.size() == 4) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetTensorDescriptor( desc_, type, dims_with_group.size(), const_cast(dims_with_group.data()), const_cast(strides.data()))); } else if (dims.size() == 5) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetTensorDescriptor( desc_, type, dims_with_group.size(), @@ -262,7 +262,7 @@ class ScopedTensorDescriptor { inline miopenTensorDescriptor_t descriptor(const miopenDataType_t miopen_type, const std::vector& dim, const std::vector& stride) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetTensorDescriptor( desc_, miopen_type, dim.size(), @@ -288,11 +288,11 @@ class ScopedDropoutDescriptor { public: ScopedDropoutDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenCreateDropoutDescriptor(&desc_)); + common::dynload::miopenCreateDropoutDescriptor(&desc_)); } ~ScopedDropoutDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenDestroyDropoutDescriptor(desc_)); + common::dynload::miopenDestroyDropoutDescriptor(desc_)); } inline miopenDropoutDescriptor_t descriptor(const miopenHandle_t& handle, @@ -303,43 +303,44 @@ class 
ScopedDropoutDescriptor { int seed, size_t state_size) { if (dropout_state_ == nullptr) { // for no dropout or test - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenSetDropoutDescriptor(desc_, - handle, - 0 /* dropout */, - nullptr, - 0 /* state_size */, - 0 /* seed */, - false, - false, - MIOPEN_RNG_PSEUDO_XORWOW)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetDropoutDescriptor( + desc_, + handle, + 0 /* dropout */, + nullptr, + 0 /* state_size */, + 0 /* seed */, + false, + false, + MIOPEN_RNG_PSEUDO_XORWOW)); return desc_; } auto* dropout_state_data = dropout_state_->data(); if (!initialized) { - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenSetDropoutDescriptor(desc_, - handle, - dropout_prob_, - dropout_state_data, - state_size, - seed, - false, - false, - MIOPEN_RNG_PSEUDO_XORWOW)); - } else { - auto dropout_state_dims = dropout_state_->dims(); - state_size = dropout_state_dims[0]; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenRestoreDropoutDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetDropoutDescriptor( desc_, handle, dropout_prob_, dropout_state_data, state_size, - 0, + seed, false, false, MIOPEN_RNG_PSEUDO_XORWOW)); + } else { + auto dropout_state_dims = dropout_state_->dims(); + state_size = dropout_state_dims[0]; + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::miopenRestoreDropoutDescriptor( + desc_, + handle, + dropout_prob_, + dropout_state_data, + state_size, + 0, + false, + false, + MIOPEN_RNG_PSEUDO_XORWOW)); } return desc_; } @@ -353,10 +354,12 @@ class ScopedDropoutDescriptor { class ScopedRNNDescriptor { public: ScopedRNNDescriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenCreateRNNDescriptor(&desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::miopenCreateRNNDescriptor(&desc_)); } ~ScopedRNNDescriptor() PADDLE_MAY_THROW { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenDestroyRNNDescriptor(desc_)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::miopenDestroyRNNDescriptor(desc_)); } inline miopenRNNDescriptor_t desc() { return desc_; } @@ -370,11 +373,11 @@ class ScopedFilterDescriptor { public: ScopedFilterDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenCreateTensorDescriptor(&desc_)); + common::dynload::miopenCreateTensorDescriptor(&desc_)); } ~ScopedFilterDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenDestroyTensorDescriptor(desc_)); + common::dynload::miopenDestroyTensorDescriptor(desc_)); } inline miopenTensorDescriptor_t descriptor(const miopenTensorFormat_t format, @@ -395,7 +398,7 @@ class ScopedFilterDescriptor { for (int k = kernel_with_group.size() - 2; k >= 0; k--) { stride_dim[k] = stride_dim[k + 1] * kernel_with_group[k + 1]; } - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetTensorDescriptor( desc_, type, kernel_with_group.size(), @@ -423,11 +426,11 @@ class ScopedConvolutionDescriptor { public: ScopedConvolutionDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenCreateConvolutionDescriptor(&desc_)); + common::dynload::miopenCreateConvolutionDescriptor(&desc_)); } ~ScopedConvolutionDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenDestroyConvolutionDescriptor(desc_)); + common::dynload::miopenDestroyConvolutionDescriptor(desc_)); } inline miopenConvolutionDescriptor_t descriptor( @@ -450,13 +453,14 @@ class ScopedConvolutionDescriptor { "of pads is %d, size of dilations is %d.", pads.size(), dilations.size())); - 
PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenInitConvolutionNdDescriptor( - desc_, - pads.size(), - const_cast(pads.data()), - const_cast(strides.data()), - const_cast(dilations.data()), - miopenConvolution)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::miopenInitConvolutionNdDescriptor( + desc_, + pads.size(), + const_cast(pads.data()), + const_cast(strides.data()), + const_cast(dilations.data()), + miopenConvolution)); return desc_; } @@ -477,11 +481,11 @@ class ScopedPoolingDescriptor { public: ScopedPoolingDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenCreatePoolingDescriptor(&desc_)); + common::dynload::miopenCreatePoolingDescriptor(&desc_)); } ~ScopedPoolingDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenDestroyPoolingDescriptor(desc_)); + common::dynload::miopenDestroyPoolingDescriptor(desc_)); } inline miopenPoolingDescriptor_t descriptor(const PoolingMode& mode, @@ -503,7 +507,7 @@ class ScopedPoolingDescriptor { "received size of kernel is %d, size of strides is %d.", kernel.size(), strides.size())); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetNdPoolingDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetNdPoolingDescriptor( desc_, GetPoolingMode(mode), kernel.size(), @@ -522,11 +526,11 @@ class ScopedActivationDescriptor { public: ScopedActivationDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenCreateActivationDescriptor(&desc_)); + common::dynload::miopenCreateActivationDescriptor(&desc_)); } ~ScopedActivationDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenDestroyActivationDescriptor(desc_)); + common::dynload::miopenDestroyActivationDescriptor(desc_)); } template @@ -561,7 +565,7 @@ class ScopedActivationDescriptor { "Unrecognized MIOPEN activation mode: %d.", static_cast(activation_mode))); } - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetActivationDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetActivationDescriptor( desc_, mode, relu_ceiling, 0.0, 0.0)); return desc_; } @@ -575,16 +579,16 @@ class ScopedCTCLossDescriptor { public: ScopedCTCLossDescriptor() { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenCreateCTCLossDescriptor(&desc_)); + common::dynload::miopenCreateCTCLossDescriptor(&desc_)); } ~ScopedCTCLossDescriptor() PADDLE_MAY_THROW { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenDestroyCTCLossDescriptor(desc_)); + common::dynload::miopenDestroyCTCLossDescriptor(desc_)); } template inline miopenCTCLossDescriptor_t descriptor() { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetCTCLossDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetCTCLossDescriptor( desc_, CudnnDataType::type, 0, false)); return desc_; } diff --git a/paddle/phi/backends/gpu/rocm/rocm_info.cc b/paddle/phi/backends/gpu/rocm/rocm_info.cc index edc23479c92380..1d9ac0d2e5226e 100644 --- a/paddle/phi/backends/gpu/rocm/rocm_info.cc +++ b/paddle/phi/backends/gpu/rocm/rocm_info.cc @@ -16,7 +16,7 @@ #include "paddle/phi/backends/gpu/gpu_info.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" static std::once_flag g_device_props_size_init_flag; static std::vector> g_device_props_init_flags; diff --git a/paddle/phi/backends/onednn/onednn_context.cc b/paddle/phi/backends/onednn/onednn_context.cc index 8392a0a45b38c4..77eb0d80853d77 100644 --- a/paddle/phi/backends/onednn/onednn_context.cc +++ b/paddle/phi/backends/onednn/onednn_context.cc @@ -14,8 +14,8 @@ #ifdef PADDLE_WITH_DNNL #include 
"paddle/phi/backends/onednn/onednn_context.h" +#include "paddle/common/enforce.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/enforce.h" #include "paddle/utils/flat_hash_map.h" #include "paddle/phi/backends/context_pool.h" diff --git a/paddle/phi/backends/xpu/enforce_xpu.h b/paddle/phi/backends/xpu/enforce_xpu.h index 0a2a21e236d040..9321a8c843ec72 100644 --- a/paddle/phi/backends/xpu/enforce_xpu.h +++ b/paddle/phi/backends/xpu/enforce_xpu.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once +#include "paddle/common/enforce.h" #include "paddle/phi/backends/xpu/xpu_header.h" -#include "paddle/phi/core/enforce.h" #ifdef PADDLE_WITH_XPU_BKCL #include "xpu/bkcl.h" #endif diff --git a/paddle/phi/capi/include/type_utils.h b/paddle/phi/capi/include/type_utils.h index 029ee42fe091bc..98d25aa1bd010f 100644 --- a/paddle/phi/capi/include/type_utils.h +++ b/paddle/phi/capi/include/type_utils.h @@ -15,10 +15,10 @@ #pragma once #if !defined(_WIN32) +#include "paddle/common/enforce.h" #include "paddle/phi/capi/include/c_data_type.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/layout.h" -#include "paddle/phi/core/enforce.h" namespace phi { namespace capi { diff --git a/paddle/phi/common/int_array.cc b/paddle/phi/common/int_array.cc index 4b5d553006685b..75440bd2d5b818 100644 --- a/paddle/phi/common/int_array.cc +++ b/paddle/phi/common/int_array.cc @@ -14,10 +14,10 @@ limitations under the License. */ #include "paddle/phi/common/int_array.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/tensor_utils.h" namespace paddle { diff --git a/paddle/phi/common/memory_utils.h b/paddle/phi/common/memory_utils.h index e2a590ee4d210c..784394188ce406 100644 --- a/paddle/phi/common/memory_utils.h +++ b/paddle/phi/common/memory_utils.h @@ -17,11 +17,11 @@ #include // NOLINT #include +#include "paddle/common/enforce.h" #include "paddle/common/macros.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/allocator.h" #include "paddle/phi/core/device_context.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/stream.h" #include "paddle/utils/test_macros.h" diff --git a/paddle/phi/common/scalar.cc b/paddle/phi/common/scalar.cc index 71b90361f8b6b0..60ad3b68fe7f0e 100644 --- a/paddle/phi/common/scalar.cc +++ b/paddle/phi/common/scalar.cc @@ -14,10 +14,10 @@ limitations under the License. */ #include "paddle/common/scalar.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/tensor_utils.h" namespace paddle { namespace experimental { diff --git a/paddle/phi/common/transform.h b/paddle/phi/common/transform.h index d83b698a45bc6f..58b9d0ccf221ea 100644 --- a/paddle/phi/common/transform.h +++ b/paddle/phi/common/transform.h @@ -17,9 +17,9 @@ limitations under the License. 
*/ #include #include +#include "paddle/common/enforce.h" #include "paddle/common/hostdevice.h" #include "paddle/phi/backends/all_context.h" -#include "paddle/phi/core/enforce.h" #if defined(__NVCC__) || defined(__HIPCC__) #include diff --git a/paddle/phi/core/compat/arg_map_context.cc b/paddle/phi/core/compat/arg_map_context.cc index 800245406afd3a..b924dab355564b 100644 --- a/paddle/phi/core/compat/arg_map_context.cc +++ b/paddle/phi/core/compat/arg_map_context.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/phi/core/compat/arg_map_context.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/utils/string/string_helper.h" namespace phi { diff --git a/paddle/phi/core/compat/convert_utils.cc b/paddle/phi/core/compat/convert_utils.cc index d4c5de0dbe6dc9..06b0651784dfd1 100644 --- a/paddle/phi/core/compat/convert_utils.cc +++ b/paddle/phi/core/compat/convert_utils.cc @@ -14,11 +14,11 @@ limitations under the License. */ #include "paddle/phi/core/compat/convert_utils.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/xpu/xpu_info.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/compat/op_utils.h" -#include "paddle/phi/core/enforce.h" #ifdef PADDLE_WITH_CUSTOM_DEVICE #include "paddle/phi/backends/device_manager.h" diff --git a/paddle/phi/core/compat/get_kerneltype_forvar_utils.cc b/paddle/phi/core/compat/get_kerneltype_forvar_utils.cc index e144af3757a40a..1c51762890e0ee 100644 --- a/paddle/phi/core/compat/get_kerneltype_forvar_utils.cc +++ b/paddle/phi/core/compat/get_kerneltype_forvar_utils.cc @@ -14,7 +14,7 @@ #include "paddle/phi/core/compat/get_kerneltype_forvar_utils.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" namespace phi { const std::string& GetKernelTypeForVarContext::GetVarName() const { diff --git a/paddle/phi/core/compat/op_utils.h b/paddle/phi/core/compat/op_utils.h index e8ba9a8295816c..beee1aebe72197 100644 --- a/paddle/phi/core/compat/op_utils.h +++ b/paddle/phi/core/compat/op_utils.h @@ -18,10 +18,10 @@ limitations under the License. */ #include #include "glog/logging.h" +#include "paddle/common/enforce.h" #include "paddle/common/macros.h" #include "paddle/common/type_defs.h" #include "paddle/phi/core/compat/arg_map_context.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/utils/flat_hash_map.h" diff --git a/paddle/phi/core/cuda_stream.h b/paddle/phi/core/cuda_stream.h index b1565643c97878..87db6a04097879 100644 --- a/paddle/phi/core/cuda_stream.h +++ b/paddle/phi/core/cuda_stream.h @@ -30,7 +30,7 @@ using gpuStream_t = hipStream_t; #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" namespace phi { diff --git a/paddle/phi/core/ddim.cc b/paddle/phi/core/ddim.cc deleted file mode 100644 index ff95346be17c7a..00000000000000 --- a/paddle/phi/core/ddim.cc +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/ddim.h" - -#include - -namespace phi { - -DDim make_ddim(std::initializer_list dims) { - return DDim(dims.begin(), static_cast(dims.size())); -} - -DDim make_ddim(const std::vector& dims) { - return DDim(dims.data(), static_cast(dims.size())); -} - -DDim make_ddim(const std::vector& dims) { - return DDim(dims.data(), static_cast(dims.size())); -} - -struct DDimEqualityVisitor { - explicit DDimEqualityVisitor(const int64_t* d) : d_(d) {} - - template - inline bool operator()(const Dim& self) const { - return UnrollCompare::Run(self.Get(), d_); - } - - const int64_t* d_; -}; - -bool DDim::operator==(const DDim& d) const { - if (size() == -1 && d.size() == -1) { - return true; - } else if (size() == -1 || d.size() == -1) { - return false; - } else { - return size() == d.size() && - this->apply_visitor(DDimEqualityVisitor(d.Get())); - } -} - -bool DDim::operator!=(const DDim& d) const { return !(*this == d); } - -std::string DDim::to_str() const { - std::stringstream ss; - ss << '['; - if (rank_ > 0) ss << dim_[0]; - - for (int i = 1; i < rank_; ++i) ss << ", " << dim_[i]; - ss << ']'; - return ss.str(); -} - -struct ProductVisitor { - template - inline int64_t operator()(const Dim& dim) { - return product(dim); - } -}; - -int64_t product(const DDim& ddim) { - if (ddim.size() == -1) { - return 0; - } - return ddim.apply_visitor(ProductVisitor()); -} - -bool contain_unknown_dim(const DDim& ddim) { - for (int i = 0; i < ddim.size(); ++i) { - if (ddim[i] < 0) { - return true; - } - } - - return false; -} - -DDim slice_ddim(const DDim& dim, int begin, int end) { - PADDLE_ENFORCE_EQ( - (begin >= 0 && end <= dim.size()), - true, - phi::errors::InvalidArgument( - "[begin(%d), end(%d)) must be inside [0, %d) in ddim slice.", - begin, - end, - dim.size())); - // Constructor of DDim would check whether end - begin is valid - return DDim(dim.Get() + begin, end - begin); -} - -int arity(const DDim& d) { return d.size(); } - -struct DDimPrinter { - std::ostream& os; - explicit DDimPrinter(std::ostream& os_) : os(os_) {} - - template - void operator()(const Dim& t) { - os << t; - } -}; - -std::ostream& operator<<(std::ostream& os, const DDim& ddim) { - if (ddim.size() == -1) { - return os; - } - ddim.apply_visitor(DDimPrinter(os)); - return os; -} - -DDim flatten_to_3d(const DDim& src, int num_row_dims, int num_col_dims) { - PADDLE_ENFORCE_GE( - src.size(), - 3, - phi::errors::InvalidArgument("The rank of src dim should be at least 3 " - "in flatten_to_3d, but received %d.", - src.size())); - PADDLE_ENFORCE_EQ( - (num_row_dims >= 1 && num_row_dims < src.size()), - true, - phi::errors::InvalidArgument("The num_row_dims should be inside [1, %d] " - "in flatten_to_3d, but received %d.", - src.size() - 1, - num_row_dims)); - PADDLE_ENFORCE_EQ( - (num_col_dims >= 2 && num_col_dims <= src.size()), - true, - phi::errors::InvalidArgument("The num_col_dims should be inside [2, %d] " - "in flatten_to_3d, but received %d.", - src.size(), - num_col_dims)); - PADDLE_ENFORCE_GE( - num_col_dims, - num_row_dims, - phi::errors::InvalidArgument( - "The num_row_dims should be less than num_col_dims in flatten_to_3d," - "but received num_row_dims = %d, num_col_dims = %d.", - num_row_dims, - num_col_dims)); - - return DDim({product(slice_ddim(src, 0, num_row_dims)), - product(slice_ddim(src, num_row_dims, num_col_dims)), - product(slice_ddim(src, num_col_dims, src.size()))}); -} - -DDim 
flatten_to_2d(const DDim& src, int num_col_dims) { - return DDim({product(slice_ddim(src, 0, num_col_dims)), - product(slice_ddim(src, num_col_dims, src.size()))}); -} - -DDim flatten_to_1d(const DDim& src) { return DDim({product(src)}); } - -DDim stride(const DDim& ddim) { - DDim strides; - strides.rank_ = ddim.size(); - if (ddim.size() > 0) strides[ddim.size() - 1] = 1; - for (int i = ddim.size() - 2; i >= 0; --i) { - strides[i] = strides[i + 1] * ddim[i + 1]; - } - return strides; -} - -DDim stride_numel(const DDim& ddim) { - DDim strides; - strides.rank_ = ddim.size(); - if (ddim.size() > 0) strides[ddim.size() - 1] = ddim[ddim.size() - 1]; - for (int i = ddim.size() - 2; i >= 0; --i) { - strides[i] = strides[i + 1] * ddim[i]; - } - return strides; -} - -DDim DDim::reshape(std::vector& shape) const { - const DDim& in_dims = *this; - - for (int i = 0; i < static_cast(shape.size()); ++i) { - if (shape[i] == 0) { - shape[i] = static_cast(in_dims.at(i)); - } - } - - // Dim marked as "-1" must be inferred - auto it = std::find(shape.begin(), shape.end(), -1); - if (it != shape.end()) { - int index = static_cast(std::distance(shape.begin(), it)); - int reshape_out_product = - std::accumulate(shape.begin(), shape.end(), -1, std::multiplies()); - shape[index] = static_cast(product(in_dims)) / reshape_out_product; - } - - return phi::make_ddim(shape); -} - -DDim DDim::transpose(const std::vector& axis) const { - const DDim& in_dims = *this; - - DDim out_dims(in_dims); - for (int i = 0; i < static_cast(axis.size()); i++) { - out_dims[i] = in_dims[axis[i]]; - } - return out_dims; -} - -} // namespace phi - -namespace std { - -std::size_t hash::operator()(phi::DDim const& ddim) const { - int ndim = ddim.size(); - std::size_t seed = ndim; - for (int i = 0; i < ndim; ++i) { - seed ^= ddim.Get()[i] + 0x9e3779b9 + (seed << 6) + (seed >> 2); - } - return seed; -} - -} // namespace std diff --git a/paddle/phi/core/ddim.h b/paddle/phi/core/ddim.h deleted file mode 100644 index 22df7f9a1044ed..00000000000000 --- a/paddle/phi/core/ddim.h +++ /dev/null @@ -1,284 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
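Note: the deleted stride()/stride_numel() definitions above are the usual row-major recurrences. Restated over std::vector so the arithmetic can be checked in isolation — a sketch, not the relocated DDim API. For dims {2, 3, 4}, Stride gives {12, 4, 1} and StrideNumel gives {24, 12, 4}.

#include <cstdint>
#include <vector>

// Elements to skip per unit step along axis i (row-major layout).
std::vector<int64_t> Stride(const std::vector<int64_t>& dims) {
  std::vector<int64_t> strides(dims.size());
  if (!dims.empty()) strides.back() = 1;
  for (int i = static_cast<int>(dims.size()) - 2; i >= 0; --i)
    strides[i] = strides[i + 1] * dims[i + 1];
  return strides;
}

// Number of elements in the trailing subtensor starting at axis i.
std::vector<int64_t> StrideNumel(const std::vector<int64_t>& dims) {
  std::vector<int64_t> strides(dims.size());
  if (!dims.empty()) strides.back() = dims.back();
  for (int i = static_cast<int>(dims.size()) - 2; i >= 0; --i)
    strides[i] = strides[i + 1] * dims[i];
  return strides;
}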
-#pragma once -#include -#include -#include -#include -#include - -#include "paddle/common/dim.h" -#include "paddle/common/exception.h" -#include "paddle/utils/test_macros.h" - -namespace phi { - -#define PADDLE_VISIT_DDIM_BASE(rank, callback) \ - case (rank): { \ - constexpr auto kRank = (rank); \ - return (callback); \ - } - -#define PADDLE_VISIT_DDIM(rank, callback) \ - switch (rank) { \ - PADDLE_VISIT_DDIM_BASE(0, callback); \ - PADDLE_VISIT_DDIM_BASE(1, callback); \ - PADDLE_VISIT_DDIM_BASE(2, callback); \ - PADDLE_VISIT_DDIM_BASE(3, callback); \ - PADDLE_VISIT_DDIM_BASE(4, callback); \ - PADDLE_VISIT_DDIM_BASE(5, callback); \ - PADDLE_VISIT_DDIM_BASE(6, callback); \ - PADDLE_VISIT_DDIM_BASE(7, callback); \ - PADDLE_VISIT_DDIM_BASE(8, callback); \ - PADDLE_VISIT_DDIM_BASE(9, callback); \ - default: \ - PD_THROW( \ - "Unimplemented error. Invalid dimension to be accessed. Now only " \ - "supports access to " \ - "dimension 0 to 9, but received dimension is ", \ - rank, \ - "."); \ - } - -template -inline void dynamic_dim_assign(const T1* in, T2* out, int n) { - if (n == -1) { - return; - } - PADDLE_VISIT_DDIM(n, (common::static_dim_assign(in, out))); -} - -/** - * \brief A dynamically sized dimension. - * - * The number of dimensions must be between [1, 9]. - */ -class DDim { - public: - constexpr static int kMaxRank = 9; - - DDim() : rank_(-1) { dim_[0] = 0; } - - DDim(const DDim& ddim) : dim_() { CopyFrom(ddim); } - - DDim(const int* d, int n) : rank_(n) { - dynamic_dim_assign(d, dim_.GetMutable(), n); - } - - DDim(const int64_t* d, int n) : rank_(n) { - dynamic_dim_assign(d, dim_.GetMutable(), n); - } - - template - /*implicit*/ DDim(const common::Dim& in) : rank_(D) { // NOLINT - UnsafeCast() = in; - } - - /*implicit*/ DDim(std::initializer_list init_list) - : DDim(init_list.begin(), init_list.size()) {} - - inline DDim& operator=(const DDim& ddim) { return CopyFrom(ddim); } - - template - inline DDim& operator=(const common::Dim& dim) { - rank_ = D; - UnsafeCast() = dim; - return *this; - } - - inline int64_t& operator[](int idx) { return dim_[idx]; } - - inline int64_t operator[](int idx) const { return dim_[idx]; } - - int64_t& at(int idx) { - PADDLE_ENFORCE_GE(idx, - 0, - common::errors::InvalidArgument( - "Invalid DDim index to be accessed. The valid index " - "is between 0 and %d, but received index is %d.", - rank_, - idx)); - PADDLE_ENFORCE_LT(idx, - rank_, - common::errors::InvalidArgument( - "Invalid DDim index to be accessed. The valid index " - "is between 0 and %d, but received index is %d.", - rank_, - idx)); - return dim_[idx]; - } - - int64_t at(int idx) const { - PADDLE_ENFORCE_GE(idx, - 0, - common::errors::InvalidArgument( - "Invalid DDim index to be accessed. The valid index " - "is between 0 and %d, but received index is %d.", - rank_, - idx)); - PADDLE_ENFORCE_LT(idx, - rank_, - common::errors::InvalidArgument( - "Invalid DDim index to be accessed. 
The valid index "
-                          "is between 0 and %d, but received index is %d.",
-                          rank_,
-                          idx));
-    return dim_[idx];
-  }
-
-  template <typename Visitor>
-  typename std::result_of<Visitor(Dim<0>&)>::type apply_visitor(
-      Visitor&& visitor) {
-    PADDLE_VISIT_DDIM(rank_, visitor(UnsafeCast<kRank>()));
-  }
-
-  template <typename Visitor>
-  typename std::result_of<Visitor(const Dim<0>&)>::type apply_visitor(
-      Visitor&& visitor) const {
-    PADDLE_VISIT_DDIM(rank_, visitor(UnsafeCast<kRank>()));
-  }
-
-  bool operator==(const DDim& d) const;
-
-  bool operator!=(const DDim& d) const;
-
-  inline const int64_t* Get() const { return dim_.Get(); }
-
-  inline int64_t* GetMutable() { return dim_.GetMutable(); }
-
-  inline int size() const { return rank_; }
-
-  std::string to_str() const;
-
-  DDim reshape(std::vector<int>& shape) const;  // NOLINT
-
-  DDim transpose(const std::vector<int>& axis) const;
-
- private:
-  template <int D>
-  inline common::Dim<D>& UnsafeCast() {
-    static_assert(D >= 0 && D <= kMaxRank, "Invalid rank");
-    auto* p = static_cast<void*>(&dim_);
-    return *reinterpret_cast<common::Dim<D>*>(p);
-  }
-
-  template <int D>
-  inline const common::Dim<D>& UnsafeCast() const {
-    static_assert(D >= 0 && D <= kMaxRank, "Invalid rank");
-    auto* p = static_cast<const void*>(&dim_);
-    return *reinterpret_cast<const common::Dim<D>*>(p);
-  }
-
-  inline DDim& CopyFrom(const DDim& ddim) {
-    if (ddim.rank_ == -1) {
-      rank_ = -1;
-      return *this;
-    }
-    PADDLE_VISIT_DDIM(ddim.rank_, (*this = ddim.UnsafeCast<kRank>()));
-  }
-
-  friend DDim stride(const DDim& ddim);
-  friend DDim stride_numel(const DDim& ddim);
-
- private:
-  common::Dim<kMaxRank> dim_;
-  int rank_;
-};
-
-#undef PADDLE_VISIT_DDIM_BASE
-#undef PADDLE_VISIT_DDIM
-
-/**
- * \brief Make a DDim from std::vector<int64_t>
- *
- * \param dims A vector of ints. Must be sized between [1, 9]
- */
-TEST_API DDim make_ddim(const std::vector<int64_t>& dims);
-
-TEST_API DDim make_ddim(const std::vector<int>& dims);
-
-/**
- * \brief Make a DDim from an initializer list
- *
- * \param dims An initializer list of ints. Must be sized between [1, 9]
- *
- */
-TEST_API DDim make_ddim(std::initializer_list<int64_t> dims);
-
-template <typename T = int64_t>
-std::vector<T> vectorize(const DDim& ddim) {
-  if (ddim.size() == -1) {
-    return std::vector<T>({0});
-  }
-  std::vector<T> result(DDim::kMaxRank);
-  dynamic_dim_assign(ddim.Get(), result.data(), ddim.size());
-  result.resize(ddim.size());
-  return result;
-}
-
-TEST_API int64_t product(const DDim& ddim);
-
-bool contain_unknown_dim(const DDim& ddim);
-
-/**
- * \brief Slice a ddim
- *
- * Slice dim with [begin, end).
- * e.g.  DDim d = make_ddim({1,2,3,4,5});
- *       slice_ddim(d, 1, 3); ====> {2,3}
- */
-DDim slice_ddim(const DDim& dim, int begin, int end);
-
-/**
- * \brief What is the length of this dimension?
- *
- * \param Dynamic dimension to inspect
- */
-
-int arity(const DDim& ddim);
-
-TEST_API std::ostream& operator<<(std::ostream&, const DDim&);
-
-/**
- * \brief Flatten dim to 3d
- * e.g., DDim d = make_ddim({1, 2, 3, 4, 5, 6})
- *       flatten_to_3d(d, 2, 4); ===> {1*2, 3*4, 5*6} ===> {2, 12, 30}
- */
-DDim flatten_to_3d(const DDim& src, int num_row_dims, int num_col_dims);
-
-// Reshape a tensor to a matrix. The matrix's first dimension (column length)
-// will be the product of tensor's first `num_col_dims` dimensions.
-DDim flatten_to_2d(const DDim& src, int num_col_dims); - -DDim flatten_to_1d(const DDim& src); - -DDim stride(const DDim& ddim); - -DDim stride_numel(const DDim& ddim); -} // namespace phi - -namespace paddle { -namespace framework { - -using DDim = phi::DDim; - -} // namespace framework -} // namespace paddle - -namespace std { -template <> -struct hash { - std::size_t operator()(phi::DDim const& ddim) const; -}; -} // namespace std diff --git a/paddle/phi/core/device_context.cc b/paddle/phi/core/device_context.cc index 3804802e84260d..f148ad6e255744 100644 --- a/paddle/phi/core/device_context.cc +++ b/paddle/phi/core/device_context.cc @@ -18,8 +18,8 @@ #include "paddle/phi/backends/gpu/cuda/cuda_graph.h" #endif +#include "paddle/common/enforce.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/selected_rows.h" #include "paddle/phi/core/string_tensor.h" diff --git a/paddle/phi/core/distributed/auto_parallel/device_mesh.h b/paddle/phi/core/distributed/auto_parallel/device_mesh.h index 0888d5e2e7a2a6..03571c7932f33b 100644 --- a/paddle/phi/core/distributed/auto_parallel/device_mesh.h +++ b/paddle/phi/core/distributed/auto_parallel/device_mesh.h @@ -23,9 +23,9 @@ limitations under the License. */ #include #include +#include "paddle/common/enforce.h" #include "paddle/phi/core/distributed/auto_parallel/auto_parallel.pb.h" #include "paddle/phi/core/distributed/auto_parallel/utils.h" -#include "paddle/phi/core/enforce.h" namespace phi { namespace distributed { diff --git a/paddle/phi/core/distributed/auto_parallel/dist_attr.h b/paddle/phi/core/distributed/auto_parallel/dist_attr.h index 6689750d24ad9c..a9643912e3f5da 100644 --- a/paddle/phi/core/distributed/auto_parallel/dist_attr.h +++ b/paddle/phi/core/distributed/auto_parallel/dist_attr.h @@ -21,11 +21,11 @@ limitations under the License. */ #include #include +#include "paddle/common/enforce.h" #include "paddle/phi/common/reduce_type.h" #include "paddle/phi/core/distributed/auto_parallel/auto_parallel.pb.h" #include "paddle/phi/core/distributed/auto_parallel/process_mesh.h" #include "paddle/phi/core/distributed/auto_parallel/utils.h" -#include "paddle/phi/core/enforce.h" #include "paddle/utils/flat_hash_map.h" #include "paddle/utils/test_macros.h" diff --git a/paddle/phi/core/distributed/auto_parallel/inferspmd_utils.h b/paddle/phi/core/distributed/auto_parallel/inferspmd_utils.h index 922bdbebf895ec..ee8793ae687a78 100644 --- a/paddle/phi/core/distributed/auto_parallel/inferspmd_utils.h +++ b/paddle/phi/core/distributed/auto_parallel/inferspmd_utils.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include +#include "paddle/common/enforce.h" #include "paddle/common/macros.h" #include "paddle/common/scalar.h" #include "paddle/common/type_defs.h" @@ -27,7 +28,6 @@ limitations under the License. */ #include "paddle/phi/core/distributed/auto_parallel/dist_attr.h" #include "paddle/phi/core/distributed/auto_parallel/dist_meta_tensor.h" #include "paddle/phi/core/distributed/type_defs.h" -#include "paddle/phi/core/enforce.h" #include "paddle/utils/any.h" #include "paddle/utils/flat_hash_map.h" #include "paddle/utils/small_vector.h" diff --git a/paddle/phi/core/distributed/auto_parallel/process_mesh.h b/paddle/phi/core/distributed/auto_parallel/process_mesh.h index d512255ec10359..60a8031c2cc7b6 100644 --- a/paddle/phi/core/distributed/auto_parallel/process_mesh.h +++ b/paddle/phi/core/distributed/auto_parallel/process_mesh.h @@ -20,10 +20,10 @@ limitations under the License. 
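Note: the doc-comments in the deleted ddim.h a few hunks up carry two worked examples — slice_ddim(d, 1, 3) on {1,2,3,4,5} yields {2,3}, and flatten_to_3d(d, 2, 4) on {1,2,3,4,5,6} yields {2,12,30}. The following self-contained check of that arithmetic uses plain vectors, not the relocated DDim API; Product is a hypothetical helper.

#include <cassert>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

// Product of dims[b..e), i.e. what product(slice_ddim(d, b, e)) computes.
int64_t Product(const std::vector<int64_t>& v, size_t b, size_t e) {
  return std::accumulate(v.begin() + b, v.begin() + e, int64_t{1},
                         std::multiplies<int64_t>());
}

int main() {
  std::vector<int64_t> d{1, 2, 3, 4, 5, 6};
  // flatten_to_3d(d, 2, 4): {1*2, 3*4, 5*6} == {2, 12, 30}
  assert(Product(d, 0, 2) == 2);
  assert(Product(d, 2, 4) == 12);
  assert(Product(d, 4, 6) == 30);
  // slice_ddim(make_ddim({1,2,3,4,5}), 1, 3) keeps indices [1, 3): {2, 3}
  std::vector<int64_t> s{1, 2, 3, 4, 5};
  std::vector<int64_t> sliced(s.begin() + 1, s.begin() + 3);
  assert(sliced.size() == 2 && sliced[0] == 2 && sliced[1] == 3);
  return 0;
}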
*/ #include #include +#include "paddle/common/enforce.h" #include "paddle/phi/core/distributed/auto_parallel/auto_parallel.pb.h" #include "paddle/phi/core/distributed/auto_parallel/device_mesh.h" #include "paddle/phi/core/distributed/auto_parallel/utils.h" -#include "paddle/phi/core/enforce.h" namespace phi { namespace distributed { diff --git a/paddle/phi/core/distributed/auto_parallel/reshard/reshard_utils.cc b/paddle/phi/core/distributed/auto_parallel/reshard/reshard_utils.cc index e7a1ec15da307a..c0a7d2dc59dd59 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard/reshard_utils.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard/reshard_utils.cc @@ -15,13 +15,13 @@ #include "paddle/phi/core/distributed/auto_parallel/reshard/reshard_utils.h" #include "glog/logging.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/core/device_context.h" #include "paddle/phi/core/distributed/auto_parallel/process_mesh.h" #include "paddle/phi/core/distributed/auto_parallel/reshard/reshard_function.h" #include "paddle/phi/core/distributed/comm_context_manager.h" #include "paddle/phi/core/distributed/store/store_utils.h" -#include "paddle/phi/core/enforce.h" namespace phi { namespace distributed { diff --git a/paddle/phi/core/distributed/auto_parallel/utils.h b/paddle/phi/core/distributed/auto_parallel/utils.h index 915c1565296700..ec6ab156050aa2 100644 --- a/paddle/phi/core/distributed/auto_parallel/utils.h +++ b/paddle/phi/core/distributed/auto_parallel/utils.h @@ -21,7 +21,7 @@ limitations under the License. */ #include #include -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" namespace phi { namespace distributed { @@ -52,7 +52,7 @@ inline int64_t canonical_dim(int dim, int ndim) { PADDLE_ENFORCE_EQ( dim >= -ndim && dim < ndim, true, - errors::InvalidArgument( + common::errors::InvalidArgument( "Dimension %d is outside of [-%d, %d).", dim, ndim, ndim)); if (dim < 0) { return dim + ndim; diff --git a/paddle/phi/core/distributed/check/nccl_dynamic_check.cc b/paddle/phi/core/distributed/check/nccl_dynamic_check.cc index 0cb295b1787a55..9836d04cdf2c84 100644 --- a/paddle/phi/core/distributed/check/nccl_dynamic_check.cc +++ b/paddle/phi/core/distributed/check/nccl_dynamic_check.cc @@ -16,9 +16,9 @@ #include "glog/logging.h" +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #if defined(PADDLE_WITH_RCCL) #include @@ -64,13 +64,13 @@ void NCCLDynamicCheck::CheckDataType(const phi::DenseTensor& tensor, PADDLE_ENFORCE_GPU_SUCCESS( gpuMemcpy(dtype_device, &dtype_host, kSize, gpuMemcpyHostToDevice)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclBroadcast(dtype_device, - dtype_device, - 1, - ncclInt64, - root_rank, - comm, - kDefaultStream)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::ncclBroadcast(dtype_device, + dtype_device, + 1, + ncclInt64, + root_rank, + comm, + kDefaultStream)); if (root_rank == cur_rank) { VLOG(3) << "Dynamic check broadcast metadata, dtype: " << dtype_host; @@ -106,13 +106,13 @@ void NCCLDynamicCheck::CheckShape(const phi::DenseTensor& tensor, PADDLE_ENFORCE_GPU_SUCCESS( gpuMemcpy(shape_device, &shape_host, kSize, gpuMemcpyHostToDevice)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclBroadcast(shape_device, - shape_device, - 1, - ncclInt64, - root_rank, - comm, - kDefaultStream)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::ncclBroadcast(shape_device, + 
shape_device, + 1, + ncclInt64, + root_rank, + comm, + kDefaultStream)); if (root_rank == cur_rank) { VLOG(3) << "Dynamic check broadcast metadata, shape: " << shape_host; @@ -143,14 +143,14 @@ void NCCLDynamicCheck::CheckShape(const phi::DenseTensor& out_tensor, PADDLE_ENFORCE_GPU_SUCCESS(gpuMalloc(&in_shape_device, kSize)); PADDLE_ENFORCE_GPU_SUCCESS(gpuMemcpy( in_shape_device, &in_shape_host, kSize, gpuMemcpyHostToDevice)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclReduce(in_shape_device, - in_shape_device, - 1, - ncclInt64, - ncclSum, - rank, - comm, - kDefaultStream)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::ncclReduce(in_shape_device, + in_shape_device, + 1, + ncclInt64, + ncclSum, + rank, + comm, + kDefaultStream)); if (rank == cur_rank) { PADDLE_ENFORCE_GPU_SUCCESS(gpuMemcpy( &in_shape_host, in_shape_device, kSize, gpuMemcpyDeviceToHost)); @@ -178,13 +178,13 @@ void NCCLDynamicCheck::CheckGatherShape( world_size * sizeof(int64_t), gpuMemcpyHostToDevice)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclAllReduce(in_shape_device, - in_shape_device, - world_size, - ncclInt64, - ncclSum, - comm, - kDefaultStream)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::ncclAllReduce(in_shape_device, + in_shape_device, + world_size, + ncclInt64, + ncclSum, + comm, + kDefaultStream)); PADDLE_ENFORCE_GPU_SUCCESS(gpuMemcpy(shapes.data(), in_shape_device, world_size * sizeof(int64_t), diff --git a/paddle/phi/core/distributed/check/static_check.cc b/paddle/phi/core/distributed/check/static_check.cc index 8ec3e19e6038ea..b6e208c677cd73 100644 --- a/paddle/phi/core/distributed/check/static_check.cc +++ b/paddle/phi/core/distributed/check/static_check.cc @@ -17,9 +17,9 @@ #include #include +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" namespace phi { namespace distributed { diff --git a/paddle/phi/core/distributed/comm_context_manager.cc b/paddle/phi/core/distributed/comm_context_manager.cc index 2a5b336f34e256..9450aa51cd2b91 100644 --- a/paddle/phi/core/distributed/comm_context_manager.cc +++ b/paddle/phi/core/distributed/comm_context_manager.cc @@ -18,9 +18,9 @@ #include #include "glog/logging.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/core/distributed/store/store.h" -#include "paddle/phi/core/enforce.h" #if defined(PADDLE_WITH_GLOO) #include @@ -65,7 +65,7 @@ void CommContextManager::CreateNCCLCommContext( } ncclUniqueId nccl_id; if (rank == 0 || (p2p_opt && p2p_opt->is_p2p_op && p2p_opt->p2p_rank == 0)) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclGetUniqueId(&nccl_id)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::ncclGetUniqueId(&nccl_id)); } std::string unique_key = "NCCLCommContext/" + unique_comm_key + hash_key; diff --git a/paddle/phi/core/distributed/comm_task.h b/paddle/phi/core/distributed/comm_task.h index 079c16902b7b9d..489700934d61e6 100644 --- a/paddle/phi/core/distributed/comm_task.h +++ b/paddle/phi/core/distributed/comm_task.h @@ -18,9 +18,9 @@ #include #include #include +#include "paddle/common/enforce.h" #include "paddle/common/macros.h" #include "paddle/phi/core/distributed/utils.h" -#include "paddle/phi/core/enforce.h" #if defined(PADDLE_WITH_RCCL) #include "paddle/phi/backends/dynload/rccl.h" diff --git a/paddle/phi/core/distributed/comm_task_manager.cc b/paddle/phi/core/distributed/comm_task_manager.cc index 37083119b59f59..a32d433739b2fd 100644 --- 
a/paddle/phi/core/distributed/comm_task_manager.cc +++ b/paddle/phi/core/distributed/comm_task_manager.cc @@ -27,9 +27,9 @@ #include "gflags/gflags.h" #include "glog/logging.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/core/distributed/store/store.h" -#include "paddle/phi/core/enforce.h" #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #include "paddle/phi/core/distributed/comm_task_manager.h" diff --git a/paddle/phi/core/distributed/gloo_comm_context.cc b/paddle/phi/core/distributed/gloo_comm_context.cc index 098bc851bf11c3..863d8d76e50359 100644 --- a/paddle/phi/core/distributed/gloo_comm_context.cc +++ b/paddle/phi/core/distributed/gloo_comm_context.cc @@ -24,10 +24,10 @@ #include #include +#include "paddle/common/enforce.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/distributed/check/static_check.h" -#include "paddle/phi/core/enforce.h" namespace phi { namespace distributed { diff --git a/paddle/phi/core/distributed/gloo_utils.cc b/paddle/phi/core/distributed/gloo_utils.cc index 312681384a1996..1472e15420ca2b 100644 --- a/paddle/phi/core/distributed/gloo_utils.cc +++ b/paddle/phi/core/distributed/gloo_utils.cc @@ -26,10 +26,10 @@ #include #include +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/distributed/gloo_utils.h" #include "paddle/phi/core/distributed/store/tcp_utils.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" namespace phi { namespace distributed { diff --git a/paddle/phi/core/distributed/nccl_comm_context.cc b/paddle/phi/core/distributed/nccl_comm_context.cc index d1d92c98fb0fd6..c2bedaf4d613b0 100644 --- a/paddle/phi/core/distributed/nccl_comm_context.cc +++ b/paddle/phi/core/distributed/nccl_comm_context.cc @@ -16,13 +16,13 @@ #include "glog/logging.h" +#include "paddle/common/data_type.h" +#include "paddle/common/enforce.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/distributed/check/nccl_dynamic_check.h" #include "paddle/phi/core/distributed/check/static_check.h" #include "paddle/phi/core/distributed/nccl_tools.h" #include "paddle/phi/core/distributed/utils.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { namespace distributed { @@ -33,8 +33,8 @@ constexpr bool FLAGS_enable_nccl_dynamic_check = false; NCCLCommContext::NCCLCommContext(int rank, int size, ncclUniqueId nccl_id) : CommContext(rank, size) { NCCL_CHECK( - phi::dynload::ncclCommInitRank(&nccl_comm_, size_, nccl_id, rank_)); - NCCL_CHECK(phi::dynload::ncclGetVersion(&nccl_version_)); + common::dynload::ncclCommInitRank(&nccl_comm_, size_, nccl_id, rank_)); + NCCL_CHECK(common::dynload::ncclGetVersion(&nccl_version_)); } int NCCLCommContext::GetNcclVersion() { return nccl_version_; } @@ -77,13 +77,13 @@ void NCCLCommContext::Broadcast(phi::DenseTensor* out_tensor, if (FLAGS_enable_nccl_dynamic_check) { NCCLDynamicCheck::CheckShape(*out_tensor, root, rank_, nccl_comm_); } - NCCL_CHECK(phi::dynload::ncclBroadcast(in_tensor.data(), - out_tensor->data(), - in_tensor.numel(), - ToNCCLDataType(in_tensor.type()), - root, - nccl_comm_, - stream)); + NCCL_CHECK(common::dynload::ncclBroadcast(in_tensor.data(), + out_tensor->data(), + in_tensor.numel(), + ToNCCLDataType(in_tensor.type()), + root, + nccl_comm_, + stream)); } void NCCLCommContext::AllGather(phi::DenseTensor* out_tensor, @@ -100,12 +100,12 @@ void 
NCCLCommContext::AllGather(phi::DenseTensor* out_tensor, rank_, nccl_comm_); } - NCCL_CHECK(phi::dynload::ncclAllGather(in_tensor.data(), - out_tensor->data(), - in_tensor.numel(), - ToNCCLDataType(in_tensor.type()), - nccl_comm_, - stream)); + NCCL_CHECK(common::dynload::ncclAllGather(in_tensor.data(), + out_tensor->data(), + in_tensor.numel(), + ToNCCLDataType(in_tensor.type()), + nccl_comm_, + stream)); } void NCCLCommContext::ReduceScatter(phi::DenseTensor* out_tensor, const phi::DenseTensor& in_tensor, @@ -122,13 +122,14 @@ void NCCLCommContext::ReduceScatter(phi::DenseTensor* out_tensor, rank_, nccl_comm_); } - NCCL_CHECK(phi::dynload::ncclReduceScatter(in_tensor.data(), - out_tensor->data(), - out_tensor->numel(), - ToNCCLDataType(in_tensor.type()), - reduce_type, - nccl_comm_, - stream)); + NCCL_CHECK( + common::dynload::ncclReduceScatter(in_tensor.data(), + out_tensor->data(), + out_tensor->numel(), + ToNCCLDataType(in_tensor.type()), + reduce_type, + nccl_comm_, + stream)); } void NCCLCommContext::Send(const phi::DenseTensor& in_tensor, @@ -141,12 +142,12 @@ void NCCLCommContext::Send(const phi::DenseTensor& in_tensor, NCCLDynamicCheck::CheckShape(in_tensor, rank_, rank_, nccl_comm_); } - NCCL_CHECK(phi::dynload::ncclSend(in_tensor.data(), - count, - ToNCCLDataType(in_tensor.dtype()), - peer, - nccl_comm_, - stream)); + NCCL_CHECK(common::dynload::ncclSend(in_tensor.data(), + count, + ToNCCLDataType(in_tensor.dtype()), + peer, + nccl_comm_, + stream)); VLOG(3) << "rank " << GetRank() << " send " << phi::product(in_tensor.dims()) << " to " << peer; } @@ -160,12 +161,12 @@ void NCCLCommContext::Recv(phi::DenseTensor* out_tensor, NCCLDynamicCheck::CheckShape(*out_tensor, peer, rank_, nccl_comm_); } - NCCL_CHECK(phi::dynload::ncclRecv(out_tensor->data(), - count, - ToNCCLDataType(out_tensor->dtype()), - peer, - nccl_comm_, - stream)); + NCCL_CHECK(common::dynload::ncclRecv(out_tensor->data(), + count, + ToNCCLDataType(out_tensor->dtype()), + peer, + nccl_comm_, + stream)); VLOG(3) << "rank " << GetRank() << " recv " << phi::product(out_tensor->dims()) << " from " << peer; } @@ -185,13 +186,13 @@ void NCCLCommContext::AllReduce(phi::DenseTensor* out_tensor, rank_, nccl_comm_); } - NCCL_CHECK(phi::dynload::ncclAllReduce(in_tensor.data(), - out_tensor->data(), - in_tensor.numel(), - ToNCCLDataType(in_tensor.type()), - reduce_type, - nccl_comm_, - stream)); + NCCL_CHECK(common::dynload::ncclAllReduce(in_tensor.data(), + out_tensor->data(), + in_tensor.numel(), + ToNCCLDataType(in_tensor.type()), + reduce_type, + nccl_comm_, + stream)); } void NCCLCommContext::Reduce(phi::DenseTensor* out_tensor, @@ -210,32 +211,34 @@ void NCCLCommContext::Reduce(phi::DenseTensor* out_tensor, rank_, nccl_comm_); } - NCCL_CHECK(phi::dynload::ncclReduce(in_tensor.data(), - out_tensor->data(), - in_tensor.numel(), - ToNCCLDataType(in_tensor.type()), - reduce_type, - root, - nccl_comm_, - stream)); + NCCL_CHECK(common::dynload::ncclReduce(in_tensor.data(), + out_tensor->data(), + in_tensor.numel(), + ToNCCLDataType(in_tensor.type()), + reduce_type, + root, + nccl_comm_, + stream)); } void NCCLCommContext::GroupStart() { - NCCL_CHECK(phi::dynload::ncclGroupStart()); + NCCL_CHECK(common::dynload::ncclGroupStart()); +} +void NCCLCommContext::GroupEnd() { + NCCL_CHECK(common::dynload::ncclGroupEnd()); } -void NCCLCommContext::GroupEnd() { NCCL_CHECK(phi::dynload::ncclGroupEnd()); } #if NCCL_VERSION_CODE >= 21100 void NCCLCommContext::RedOpCreatePreMulSum(ncclRedOp_t* op, void* scalar, ncclDataType_t dtype, 
ncclScalarResidence_t residence) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclRedOpCreatePreMulSum( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::ncclRedOpCreatePreMulSum( op, scalar, dtype, residence, nccl_comm_)); } void NCCLCommContext::RedOpDestroy(ncclRedOp_t op) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclRedOpDestroy(op, nccl_comm_)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::ncclRedOpDestroy(op, nccl_comm_)); } #endif diff --git a/paddle/phi/core/distributed/nccl_comm_task.cc b/paddle/phi/core/distributed/nccl_comm_task.cc index f82f39c1954a3d..a495d7ec87621d 100644 --- a/paddle/phi/core/distributed/nccl_comm_task.cc +++ b/paddle/phi/core/distributed/nccl_comm_task.cc @@ -17,10 +17,10 @@ #include "gflags/gflags.h" #include "glog/logging.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/core/distributed/nccl_tools.h" #include "paddle/phi/core/distributed/trace_utils.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { namespace distributed { @@ -119,7 +119,7 @@ std::string GetNCCLErrorDetail(ncclResult_t result) { std::string last_error; #ifdef ENABLE_NCCL_GET_LAST_ERROR last_error = - ", Last error: " + std::string(phi::dynload::ncclGetLastError(NULL)); + ", Last error: " + std::string(common::dynload::ncclGetLastError(NULL)); #endif switch (result) { case ncclUnhandledCudaError: @@ -167,7 +167,7 @@ std::string NCCLCommTask::GetCommErrors() { ncclResult_t nccl_async_error; NCCL_CHECK( - phi::dynload::ncclCommGetAsyncError(nccl_comm_, &nccl_async_error)); + common::dynload::ncclCommGetAsyncError(nccl_comm_, &nccl_async_error)); if (nccl_async_error != ncclSuccess) { comm_error_ = "\n\t Find nccl comm error: " + GetNCCLErrorDetail(nccl_async_error); @@ -190,7 +190,7 @@ void NCCLCommTask::AbortComm() { if (aborted_) { return; } - NCCL_CHECK(phi::dynload::ncclCommAbort(nccl_comm_)); + NCCL_CHECK(common::dynload::ncclCommAbort(nccl_comm_)); aborted_ = true; nccl_comm_ = nullptr; diff --git a/paddle/phi/core/distributed/nccl_tools.cc b/paddle/phi/core/distributed/nccl_tools.cc index e419cfca905fa5..fa224684f119b9 100644 --- a/paddle/phi/core/distributed/nccl_tools.cc +++ b/paddle/phi/core/distributed/nccl_tools.cc @@ -16,8 +16,8 @@ #include -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" #if NCCL_VERSION_CODE >= 21300 #define ENABLE_NCCL_GET_LAST_ERROR @@ -37,7 +37,7 @@ ncclRedOp_t ToNCCLRedType(ReduceOp reduction) { auto it = red_type.find(reduction); PADDLE_ENFORCE_EQ(it != red_type.end(), true, - phi::errors::InvalidArgument( + common::errors::InvalidArgument( "Invalid nccl reduction. 
Must be ncclMin | ncclMax | " "ncclProd | ncclSum")); return it->second; @@ -75,7 +75,7 @@ std::string NCCLDTypeToString(ncclDataType_t dtype) { PD_NCCL_DTYPE_TO_STR(ncclUint64, "uint64"); #undef PD_NCCL_DTYPE_TO_STR - PADDLE_THROW(phi::errors::InvalidArgument( + PADDLE_THROW(common::errors::InvalidArgument( "This datatype %d in nccl is not supported.", static_cast(dtype))); } diff --git a/paddle/phi/core/distributed/nccl_tools.h b/paddle/phi/core/distributed/nccl_tools.h index 4268e690e7382d..8f7cdcce9dd434 100644 --- a/paddle/phi/core/distributed/nccl_tools.h +++ b/paddle/phi/core/distributed/nccl_tools.h @@ -29,16 +29,16 @@ namespace phi { namespace distributed { -#define NCCL_CHECK(cmd) \ - do { \ - ncclResult_t r = cmd; \ - if (r != ncclSuccess) { \ - PADDLE_THROW( \ - phi::errors::External("Failed, NCCL error %s:%d '%s'\n", \ - __FILE__, \ - __LINE__, \ - phi::dynload::ncclGetErrorString(r))); \ - } \ +#define NCCL_CHECK(cmd) \ + do { \ + ncclResult_t r = cmd; \ + if (r != ncclSuccess) { \ + PADDLE_THROW( \ + phi::errors::External("Failed, NCCL error %s:%d '%s'\n", \ + __FILE__, \ + __LINE__, \ + common::dynload::ncclGetErrorString(r))); \ + } \ } while (0) #ifdef PADDLE_WITH_NCCL diff --git a/paddle/phi/core/distributed/store/store.cc b/paddle/phi/core/distributed/store/store.cc index 5987b694b4e51e..8d4e6c26bb2b21 100644 --- a/paddle/phi/core/distributed/store/store.cc +++ b/paddle/phi/core/distributed/store/store.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/core/distributed/store/store.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" namespace phi { namespace distributed { diff --git a/paddle/phi/core/distributed/store/tcp_utils.h b/paddle/phi/core/distributed/store/tcp_utils.h index af11ad27f04254..29130949b4b7ac 100644 --- a/paddle/phi/core/distributed/store/tcp_utils.h +++ b/paddle/phi/core/distributed/store/tcp_utils.h @@ -31,7 +31,7 @@ #include #include -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" // Utility functions for TCP socket. namespace phi { diff --git a/paddle/phi/core/distributed/xccl_comm_context.cc b/paddle/phi/core/distributed/xccl_comm_context.cc index 5c82e7baf0e82f..154dddeae31db3 100644 --- a/paddle/phi/core/distributed/xccl_comm_context.cc +++ b/paddle/phi/core/distributed/xccl_comm_context.cc @@ -16,11 +16,11 @@ #include "glog/logging.h" +#include "paddle/common/data_type.h" +#include "paddle/common/enforce.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/distributed/check/static_check.h" #include "paddle/phi/core/distributed/utils.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { namespace distributed { diff --git a/paddle/phi/core/enforce.h b/paddle/phi/core/enforce.h deleted file mode 100644 index 6106e56ae8dc95..00000000000000 --- a/paddle/phi/core/enforce.h +++ /dev/null @@ -1,1036 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#ifdef __GNUC__ -#include <cxxabi.h> // for __cxa_demangle -#endif // __GNUC__ - -#if !defined(_WIN32) -#include <dlfcn.h> // dladdr -#include <unistd.h> // sleep, usleep -#else // _WIN32 -#ifndef NOMINMAX -#define NOMINMAX // msvc max/min macro conflict with std::min/max -#endif -#include <windows.h> // GetModuleFileName, Sleep -#endif - -#ifdef PADDLE_WITH_CUDA -#include -#include -#include -#include -#include -#include -#include -#endif // PADDLE_WITH_CUDA - -#ifdef PADDLE_WITH_HIP -#include -#include -#include -#include -#include // NOLINT -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include "paddle/common/macros.h" -#if !defined(_WIN32) && !defined(PADDLE_WITH_MUSL) -#include -#endif - -#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h -#include "paddle/common/errors.h" - -#include "paddle/utils/string/printf.h" -#include "paddle/utils/string/to_string.h" -#include "paddle/utils/test_macros.h" - -#ifdef PADDLE_WITH_CUDA -#include "paddle/common/backends/dynload/cublas.h" -#include "paddle/common/backends/dynload/cudnn.h" -#include "paddle/common/backends/dynload/curand.h" -#include "paddle/common/backends/dynload/cusolver.h" -#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) -#include - -#include "paddle/common/backends/dynload/nccl.h" -#endif // __APPLE__ -#endif // PADDLE_WITH_CUDA - -#ifdef PADDLE_WITH_HIP -#include "paddle/common/backends/dynload/hipfft.h" -#include "paddle/common/backends/dynload/hiprand.h" -#include "paddle/common/backends/dynload/miopen.h" -#include "paddle/common/backends/dynload/rocblas.h" -#if !defined(__APPLE__) && defined(PADDLE_WITH_RCCL) -#include // NOLINT - -#include "paddle/common/backends/dynload/rccl.h" -#endif // __APPLE__ -#endif // PADDLE_WITH_HIP - -// Note: these headers for simplify demangle type string -#include "paddle/common/type_defs.h" -// Note: this header for simplify HIP and CUDA type string -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#include "paddle/common/backends/gpu/gpu_types.h" -#endif - -#include "paddle/utils/variant.h" - -namespace phi { -class ErrorSummary; -} // namespace phi - -namespace phi { -namespace proto {} // namespace proto -} // namespace phi - -namespace phi { -namespace enforce { - -/** HELPER MACROS AND FUNCTIONS **/ -#ifndef PADDLE_MAY_THROW -#define PADDLE_MAY_THROW noexcept(false) -#endif - -// Because most enforce conditions would evaluate to true, we can use -// __builtin_expect to instruct the C++ compiler to generate code that -// always forces branch prediction of true. -// This generates faster binary code. __builtin_expect is since C++11. -// For more details, please check https://stackoverflow.com/a/43870188/724872. -#if !defined(_WIN32) -#define UNLIKELY(condition) __builtin_expect(static_cast(condition), 0) -#else -// there is no equivalent intrinsics in msvc.
-#define LIKELY(condition) (condition) -#endif - -#if defined _WIN32 && defined PADDLE_ON_INFERENCE && defined PADDLE_NO_PYTHON -#define HANDLE_THE_ERROR try { -#define END_HANDLE_THE_ERROR \ - } \ - catch (const std::exception& e) { \ - std::cout << e.what() << std::endl; \ - throw; \ - } -#else -#define HANDLE_THE_ERROR -#define END_HANDLE_THE_ERROR -#endif - -#ifdef __GNUC__ -inline std::string demangle(std::string name) { - int status = -4; // some arbitrary value to eliminate the compiler warning - std::unique_ptr res{ - abi::__cxa_demangle(name.c_str(), NULL, NULL, &status), std::free}; - return (status == 0) ? res.get() : name; -} -#else -inline std::string demangle(std::string name) { return name; } -#endif - -namespace details { -template -inline constexpr bool IsArithmetic() { - return std::is_arithmetic::value; -} - -template -struct TypeConverterImpl { - using Type1 = typename std::common_type::type; - using Type2 = Type1; -}; - -template -struct TypeConverterImpl { - using Type1 = T1; - using Type2 = T2; -}; - -template -struct TypeConverter { - static constexpr bool kIsArithmetic = - IsArithmetic() && IsArithmetic(); - using Type1 = typename TypeConverterImpl::Type1; - using Type2 = typename TypeConverterImpl::Type2; -}; - -template -using CommonType1 = typename std::add_lvalue_reference< - typename std::add_const::Type1>::type>::type; - -template -using CommonType2 = typename std::add_lvalue_reference< - typename std::add_const::Type2>::type>::type; - -// Here, we use SFINAE to check whether T can be converted to std::string -template -struct CanToString { - private: - using YesType = uint8_t; - using NoType = uint16_t; - - template - static YesType Check(decltype(std::cout << std::declval())) { - return 0; - } - - template - static NoType Check(...) 
{ - return 0; - } - - public: - static constexpr bool kValue = - std::is_same(std::cout))>::value; -}; - -template -struct BinaryCompareMessageConverter { - template - static std::string Convert(const char* expression, const T& value) { - return expression + std::string(":") + paddle::string::to_string(value); - } -}; - -template <> -struct BinaryCompareMessageConverter { - template - static const char* Convert(const char* expression, const T& value UNUSED) { - return expression; - } -}; -} // namespace details - -TEST_API int GetCallStackLevel(); -TEST_API std::string GetCurrentTraceBackString(bool for_signal = false); -TEST_API std::string SimplifyErrorTypeFormat(const std::string& str); - -template -static std::string GetErrorSumaryString(StrType&& what, - const char* file, - int line) { - std::ostringstream sout; - if (GetCallStackLevel() > 1) { - sout << "\n----------------------\nError Message " - "Summary:\n----------------------\n"; - } - sout << paddle::string::Sprintf( - "%s (at %s:%d)", std::forward(what), file, line) - << std::endl; - return sout.str(); -} - -template -std::string GetCompleteTraceBackString(StrType&& what, - const char* file, - int line) { - std::ostringstream sout; - sout << "\n----------------------\nError Message " - "Summary:\n----------------------\n"; - sout << paddle::string::Sprintf( - "%s (at %s:%d)", std::forward(what), file, line) - << std::endl; - return GetCurrentTraceBackString() + sout.str(); -} - -template -static std::string GetTraceBackString(StrType&& what, - const char* file, - int line) { - if (GetCallStackLevel() > 1) { - // FLAGS_call_stack_level>1 means showing c++ call stack - return GetCurrentTraceBackString() + GetErrorSumaryString(what, file, line); - } else { - return GetErrorSumaryString(what, file, line); - } -} - -inline bool is_error(bool stat) { return !stat; } - -// Note: This Macro can only be used within enforce.h -#define __THROW_ERROR_INTERNAL__(__ERROR_SUMMARY) \ - do { \ - HANDLE_THE_ERROR \ - throw ::phi::enforce::EnforceNotMet(__ERROR_SUMMARY, __FILE__, __LINE__); \ - END_HANDLE_THE_ERROR \ - } while (0) - -/** - * [Why declare function ThrowWarnInternal instead of defining macro - * __THROW_WARN_INTERNAL__?] - * ThrowWarnInternal uses `LOG` macro to display warning message, which depends - * on third-party header file "logging.h". However, "logging.h" has not been - * exposed to site-package yet, so that error will occur when we include - * "enforce.h" header file. Hence, we declare function in enforce.h and define - * it in enforce.cc file. 
- */ -void ThrowWarnInternal(const std::string& message); - -/** ENFORCE EXCEPTION AND MACROS **/ - -struct EnforceNotMet : public std::exception { - public: - EnforceNotMet(std::exception_ptr e, const char* file, int line) { - try { - std::rethrow_exception(e); - } catch (EnforceNotMet& e) { - code_ = e.code(); - err_str_ = GetTraceBackString(e.what(), file, line); - simple_err_str_ = SimplifyErrorTypeFormat(err_str_); - } catch (std::exception& e) { - err_str_ = GetTraceBackString(e.what(), file, line); - simple_err_str_ = SimplifyErrorTypeFormat(err_str_); - } - } - - EnforceNotMet(const std::string& str, const char* file, int line) - : err_str_(GetTraceBackString(str, file, line)) { - simple_err_str_ = SimplifyErrorTypeFormat(err_str_); - } - - EnforceNotMet(const common::ErrorSummary& error, const char* file, int line) - : code_(error.code()), - err_str_(GetTraceBackString(error.to_string(), file, line)) { - simple_err_str_ = SimplifyErrorTypeFormat(err_str_); - } - - const char* what() const noexcept override { - if (GetCallStackLevel() > 1) { - return err_str_.c_str(); - } else { - return simple_err_str_.c_str(); - } - } - - common::ErrorCode code() const { return code_; } - - const std::string& error_str() const { return err_str_; } - - const std::string& simple_error_str() const { return simple_err_str_; } - - void set_error_str(std::string str) { - if (GetCallStackLevel() > 1) { - err_str_ = str; - } else { - simple_err_str_ = str; - } - } - - ~EnforceNotMet() override = default; - - private: - // Used to determine the final type of exception thrown - common::ErrorCode code_ = common::ErrorCode::LEGACY; - // Complete error message - // e.g. InvalidArgumentError: *** - std::string err_str_; - // Simple error message used when no C++ stack and python compile stack - // e.g. (InvalidArgument) *** - std::string simple_err_str_; -}; - -#define PADDLE_THROW(...) \ - do { \ - HANDLE_THE_ERROR \ - throw ::phi::enforce::EnforceNotMet( \ - ::common::ErrorSummary(__VA_ARGS__), __FILE__, __LINE__); \ - END_HANDLE_THE_ERROR \ - } while (0) - -#if defined(__CUDA_ARCH__) -// For cuda, the assertions can affect performance and it is therefore -// recommended to disable them in production code -// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#assertion -#define PADDLE_ENFORCE(_IS_NOT_ERROR, __FORMAT, ...) \ - do { \ - if (!(_IS_NOT_ERROR)) { \ - printf("Error: %s:%d Assertion `%s` failed. " __FORMAT "\n", \ - __FILE__, \ - __LINE__, \ - #_IS_NOT_ERROR, \ - ##__VA_ARGS__); \ - asm("trap;"); \ - } \ - } while (0) -#elif defined(__HIPCC__) -#define PADDLE_ENFORCE(_IS_NOT_ERROR, __FORMAT, ...) \ - do { \ - if (!(_IS_NOT_ERROR)) { \ - printf("Error: %s:%d Assertion `%s` failed. " __FORMAT "\n", \ - __FILE__, \ - __LINE__, \ - #_IS_NOT_ERROR, \ - ##__VA_ARGS__); \ - abort(); \ - } \ - } while (0) -#else -#define PADDLE_ENFORCE(COND, ...) \ - do { \ - auto __cond__ = (COND); \ - if (UNLIKELY(::phi::is_error(__cond__))) { \ - __THROW_ERROR_INTERNAL__(common::ErrorSummary(__VA_ARGS__)); \ - } \ - } while (0) -#endif - -/* - * Some enforce helpers here, usage: - * int a = 1; - * int b = 2; - * PADDLE_ENFORCE_EQ(a, b); - * - * will raise an expression described as follows: - * "Expected input a == b, but received a(1) != b(2)." - * with detailed stack information. - * - * extra messages is also supported, for example: - * PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2) - */ - -#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...)
\ - do { \ - if (UNLIKELY(nullptr == (__VAL))) { \ - auto __summary__ = common::ErrorSummary(__VA_ARGS__); \ - auto __message__ = ::paddle::string::Sprintf( \ - "%s\n [Hint: " #__VAL " should not be null.]", \ - __summary__.error_message()); \ - __THROW_ERROR_INTERNAL__( \ - common::ErrorSummary(__summary__.code(), std::move(__message__))); \ - } \ - } while (0) - -#define PADDLE_WARN_NOT_NULL(__VAL, ...) \ - do { \ - if (UNLIKELY(nullptr == (__VAL))) { \ - auto __summary__ = common::ErrorSummary(__VA_ARGS__); \ - auto __message__ = ::paddle::string::Sprintf( \ - "%s\n [Hint: " #__VAL " should not be null.]", \ - __summary__.error_message()); \ - ::phi::enforce::ThrowWarnInternal(std::move(__message__)); \ - } \ - } while (0) - -#define __PADDLE_BINARY_COMPARE(__VAL1, __VAL2, __CMP, __INV_CMP, ...) \ - do { \ - auto __val1 = (__VAL1); \ - auto __val2 = (__VAL2); \ - using __TYPE1__ = decltype(__val1); \ - using __TYPE2__ = decltype(__val2); \ - using __COMMON_TYPE1__ = \ - ::phi::details::CommonType1<__TYPE1__, __TYPE2__>; \ - using __COMMON_TYPE2__ = \ - ::phi::details::CommonType2<__TYPE1__, __TYPE2__>; \ - bool __is_not_error = (static_cast<__COMMON_TYPE1__>(__val1))__CMP( \ - static_cast<__COMMON_TYPE2__>(__val2)); \ - if (UNLIKELY(!__is_not_error)) { \ - auto __summary__ = common::ErrorSummary(__VA_ARGS__); \ - constexpr bool __kCanToString__ = \ - ::phi::details::CanToString<__TYPE1__>::kValue && \ - ::phi::details::CanToString<__TYPE2__>::kValue; \ - auto __message__ = ::paddle::string::Sprintf( \ - "%s\n [Hint: Expected %s " #__CMP \ - " %s, but received %s " #__INV_CMP " %s.]", \ - __summary__.error_message(), \ - #__VAL1, \ - #__VAL2, \ - ::phi::details::BinaryCompareMessageConverter< \ - __kCanToString__>::Convert(#__VAL1, __val1), \ - ::phi::details::BinaryCompareMessageConverter< \ - __kCanToString__>::Convert(#__VAL2, __val2)); \ - __THROW_ERROR_INTERNAL__( \ - common::ErrorSummary(__summary__.code(), std::move(__message__))); \ - } \ - } while (0) - -#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \ - __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__) -#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \ - __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, !=, ==, __VA_ARGS__) -#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) \ - __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, >, <=, __VA_ARGS__) -#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) \ - __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, >=, <, __VA_ARGS__) -#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) \ - __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__) -#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \ - __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__) - -/** EXTENDED TOOL FUNCTIONS WITH CHECKING **/ - -/* - * Summary: This macro is used to get Variable or internal type - * data (such as LoDTensor or SelectedRows) of the Input and - * Output in op, generally used when call scope.FindVar(Input/ - * Output("Name")) or ctx.Input(). - * Firstly this macro check whether the obtained pointer is null, - * and then return data if it is not null. - * - * Note: This macro is only suitable for specific scenarios and - * does not intended to be widely used. If it cannot meet the - * requirements, please use other PADDLE_ENFORCE** check macro. - * - * Parameters: - * __PTR: pointer - * __ROLE: (string), Input or Output - * __NAME: (string), Input or Output name - * __OP_TYPE: (string), the op type - * - * Return: The data pointed to by the pointer.
- * - * Examples: - * GET_DATA_SAFELY(ctx.Input("X"), "Input", "X", "Mul"); - */ -#define GET_DATA_SAFELY(__PTR, __ROLE, __NAME, __OP_TYPE) \ - (([&]() -> std::add_lvalue_reference::type { \ - auto* __ptr = (__PTR); \ - if (UNLIKELY(nullptr == __ptr)) { \ - auto __summary__ = common::errors::NotFound( \ - "Unable to get %s data of %s %s in operator %s. " \ - "Possible reasons are:\n" \ - " 1. The %s is not the %s of operator %s;\n" \ - " 2. The %s has no corresponding variable passed in;\n" \ - " 3. The %s corresponding variable is not initialized.", \ - phi::demangle( \ - typeid(std::add_lvalue_reference::type) \ - .name()), \ - __ROLE, \ - __NAME, \ - __OP_TYPE, \ - __NAME, \ - __ROLE, \ - __OP_TYPE, \ - __NAME, \ - __NAME); \ - auto __message__ = ::paddle::string::Sprintf( \ - "%s\n [Hint: pointer " #__PTR " should not be null.]", \ - __summary__.error_message()); \ - __THROW_ERROR_INTERNAL__( \ - common::ErrorSummary(__summary__.code(), __message__)); \ - } \ - return *__ptr; \ - })()) - -/* - * Summary: This PADDLE_GET(_**) series macros are used to call paddle::get - * safely. paddle::get is not a completely safe api, although it will not - * go wrong in most cases, but in extreme cases, it may fail and directly - * throw a paddle::bad_variant_access const exception, without any stack - *information. - * This kind of problems is difficult to debug, so add these macros to - * enrich paddle::get error information. At the same time, we restrict - * the direct use of paddle::get by CI rule. - * - * Parameters: - * __TYPE: the target variable type - * __VALUE: the target variable to get - * - * Examples: - * - unsafe writing: int x = paddle::get(y); - * - safe writing: int x = PADDLE_GET(int, y); - * - * Note: GCC 4.8 cannot select right overloaded function here, so need - * to define different functions and macros here, after we upgrade - * CI gcc version, we can only define one PADDLE_GET macro.
- */ -namespace details { - -#define DEFINE_SAFE_PADDLE_GET( \ - __InputType, __OutputType, __OutputTypePtr, __FuncName) \ - template \ - auto __FuncName( \ - __InputType input, const char* expression, const char* file, int line) \ - ->typename std::conditional::value, \ - __OutputTypePtr, \ - __OutputType>::type { \ - try { \ - return paddle::get(input); \ - } catch (paddle::bad_variant_access const&) { \ - HANDLE_THE_ERROR \ - throw ::phi::enforce::EnforceNotMet( \ - common::errors::InvalidArgument( \ - "paddle::get failed, cannot get value " \ - "(%s) by type %s, its type is %s.", \ - expression, \ - phi::enforce::demangle(typeid(OutputType).name()), \ - phi::enforce::demangle(input.type().name())), \ - file, \ - line); \ - END_HANDLE_THE_ERROR \ - } \ - } - -DEFINE_SAFE_PADDLE_GET(InputType&, OutputType&, OutputType*, SafeBoostGet); -DEFINE_SAFE_PADDLE_GET(const InputType&, - const OutputType&, - const OutputType*, - SafeBoostGetConst); -DEFINE_SAFE_PADDLE_GET(InputType&&, - OutputType, - OutputType*, - SafeBoostGetMutable); - -} // namespace details - -#define PADDLE_GET(__TYPE, __VALUE) \ - phi::enforce::details::SafeBoostGet<__TYPE>( \ - __VALUE, #__VALUE, __FILE__, __LINE__) -#define PADDLE_GET_CONST(__TYPE, __VALUE) \ - phi::enforce::details::SafeBoostGetConst<__TYPE>( \ - __VALUE, #__VALUE, __FILE__, __LINE__) -#define PADDLE_GET_MUTABLE(__TYPE, __VALUE) \ - phi::enforce::details::SafeBoostGetMutable<__TYPE>( \ - __VALUE, #__VALUE, __FILE__, __LINE__) - -/**************************************************************************/ -/**************************** NVIDIA ERROR ********************************/ -#ifdef PADDLE_WITH_CUDA - -namespace details { - -template -struct ExternalApiType {}; - -#define DEFINE_EXTERNAL_API_TYPE(type, success_value) \ - template <> \ - struct ExternalApiType { \ - using Type = type; \ - static constexpr Type kSuccess = success_value; \ - } - -DEFINE_EXTERNAL_API_TYPE(cudaError_t, cudaSuccess); -DEFINE_EXTERNAL_API_TYPE(curandStatus_t, CURAND_STATUS_SUCCESS); -DEFINE_EXTERNAL_API_TYPE(cudnnStatus_t, CUDNN_STATUS_SUCCESS); -DEFINE_EXTERNAL_API_TYPE(cublasStatus_t, CUBLAS_STATUS_SUCCESS); -DEFINE_EXTERNAL_API_TYPE(cusparseStatus_t, CUSPARSE_STATUS_SUCCESS); -DEFINE_EXTERNAL_API_TYPE(cusolverStatus_t, CUSOLVER_STATUS_SUCCESS); -DEFINE_EXTERNAL_API_TYPE(cufftResult_t, CUFFT_SUCCESS); -DEFINE_EXTERNAL_API_TYPE(CUresult, CUDA_SUCCESS); - -#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) -DEFINE_EXTERNAL_API_TYPE(ncclResult_t, ncclSuccess); -#endif - -} // namespace details - -template -std::string GetExternalErrorMsg(T status); - -/*************** CUDA ERROR ***************/ -inline bool is_error(cudaError_t e) { return e != cudaSuccess; } - -inline std::string build_nvidia_error_msg(cudaError_t e) { - std::ostringstream sout; - sout << "CUDA error(" << e << "), " << cudaGetErrorString(e) << ". " - << GetExternalErrorMsg(e); - return sout.str(); -} - -/*************** CURAND ERROR ***************/ -inline bool is_error(curandStatus_t stat) { - return stat != CURAND_STATUS_SUCCESS; -} - -inline std::string build_nvidia_error_msg(curandStatus_t stat) { - std::ostringstream sout; - sout << "CURAND error(" << stat << "). 
" << GetExternalErrorMsg(stat); - return sout.str(); -} - -/*************** CUDNN ERROR ***************/ -inline bool is_error(cudnnStatus_t stat) { - return stat != CUDNN_STATUS_SUCCESS; -} - -inline std::string build_nvidia_error_msg(cudnnStatus_t stat) { - std::ostringstream sout; - sout << "CUDNN error(" << stat << "), " - << phi::dynload::cudnnGetErrorString(stat) << ". " - << GetExternalErrorMsg(stat); - return sout.str(); -} - -/*************** CUBLAS ERROR ***************/ -inline bool is_error(cublasStatus_t stat) { - return stat != CUBLAS_STATUS_SUCCESS; -} - -inline std::string build_nvidia_error_msg(cublasStatus_t stat) { - std::ostringstream sout; - sout << "CUBLAS error(" << stat << "). " << GetExternalErrorMsg(stat); - return sout.str(); -} - -/*************** CUSPARSE ERROR ***************/ -inline bool is_error(cusparseStatus_t stat) { - return stat != CUSPARSE_STATUS_SUCCESS; -} - -inline std::string build_nvidia_error_msg(cusparseStatus_t stat) { - std::ostringstream sout; - sout << "CUSparse error(" << stat << "). " << GetExternalErrorMsg(stat); - return sout.str(); -} - -/*************** CUSOLVER ERROR ***************/ -inline bool is_error(cusolverStatus_t stat) { - return stat != CUSOLVER_STATUS_SUCCESS; -} - -inline std::string build_nvidia_error_msg(cusolverStatus_t stat) { - std::ostringstream sout; - sout << "CUSOLVER error(" << stat << "). " << GetExternalErrorMsg(stat); - return sout.str(); -} - -/*************** CUFFT ERROR ***************/ -inline bool is_error(cufftResult_t stat) { return stat != CUFFT_SUCCESS; } - -inline std::string build_nvidia_error_msg(cufftResult_t stat) { - std::ostringstream sout; - sout << "CUFFT error(" << stat << "). " << GetExternalErrorMsg(stat); - return sout.str(); -} - -/*************** CUresult ERROR ***************/ -inline bool is_error(CUresult stat) { return stat != CUDA_SUCCESS; } - -inline std::string build_nvidia_error_msg(CUresult stat) { - std::ostringstream sout; - sout << "CU error(" << stat << "). " << GetExternalErrorMsg(stat); - return sout.str(); -} - -/**************** NCCL ERROR ****************/ -#if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) -inline bool is_error(ncclResult_t nccl_result) { - return nccl_result != ncclSuccess; -} - -inline std::string build_nvidia_error_msg(ncclResult_t nccl_result) { - std::ostringstream sout; - sout << "NCCL error(" << nccl_result << "), " - << phi::dynload::ncclGetErrorString(nccl_result) << ". "; - if (errno == ENOSPC || errno == EAGAIN) { - std::string detail(strerror(errno)); - detail += "\nPlease try one of the following solutions:"; - detail += "\n1. export NCCL_SHM_DISABLE=1;"; - detail += "\n2. export NCCL_P2P_LEVEL=SYS;"; - detail += - "\n3. 
Increase shared memory by setting the -shm-size " - "option when starting docker container, e.g., setting " - " -shm-size=2g.\n"; - sout << " Detail: " + detail; - } - sout << GetExternalErrorMsg(nccl_result); - return sout.str(); -} -#endif // not(__APPLE__) and PADDLE_WITH_NCCL - -#define PADDLE_ENFORCE_GPU_SUCCESS(COND) \ - do { \ - auto __cond__ = (COND); \ - using __CUDA_STATUS_TYPE__ = decltype(__cond__); \ - constexpr auto __success_type__ = \ - ::phi::enforce::details::ExternalApiType< \ - __CUDA_STATUS_TYPE__>::kSuccess; \ - if (UNLIKELY(__cond__ != __success_type__)) { \ - auto __summary__ = common::errors::External( \ - ::phi::enforce::build_nvidia_error_msg(__cond__)); \ - __THROW_ERROR_INTERNAL__(__summary__); \ - } \ - } while (0) - -#define PADDLE_WARN_GPU_SUCCESS(COND) \ - do { \ - auto __cond__ = (COND); \ - using __CUDA_STATUS_TYPE__ = decltype(__cond__); \ - constexpr auto __success_type__ = \ - ::phi::enforce::details::ExternalApiType< \ - __CUDA_STATUS_TYPE__>::kSuccess; \ - if (UNLIKELY(__cond__ != __success_type__)) { \ - ::phi::enforce::ThrowWarnInternal( \ - ::phi::enforce::build_nvidia_error_msg(__cond__)); \ - } \ - } while (0) - -#define PADDLE_ENFORCE_CUDA_LAUNCH_SUCCESS(OP) \ - do { \ - auto res = cudaGetLastError(); \ - if (UNLIKELY(res != cudaSuccess)) { \ - auto msg = ::phi::enforce::build_nvidia_error_msg(res); \ - PADDLE_THROW( \ - common::errors::Fatal("CUDA error after kernel (%s): %s", OP, msg)); \ - } \ - } while (0) - -inline void retry_sleep(unsigned milliseconds) { -#ifdef _WIN32 - Sleep(milliseconds); -#else - if (milliseconds < 1000) { - // usleep argument must be less than 1,000,000. Reference: - // https://pubs.opengroup.org/onlinepubs/7908799/xsh/usleep.html - usleep(milliseconds * 1000); - } else { - // clip to sleep in seconds because we can not and don't have to - // sleep for exact milliseconds - sleep(milliseconds / 1000); - } -#endif -} - -#define PADDLE_RETRY_CUDA_SUCCESS(COND) \ - do { \ - auto __cond__ = (COND); \ - int retry_count = 1; \ - using __CUDA_STATUS_TYPE__ = decltype(__cond__); \ - constexpr auto __success_type__ = \ - ::phi::enforce::details::ExternalApiType< \ - __CUDA_STATUS_TYPE__>::kSuccess; \ - while (UNLIKELY(__cond__ != __success_type__) && retry_count < 5) { \ - phi::enforce::retry_sleep(10000); \ - __cond__ = (COND); \ - ++retry_count; \ - } \ - if (UNLIKELY(__cond__ != __success_type__)) { \ - auto __summary__ = common::errors::External( \ - ::phi::enforce::build_nvidia_error_msg(__cond__)); \ - __THROW_ERROR_INTERNAL__(__summary__); \ - } \ - } while (0) - -#undef DEFINE_EXTERNAL_API_TYPE -#endif // PADDLE_WITH_CUDA - -/**************************************************************************/ -/***************************** HIP ERROR **********************************/ -#ifdef PADDLE_WITH_HIP - -/***** HIP ERROR *****/ -inline bool is_error(hipError_t e) { return e != hipSuccess; } - -inline std::string build_rocm_error_msg(hipError_t e) { - std::ostringstream sout; - sout << " Hip error(" << e << "), " << hipGetErrorString(e) << "."; - return sout.str(); -} - -/***** HIPRAND ERROR *****/ -inline bool is_error(hiprandStatus_t stat) { - return stat != HIPRAND_STATUS_SUCCESS; -} - -inline const char* hiprandGetErrorString(hiprandStatus_t stat) { - switch (stat) { - case HIPRAND_STATUS_SUCCESS: - return "HIPRAND_STATUS_SUCCESS"; - case HIPRAND_STATUS_VERSION_MISMATCH: - return "HIPRAND_STATUS_VERSION_MISMATCH"; - case HIPRAND_STATUS_NOT_INITIALIZED: - return "HIPRAND_STATUS_NOT_INITIALIZED"; - case 
HIPRAND_STATUS_ALLOCATION_FAILED: - return "HIPRAND_STATUS_ALLOCATION_FAILED"; - case HIPRAND_STATUS_TYPE_ERROR: - return "HIPRAND_STATUS_TYPE_ERROR"; - case HIPRAND_STATUS_OUT_OF_RANGE: - return "HIPRAND_STATUS_OUT_OF_RANGE"; - case HIPRAND_STATUS_LENGTH_NOT_MULTIPLE: - return "HIPRAND_STATUS_LENGTH_NOT_MULTIPLE"; - case HIPRAND_STATUS_DOUBLE_PRECISION_REQUIRED: - return "HIPRAND_STATUS_DOUBLE_PRECISION_REQUIRED"; - case HIPRAND_STATUS_LAUNCH_FAILURE: - return "HIPRAND_STATUS_LAUNCH_FAILURE"; - case HIPRAND_STATUS_PREEXISTING_FAILURE: - return "HIPRAND_STATUS_PREEXISTING_FAILURE"; - case HIPRAND_STATUS_INITIALIZATION_FAILED: - return "HIPRAND_STATUS_INITIALIZATION_FAILED"; - case HIPRAND_STATUS_ARCH_MISMATCH: - return "HIPRAND_STATUS_ARCH_MISMATCH"; - case HIPRAND_STATUS_INTERNAL_ERROR: - return "HIPRAND_STATUS_INTERNAL_ERROR"; - case HIPRAND_STATUS_NOT_IMPLEMENTED: - return "HIPRAND_STATUS_NOT_IMPLEMENTED"; - default: - return "Unknown hiprand status"; - } -} - -inline std::string build_rocm_error_msg(hiprandStatus_t stat) { - std::string msg(" Hiprand error, "); - return msg + hiprandGetErrorString(stat) + " "; -} - -/***** MIOPEN ERROR *****/ -inline bool is_error(miopenStatus_t stat) { - return stat != miopenStatusSuccess; -} - -inline std::string build_rocm_error_msg(miopenStatus_t stat) { - std::string msg(" Miopen error, "); - return msg + phi::dynload::miopenGetErrorString(stat) + " "; -} - -/***** ROCBLAS ERROR *****/ -inline bool is_error(rocblas_status stat) { - return stat != rocblas_status_success; -} - -inline const char* rocblasGetErrorString(rocblas_status stat) { - switch (stat) { - case rocblas_status_invalid_handle: - return "rocblas_status_invalid_handle"; - case rocblas_status_memory_error: - return "rocblas_status_memory_error"; - case rocblas_status_invalid_value: - return "rocblas_status_invalid_value"; - case rocblas_status_not_implemented: - return "rocblas_status_not_implemented"; - case rocblas_status_invalid_pointer: - return "rocblas_status_invalid_pointer"; - case rocblas_status_invalid_size: - return "rocblas_status_invalid_size"; - case rocblas_status_internal_error: - return "rocblas_status_internal_error"; - default: - return "Unknown cublas status"; - } -} - -inline std::string build_rocm_error_msg(rocblas_status stat) { - std::string msg(" Rocblas error, "); - return msg + rocblasGetErrorString(stat) + " "; -} - -/****** RCCL ERROR ******/ -#if !defined(__APPLE__) && defined(PADDLE_WITH_RCCL) -inline bool is_error(ncclResult_t nccl_result) { - return nccl_result != ncclSuccess; -} - -inline std::string build_rocm_error_msg(ncclResult_t nccl_result) { - std::string msg(" Rccl error, "); - return msg + phi::dynload::ncclGetErrorString(nccl_result) + " "; -} -#endif // not(__APPLE__) and PADDLE_WITH_NCCL - -/***** HIPFFT ERROR *****/ -inline bool is_error(hipfftResult_t stat) { return stat != HIPFFT_SUCCESS; } - -inline std::string build_rocm_error_msg(hipfftResult_t stat) { - std::string msg(" HIPFFT error, "); - return msg + phi::dynload::hipfftGetErrorString(stat) + " "; -} - -namespace details { - -template -struct ExternalApiType {}; - -#define DEFINE_EXTERNAL_API_TYPE(type, success_value) \ - template <> \ - struct ExternalApiType { \ - using Type = type; \ - static constexpr Type kSuccess = success_value; \ - } - -DEFINE_EXTERNAL_API_TYPE(hipError_t, hipSuccess); -DEFINE_EXTERNAL_API_TYPE(hiprandStatus_t, HIPRAND_STATUS_SUCCESS); -DEFINE_EXTERNAL_API_TYPE(miopenStatus_t, miopenStatusSuccess); -DEFINE_EXTERNAL_API_TYPE(rocblas_status, 
rocblas_status_success); -DEFINE_EXTERNAL_API_TYPE(hipfftResult_t, HIPFFT_SUCCESS); - -#if !defined(__APPLE__) && defined(PADDLE_WITH_RCCL) -DEFINE_EXTERNAL_API_TYPE(ncclResult_t, ncclSuccess); -#endif - -} // namespace details - -#define PADDLE_ENFORCE_GPU_SUCCESS(COND) \ - do { \ - auto __cond__ = (COND); \ - using __CUDA_STATUS_TYPE__ = decltype(__cond__); \ - constexpr auto __success_type__ = \ - ::phi::enforce::details::ExternalApiType< \ - __CUDA_STATUS_TYPE__>::kSuccess; \ - if (UNLIKELY(__cond__ != __success_type__)) { \ - auto __summary__ = common::errors::External( \ - ::phi::enforce::build_rocm_error_msg(__cond__)); \ - __THROW_ERROR_INTERNAL__(__summary__); \ - } \ - } while (0) - -#define PADDLE_WARN_GPU_SUCCESS(COND) \ - do { \ - auto __cond__ = (COND); \ - using __CUDA_STATUS_TYPE__ = decltype(__cond__); \ - constexpr auto __success_type__ = \ - ::phi::enforce::details::ExternalApiType< \ - __CUDA_STATUS_TYPE__>::kSuccess; \ - if (UNLIKELY(__cond__ != __success_type__)) { \ - ::phi::enforce::ThrowWarnInternal( \ - ::phi::enforce::build_rocm_error_msg(__cond__)); \ - } \ - } while (0) - -inline void retry_sleep(unsigned millisecond) { -#ifdef _WIN32 - Sleep(millisecond); -#else - sleep(millisecond); -#endif -} - -#define PADDLE_RETRY_CUDA_SUCCESS(COND) \ - do { \ - auto __cond__ = (COND); \ - int retry_count = 1; \ - using __CUDA_STATUS_TYPE__ = decltype(__cond__); \ - constexpr auto __success_type__ = \ - ::phi::enforce::details::ExternalApiType< \ - __CUDA_STATUS_TYPE__>::kSuccess; \ - while (UNLIKELY(__cond__ != __success_type__) && retry_count < 5) { \ - ::phi::enforce::retry_sleep(10000); \ - __cond__ = (COND); \ - ++retry_count; \ - } \ - if (UNLIKELY(__cond__ != __success_type__)) { \ - auto __summary__ = common::errors::External( \ - ::phi::enforce::build_rocm_error_msg(__cond__)); \ - __THROW_ERROR_INTERNAL__(__summary__); \ - } \ - } while (0) - -#undef DEFINE_EXTERNAL_API_TYPE -#endif // PADDLE_WITH_HIP - -} // namespace enforce -using namespace enforce; // NOLINT -} // namespace phi diff --git a/paddle/phi/core/errors.cc b/paddle/phi/core/errors.cc index 0fcf8f292c1e17..d76802b1d023fa 100644 --- a/paddle/phi/core/errors.cc +++ b/paddle/phi/core/errors.cc @@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include -namespace phi { +namespace common { std::string error_name(ErrorCode code) { switch (code) { case ErrorCode::LEGACY: @@ -70,4 +70,4 @@ std::string ErrorSummary::to_string() const { result += error_message(); return result; } -} // namespace phi +} // namespace common diff --git a/paddle/phi/core/errors.h b/paddle/phi/core/errors.h deleted file mode 100644 index 5d1143b2e76a70..00000000000000 --- a/paddle/phi/core/errors.h +++ /dev/null @@ -1,147 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include -#include -#include -#include - -#include "paddle/utils/string/printf.h" -#include "paddle/utils/test_macros.h" - -namespace phi { -enum ErrorCode { - // Legacy error. - // Error type string: "Error" - LEGACY = 0, - - // Client specified an invalid argument. - // Error type string: "InvalidArgumentError" - INVALID_ARGUMENT = 1, - - // Some requested entity (e.g., file or directory) was not found. - // Error type string: "NotFoundError" - NOT_FOUND = 2, - - // Operation tried to iterate past the valid input range. E.g., seeking or - // reading past end of file. - // Error type string: "OutOfRangeError" - OUT_OF_RANGE = 3, - - // Some entity that we attempted to create (e.g., file or directory) - // already exists. - // Error type string: "AlreadyExistsError" - ALREADY_EXISTS = 4, - - // Some resource has been exhausted, perhaps a per-user quota, or - // perhaps the entire file system is out of space. - // Error type string: "ResourceExhaustedError" - RESOURCE_EXHAUSTED = 5, - - // Operation was rejected because the system is not in a state - // required for the operation's execution. - // Error type string: "PreconditionNotMetError" - PRECONDITION_NOT_MET = 6, - - // The caller does not have permission to execute the specified - // operation. - // Error type string: "PermissionDeniedError" - PERMISSION_DENIED = 7, - - // Deadline expired before operation could complete. - // Error type string: "ExecutionTimeout" - EXECUTION_TIMEOUT = 8, - - // Operation is not implemented or not supported/enabled in this service. - // Error type string: "UnimplementedError" - UNIMPLEMENTED = 9, - - // The service is currently unavailable. This is a most likely a - // transient condition and may be corrected by retrying with - // a backoff. - // Error type string: "UnavailableError" - UNAVAILABLE = 10, - - // Fatal errors. Means some invariant expected by the underlying - // system has been broken. If you see one of these errors, - // something is very broken. - // Error type string: "FatalError" - FATAL = 11, - - // Third-party library error. - // Error type string: "ExternalError" - EXTERNAL = 12, -}; - -class ErrorSummary { - public: - // Note(chenweihang): Final deprecated constructor - // This constructor is used to be compatible with - // current existing untyped PADDLE_ENFORCE_* - // PADDLE_ENFORCE - // Note(chenweihang): Windows openblas need this - // constructor for compiling PADDLE_ENFORCE in *.cu, - // this is a bug cause we can't remove this - // constructor now. - template - explicit ErrorSummary(Args... args) { - code_ = common::ErrorCode::LEGACY; - msg_ = paddle::string::Sprintf(args...); - } - - // Note(chenweihang): Only recommended constructor - // No longer supports PADDLE_ENFORCE without type or without error message - explicit ErrorSummary(ErrorCode code, std::string msg) - : code_(code), msg_(msg) {} - - ErrorCode code() const { return code_; } - - const std::string& error_message() const { return msg_; } - - TEST_API std::string to_string() const; - - private: - ErrorCode code_; - std::string msg_; -}; - -namespace errors { - -#define REGISTER_ERROR(FUNC, CONST, ...) \ - template \ - common::ErrorSummary FUNC(Args...
args) { \ - return common::ErrorSummary(common::CONST, \ - ::paddle::string::Sprintf(args...)); \ - } - -REGISTER_ERROR(InvalidArgument, ErrorCode::INVALID_ARGUMENT) -REGISTER_ERROR(NotFound, ErrorCode::NOT_FOUND) -REGISTER_ERROR(OutOfRange, ErrorCode::OUT_OF_RANGE) -REGISTER_ERROR(AlreadyExists, ErrorCode::ALREADY_EXISTS) -REGISTER_ERROR(ResourceExhausted, ErrorCode::RESOURCE_EXHAUSTED) -REGISTER_ERROR(PreconditionNotMet, ErrorCode::PRECONDITION_NOT_MET) -REGISTER_ERROR(PermissionDenied, ErrorCode::PERMISSION_DENIED) -REGISTER_ERROR(ExecutionTimeout, ErrorCode::EXECUTION_TIMEOUT) -REGISTER_ERROR(Unimplemented, ErrorCode::UNIMPLEMENTED) -REGISTER_ERROR(Unavailable, ErrorCode::UNAVAILABLE) -REGISTER_ERROR(Fatal, ErrorCode::FATAL) -REGISTER_ERROR(External, ErrorCode::EXTERNAL) - -#undef REGISTER_ERROR - -} // namespace errors -} // namespace phi diff --git a/paddle/phi/core/generator.cc b/paddle/phi/core/generator.cc index b3f8a2d19caba0..278024ee353dec 100644 --- a/paddle/phi/core/generator.cc +++ b/paddle/phi/core/generator.cc @@ -19,9 +19,9 @@ limitations under the License. */ #include #include +#include "paddle/common/enforce.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/xpu/xpu_info.h" -#include "paddle/phi/core/enforce.h" static uint64_t GetRandomSeed() { std::random_device rd; diff --git a/paddle/phi/core/infermeta_utils.h b/paddle/phi/core/infermeta_utils.h index 84b944bbe19c5c..ff4c2b5ee8a3e1 100644 --- a/paddle/phi/core/infermeta_utils.h +++ b/paddle/phi/core/infermeta_utils.h @@ -19,12 +19,12 @@ limitations under the License. */ #include #include +#include "paddle/common/enforce.h" #include "paddle/common/macros.h" #include "paddle/common/scalar.h" #include "paddle/common/type_defs.h" #include "paddle/phi/common/int_array.h" #include "paddle/phi/core/attribute.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/meta_tensor.h" #include "paddle/utils/any.h" #include "paddle/utils/flat_hash_map.h" diff --git a/paddle/phi/core/kernel_context.h b/paddle/phi/core/kernel_context.h index df7bfd31251c8e..097f003d7345b4 100644 --- a/paddle/phi/core/kernel_context.h +++ b/paddle/phi/core/kernel_context.h @@ -17,10 +17,10 @@ #include #include +#include "paddle/common/enforce.h" #include "paddle/common/type_defs.h" #include "paddle/phi/core/attribute.h" #include "paddle/phi/core/device_context.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/tensor_base.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/utils/optional.h" diff --git a/paddle/phi/core/kernel_factory.cc b/paddle/phi/core/kernel_factory.cc index 69c7900def16ba..326aad1a0001bc 100644 --- a/paddle/phi/core/kernel_factory.cc +++ b/paddle/phi/core/kernel_factory.cc @@ -15,7 +15,7 @@ #include "paddle/phi/core/kernel_factory.h" #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/utils/flags.h" #if defined(PADDLE_WITH_XPU) #include "paddle/phi/backends/xpu/xpu_op_list.h" #endif diff --git a/paddle/phi/core/kernel_factory.h b/paddle/phi/core/kernel_factory.h index 7d62485d703fbd..486206023e2912 100644 --- a/paddle/phi/core/kernel_factory.h +++ b/paddle/phi/core/kernel_factory.h @@ -18,13 +18,13 @@ #include #include #include +#include "paddle/common/data_type.h" #include "paddle/common/type_defs.h" #include "paddle/phi/common/backend.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/compat/convert_utils.h" #include
"paddle/phi/core/compat/get_kerneltype_forvar_utils.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/utils/flat_hash_map.h" #include "paddle/utils/small_vector.h" namespace phi { diff --git a/paddle/phi/core/kernel_utils.h b/paddle/phi/core/kernel_utils.h index 88350b88e5011e..33bc151d7f8583 100644 --- a/paddle/phi/core/kernel_utils.h +++ b/paddle/phi/core/kernel_utils.h @@ -14,12 +14,12 @@ #pragma once +#include "paddle/common/enforce.h" #include "paddle/common/scalar.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/common/int_array.h" #include "paddle/phi/common/tensor_ref.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/extended_tensor.h" #include "paddle/phi/core/kernel_context.h" #include "paddle/phi/core/selected_rows.h" diff --git a/paddle/phi/core/lod_utils.cc b/paddle/phi/core/lod_utils.cc index 2ebf0f23116417..8b8bc5dabdc887 100644 --- a/paddle/phi/core/lod_utils.cc +++ b/paddle/phi/core/lod_utils.cc @@ -14,7 +14,7 @@ #include "paddle/phi/core/lod_utils.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" namespace phi { diff --git a/paddle/phi/core/meta_tensor.cc b/paddle/phi/core/meta_tensor.cc index 8f63dc5d4d56cf..40e94f06cbddee 100644 --- a/paddle/phi/core/meta_tensor.cc +++ b/paddle/phi/core/meta_tensor.cc @@ -16,10 +16,10 @@ limitations under the License. */ #include "glog/logging.h" +#include "paddle/common/enforce.h" #include "paddle/fluid/pir/dialect/operator/ir/meta_tensor.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/selected_rows.h" #include "paddle/phi/core/string_tensor.h" #include "paddle/phi/core/string_tensor_utils.h" diff --git a/paddle/phi/core/meta_tensor.h b/paddle/phi/core/meta_tensor.h index 4b0480af48cdad..38a1556fbd73c3 100644 --- a/paddle/phi/core/meta_tensor.h +++ b/paddle/phi/core/meta_tensor.h @@ -14,10 +14,10 @@ limitations under the License. */ #pragma once +#include "paddle/common/ddim.h" #include "paddle/common/macros.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/tensor_base.h" #include "paddle/phi/core/tensor_meta.h" diff --git a/paddle/phi/core/mixed_vector.h b/paddle/phi/core/mixed_vector.h index d25a646608d3d2..f06ab5c6bb52d9 100644 --- a/paddle/phi/core/mixed_vector.h +++ b/paddle/phi/core/mixed_vector.h @@ -22,10 +22,10 @@ limitations under the License. */ #include #include "glog/logging.h" +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/allocator.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/utils/none.h" #include "paddle/utils/optional.h" diff --git a/paddle/phi/core/selected_rows_impl.cc b/paddle/phi/core/selected_rows_impl.cc index ff96342940d923..c8bc294c6b4622 100644 --- a/paddle/phi/core/selected_rows_impl.cc +++ b/paddle/phi/core/selected_rows_impl.cc @@ -16,8 +16,8 @@ limitations under the License. 
*/ #include "glog/logging.h" +#include "paddle/common/data_type.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { diff --git a/paddle/phi/core/selected_rows_impl.h b/paddle/phi/core/selected_rows_impl.h index a29f66b99420ab..445fb4cfaf51f7 100644 --- a/paddle/phi/core/selected_rows_impl.h +++ b/paddle/phi/core/selected_rows_impl.h @@ -21,10 +21,10 @@ limitations under the License. */ #include #include +#include "paddle/common/ddim.h" +#include "paddle/common/enforce.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/utils/rw_lock.h" namespace phi { diff --git a/paddle/phi/core/storage_properties.h b/paddle/phi/core/storage_properties.h index a6a66305c6297e..9d662544d9404b 100644 --- a/paddle/phi/core/storage_properties.h +++ b/paddle/phi/core/storage_properties.h @@ -16,7 +16,7 @@ limitations under the License. */ #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/utils/type_registry.h" #ifdef PADDLE_WITH_DNNL diff --git a/paddle/phi/core/tensor_base.h b/paddle/phi/core/tensor_base.h index 069382720e19de..334ef081e92787 100644 --- a/paddle/phi/core/tensor_base.h +++ b/paddle/phi/core/tensor_base.h @@ -14,12 +14,12 @@ limitations under the License. */ #pragma once +#include "paddle/common/ddim.h" #include "paddle/phi/common/backend.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/allocator.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/utils/type_registry.h" namespace phi { diff --git a/paddle/phi/core/tensor_meta.h b/paddle/phi/core/tensor_meta.h index c57573e59be37a..02211c055c5074 100644 --- a/paddle/phi/core/tensor_meta.h +++ b/paddle/phi/core/tensor_meta.h @@ -16,10 +16,10 @@ limitations under the License. 
*/ #include +#include "paddle/common/ddim.h" #include "paddle/phi/common/backend.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" #include "paddle/utils/any.h" #include "paddle/utils/optional.h" #include "paddle/utils/test_macros.h" diff --git a/paddle/phi/core/threadpool.cc b/paddle/phi/core/threadpool.cc index 7538087f4e8553..3d4894bcadf40c 100644 --- a/paddle/phi/core/threadpool.cc +++ b/paddle/phi/core/threadpool.cc @@ -17,7 +17,7 @@ #include #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/utils/flags.h" PD_DECLARE_int32(dist_threadpool_size); diff --git a/paddle/phi/core/threadpool.h b/paddle/phi/core/threadpool.h index 318ec38d3c8c58..d05adec044f65f 100644 --- a/paddle/phi/core/threadpool.h +++ b/paddle/phi/core/threadpool.h @@ -24,8 +24,8 @@ #include #include +#include "paddle/common/enforce.h" #include "paddle/common/macros.h" // for DISABLE_COPY_AND_ASSIGN -#include "paddle/phi/core/enforce.h" namespace phi { diff --git a/paddle/phi/core/utils/array.h b/paddle/phi/core/utils/array.h index 20e120b990c02f..5d7726b8320db1 100644 --- a/paddle/phi/core/utils/array.h +++ b/paddle/phi/core/utils/array.h @@ -16,8 +16,8 @@ #include +#include "paddle/common/enforce.h" #include "paddle/common/unroll_array_ops.h" -#include "paddle/phi/core/enforce.h" namespace phi { diff --git a/paddle/phi/core/utils/intrusive_ptr.h b/paddle/phi/core/utils/intrusive_ptr.h index e2e6cb7060d057..aab4fe45a6cf2b 100644 --- a/paddle/phi/core/utils/intrusive_ptr.h +++ b/paddle/phi/core/utils/intrusive_ptr.h @@ -17,7 +17,7 @@ limitations under the License. */ #include #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" namespace phi { diff --git a/paddle/phi/core/utils/rw_lock.h b/paddle/phi/core/utils/rw_lock.h index fa87cfcbb5feeb..62dfadf4edc1fd 100644 --- a/paddle/phi/core/utils/rw_lock.h +++ b/paddle/phi/core/utils/rw_lock.h @@ -20,7 +20,7 @@ limitations under the License. */ #include // NOLINT #endif // !_WIN32 -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" namespace phi { diff --git a/paddle/phi/core/utils/visit_place.h b/paddle/phi/core/utils/visit_place.h index 6318b17647cd61..aed8ee48024d71 100644 --- a/paddle/phi/core/utils/visit_place.h +++ b/paddle/phi/core/utils/visit_place.h @@ -14,8 +14,8 @@ #pragma once +#include "paddle/common/enforce.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/enforce.h" namespace phi { diff --git a/paddle/phi/infermeta/backward.cc b/paddle/phi/infermeta/backward.cc index 4c5e130aab7a07..f399ce6de11b2b 100644 --- a/paddle/phi/infermeta/backward.cc +++ b/paddle/phi/infermeta/backward.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/infermeta/backward.h" +#include "paddle/common/data_type.h" #include "paddle/phi/common/type_traits.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/axis_utils.h" namespace phi { diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc index add5013298d309..d917c8dea3d243 100644 --- a/paddle/phi/infermeta/binary.cc +++ b/paddle/phi/infermeta/binary.cc @@ -18,12 +18,12 @@ limitations under the License. 
*/ #include #include "glog/logging.h" +#include "paddle/common/data_type.h" +#include "paddle/common/ddim.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/common/type_traits.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/infermeta/unary.h" #include "paddle/phi/kernels/cpu/conv_util.h" #include "paddle/phi/kernels/funcs/axis_utils.h" diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 311f054e9dd274..5aeae9435f6bf5 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -18,13 +18,13 @@ limitations under the License. */ #include "glog/logging.h" +#include "paddle/common/data_type.h" #include "paddle/common/scalar.h" #include "paddle/phi/backends/device_memory_aligment.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/core/meta_tensor.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/infermeta/binary.h" #include "paddle/phi/infermeta/nullary.h" #include "paddle/phi/kernels/funcs/common_shape.h" diff --git a/paddle/phi/infermeta/spmd_rules/dim_trans.cc b/paddle/phi/infermeta/spmd_rules/dim_trans.cc index d781cc415ae4c4..e4608ca67aa659 100644 --- a/paddle/phi/infermeta/spmd_rules/dim_trans.cc +++ b/paddle/phi/infermeta/spmd_rules/dim_trans.cc @@ -17,8 +17,8 @@ limitations under the License. */ #include #include #include +#include "paddle/common/enforce.h" #include "paddle/phi/core/distributed/auto_parallel/dist_meta_tensor.h" -#include "paddle/phi/core/enforce.h" namespace phi { namespace distributed { diff --git a/paddle/phi/infermeta/spmd_rules/utils.cc b/paddle/phi/infermeta/spmd_rules/utils.cc index 42bbc659b2f2be..ca7b2608f1e4e2 100644 --- a/paddle/phi/infermeta/spmd_rules/utils.cc +++ b/paddle/phi/infermeta/spmd_rules/utils.cc @@ -16,9 +16,9 @@ limitations under the License. */ #include "glog/logging.h" +#include "paddle/common/enforce.h" #include "paddle/phi/core/distributed/auto_parallel/dist_attr.h" #include "paddle/phi/core/distributed/auto_parallel/utils.h" -#include "paddle/phi/core/enforce.h" namespace phi { namespace distributed { diff --git a/paddle/phi/infermeta/ternary.cc b/paddle/phi/infermeta/ternary.cc index d86b25b7ba224f..406b38dd0f3d1c 100644 --- a/paddle/phi/infermeta/ternary.cc +++ b/paddle/phi/infermeta/ternary.cc @@ -16,8 +16,8 @@ limitations under the License. */ #include "glog/logging.h" +#include "paddle/common/ddim.h" #include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/funcs/common_shape.h" #include "paddle/phi/kernels/impl/box_coder.h" diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 0308093ed9fc67..5c9124430b8be4 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -17,12 +17,12 @@ limitations under the License. 
*/ #include #include +#include "paddle/common/data_type.h" +#include "paddle/common/enforce.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/type_traits.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/flags.h" #include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/parse_qr_mode.h" #include "paddle/phi/kernels/funcs/pooling.h" #include "paddle/phi/kernels/funcs/slice_utils.h" diff --git a/paddle/phi/kernels/autotune/cache_base.h b/paddle/phi/kernels/autotune/cache_base.h index 68463e900c3578..64450fc99c01d4 100644 --- a/paddle/phi/kernels/autotune/cache_base.h +++ b/paddle/phi/kernels/autotune/cache_base.h @@ -18,8 +18,8 @@ #include #include -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/flags.h" PHI_DECLARE_int32(search_cache_max_number); diff --git a/paddle/phi/kernels/autotune/gpu_timer.h b/paddle/phi/kernels/autotune/gpu_timer.h index 7433bb9e5ee22d..f31df8ebecb2cd 100644 --- a/paddle/phi/kernels/autotune/gpu_timer.h +++ b/paddle/phi/kernels/autotune/gpu_timer.h @@ -15,8 +15,8 @@ #pragma once #include "paddle/common/backends/gpu/gpu_decls.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" #ifdef PADDLE_WITH_CUDA #include #endif diff --git a/paddle/phi/kernels/cpu/allclose_kernel.cc b/paddle/phi/kernels/cpu/allclose_kernel.cc index fd6cf3aebc2687..e878a1d55314b2 100644 --- a/paddle/phi/kernels/cpu/allclose_kernel.cc +++ b/paddle/phi/kernels/cpu/allclose_kernel.cc @@ -17,7 +17,7 @@ #include #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/arg_min_max_kernel.cc b/paddle/phi/kernels/cpu/arg_min_max_kernel.cc index ce00926101f2cc..c89701e955fa9a 100644 --- a/paddle/phi/kernels/cpu/arg_min_max_kernel.cc +++ b/paddle/phi/kernels/cpu/arg_min_max_kernel.cc @@ -14,10 +14,10 @@ #include "paddle/phi/kernels/arg_min_max_kernel.h" +#include "paddle/common/data_type.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/cpu/assign_pos_kernel.cc b/paddle/phi/kernels/cpu/assign_pos_kernel.cc index ceab18c5ecc7b4..7bad2262dad685 100644 --- a/paddle/phi/kernels/cpu/assign_pos_kernel.cc +++ b/paddle/phi/kernels/cpu/assign_pos_kernel.cc @@ -13,7 +13,7 @@ // limitations under the License. 
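The hunk at the top of this section deletes the REGISTER_ERROR block from paddle/phi/core/errors, whose expansions generate the InvalidArgument/NotFound/OutOfRange/... factory functions that the include swaps throughout this section retarget to paddle/common/errors.h. Below is a minimal, self-contained sketch of that factory-macro pattern; the ErrorSummary type and the Sprintf stand-in are simplified assumptions, not Paddle's exact definitions.

```cpp
#include <sstream>
#include <string>
#include <utility>

namespace demo {

enum class ErrorCode { INVALID_ARGUMENT, NOT_FOUND, OUT_OF_RANGE };

struct ErrorSummary {
  ErrorCode code;
  std::string message;
};

// Stand-in for paddle::string::Sprintf: stream all arguments into one string.
template <typename... Args>
std::string Sprintf(const Args&... args) {
  std::ostringstream os;
  (os << ... << args);  // C++17 fold expression
  return os.str();
}

// Each REGISTER_ERROR(FUNC, CONST) expansion stamps out one variadic factory.
#define REGISTER_ERROR(FUNC, CONST)                             \
  template <typename... Args>                                   \
  ErrorSummary FUNC(Args&&... args) {                           \
    return ErrorSummary{ErrorCode::CONST,                       \
                        Sprintf(std::forward<Args>(args)...)};  \
  }

REGISTER_ERROR(InvalidArgument, INVALID_ARGUMENT)
REGISTER_ERROR(NotFound, NOT_FOUND)
#undef REGISTER_ERROR

}  // namespace demo

int main() {
  auto err = demo::InvalidArgument("expected rank ", 2, ", got ", 3);
  return err.code == demo::ErrorCode::INVALID_ARGUMENT ? 0 : 1;
}
```

Because every factory is macro-generated with the same shape, relocating the whole family from phi::errors to common::errors is mechanical: only the namespace and header path change, not the call sites.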
#include "paddle/phi/kernels/assign_pos_kernel.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc b/paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc index 168f4e159cb811..9ca3ea52ae9461 100644 --- a/paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc @@ -16,9 +16,9 @@ #include +#include "paddle/common/enforce.h" #include "paddle/common/float16.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" diff --git a/paddle/phi/kernels/cpu/conv_util.h b/paddle/phi/kernels/cpu/conv_util.h index 159a5cfbeb6b41..d9301ac8aae08e 100644 --- a/paddle/phi/kernels/cpu/conv_util.h +++ b/paddle/phi/kernels/cpu/conv_util.h @@ -13,7 +13,7 @@ // limitations under the License. #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/cudnn_lstm_kernel.cc b/paddle/phi/kernels/cpu/cudnn_lstm_kernel.cc index cd709fe2bf4656..797cf05a3f28a8 100644 --- a/paddle/phi/kernels/cpu/cudnn_lstm_kernel.cc +++ b/paddle/phi/kernels/cpu/cudnn_lstm_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/cudnn_lstm_kernel.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/cumprod_grad_kernel.cc b/paddle/phi/kernels/cpu/cumprod_grad_kernel.cc index a2cc99c59fe2d8..01adfe7df0c0da 100644 --- a/paddle/phi/kernels/cpu/cumprod_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/cumprod_grad_kernel.cc @@ -14,16 +14,16 @@ #include "paddle/phi/kernels/cumprod_grad_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/allocator.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/complex_functors.h" #include "paddle/phi/kernels/funcs/cumprod.h" #include "paddle/phi/kernels/funcs/for_range.h" // NOTE(@xiongkun): use of IsComplex<> -#include "paddle/phi/core/utils/data_type.h" +#include "paddle/common/data_type.h" namespace phi { template diff --git a/paddle/phi/kernels/cpu/eigvals_kernel.cc b/paddle/phi/kernels/cpu/eigvals_kernel.cc index 5380106fd020bc..01d636bfaf06d3 100644 --- a/paddle/phi/kernels/cpu/eigvals_kernel.cc +++ b/paddle/phi/kernels/cpu/eigvals_kernel.cc @@ -17,9 +17,9 @@ #include "glog/logging.h" #include "paddle/common/complex.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/complex_functors.h" #include "paddle/phi/kernels/funcs/for_range.h" #include "paddle/phi/kernels/funcs/lapack/lapack_function.h" diff --git a/paddle/phi/kernels/cpu/embedding_kernel.cc b/paddle/phi/kernels/cpu/embedding_kernel.cc index 0d937e6364eacc..0c92c68deb1087 100644 --- a/paddle/phi/kernels/cpu/embedding_kernel.cc +++ b/paddle/phi/kernels/cpu/embedding_kernel.cc @@ -14,10 +14,10 @@ #include "paddle/phi/kernels/embedding_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/cpu/cpu_context.h" 
#include "paddle/phi/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/embedding_util.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/gather_tree_kernel.cc b/paddle/phi/kernels/cpu/gather_tree_kernel.cc index dac1441cb5006e..5df9058c2297e0 100644 --- a/paddle/phi/kernels/cpu/gather_tree_kernel.cc +++ b/paddle/phi/kernels/cpu/gather_tree_kernel.cc @@ -14,7 +14,7 @@ #include "paddle/phi/kernels/gather_tree_kernel.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/index_add_grad_kernel.cc b/paddle/phi/kernels/cpu/index_add_grad_kernel.cc index a60d52f2005a4b..902bb9473c4bf7 100644 --- a/paddle/phi/kernels/cpu/index_add_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/index_add_grad_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/index_add_grad_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/cpu/index_select_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/index_add_kernel.cc b/paddle/phi/kernels/cpu/index_add_kernel.cc index c2c5aa60814c51..3bdc770371b4b4 100644 --- a/paddle/phi/kernels/cpu/index_add_kernel.cc +++ b/paddle/phi/kernels/cpu/index_add_kernel.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "paddle/phi/kernels/index_add_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/cpu/index_add_impl.h" #include "paddle/phi/kernels/funcs/eigen/common.h" diff --git a/paddle/phi/kernels/cpu/index_sample_grad_kernel.cc b/paddle/phi/kernels/cpu/index_sample_grad_kernel.cc index c49a4531aea7a1..c87b2d72e67304 100644 --- a/paddle/phi/kernels/cpu/index_sample_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/index_sample_grad_kernel.cc @@ -14,11 +14,11 @@ #include "paddle/phi/kernels/index_sample_grad_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { template void IndexSampleGradInner(const Context& context, diff --git a/paddle/phi/kernels/cpu/index_sample_kernel.cc b/paddle/phi/kernels/cpu/index_sample_kernel.cc index 02f3afcb67b6ef..b8b697a3d451a4 100644 --- a/paddle/phi/kernels/cpu/index_sample_kernel.cc +++ b/paddle/phi/kernels/cpu/index_sample_kernel.cc @@ -23,11 +23,11 @@ #include "glog/logging.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { template void IndexSampleInner(const Context &context, diff --git a/paddle/phi/kernels/cpu/index_select_grad_kernel.cc b/paddle/phi/kernels/cpu/index_select_grad_kernel.cc index 4e53056bd117f7..451f4d14b82523 100644 --- a/paddle/phi/kernels/cpu/index_select_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/index_select_grad_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/index_select_grad_kernel.h" +#include "paddle/common/data_type.h" #include 
"paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/cpu/index_select_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/index_select_kernel.cc b/paddle/phi/kernels/cpu/index_select_kernel.cc index a9e64d1d183b46..8d02554d8b108c 100644 --- a/paddle/phi/kernels/cpu/index_select_kernel.cc +++ b/paddle/phi/kernels/cpu/index_select_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/index_select_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/cpu/index_select_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/limit_by_capacity_kernel.cc b/paddle/phi/kernels/cpu/limit_by_capacity_kernel.cc index ea2f6cbc6ee82c..1057120b2ae5e1 100644 --- a/paddle/phi/kernels/cpu/limit_by_capacity_kernel.cc +++ b/paddle/phi/kernels/cpu/limit_by_capacity_kernel.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/limit_by_capacity_kernel.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/kernel_registry.h" #if defined(PADDLE_WITH_GLOO) diff --git a/paddle/phi/kernels/cpu/matrix_nms_kernel.cc b/paddle/phi/kernels/cpu/matrix_nms_kernel.cc index b2827d039bacce..49983182d644d6 100644 --- a/paddle/phi/kernels/cpu/matrix_nms_kernel.cc +++ b/paddle/phi/kernels/cpu/matrix_nms_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/matrix_nms_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/nll_loss_kernel.cc b/paddle/phi/kernels/cpu/nll_loss_kernel.cc index c966e91a9a6e96..66c6e9449b6103 100644 --- a/paddle/phi/kernels/cpu/nll_loss_kernel.cc +++ b/paddle/phi/kernels/cpu/nll_loss_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/nll_loss_kernel.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/one_hot_kernel.cc b/paddle/phi/kernels/cpu/one_hot_kernel.cc index 0958e2c02b4c1c..a03eae4b12e303 100644 --- a/paddle/phi/kernels/cpu/one_hot_kernel.cc +++ b/paddle/phi/kernels/cpu/one_hot_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/one_hot_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/prune_gate_by_capacity_kernel.cc b/paddle/phi/kernels/cpu/prune_gate_by_capacity_kernel.cc index ed26b4f37dd5cc..7f2717b8ecacef 100644 --- a/paddle/phi/kernels/cpu/prune_gate_by_capacity_kernel.cc +++ b/paddle/phi/kernels/cpu/prune_gate_by_capacity_kernel.cc @@ -13,7 +13,7 @@ // limitations under the License. 
#include "paddle/phi/kernels/prune_gate_by_capacity_kernel.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/random_routing_kernel.cc b/paddle/phi/kernels/cpu/random_routing_kernel.cc index 0e1d450c1894ae..cdeab98f4c1ab3 100644 --- a/paddle/phi/kernels/cpu/random_routing_kernel.cc +++ b/paddle/phi/kernels/cpu/random_routing_kernel.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/random_routing_kernel.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/repeat_interleave_grad_kernel.cc b/paddle/phi/kernels/cpu/repeat_interleave_grad_kernel.cc index 05f19ac36107ec..6a03a88b020d45 100644 --- a/paddle/phi/kernels/cpu/repeat_interleave_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/repeat_interleave_grad_kernel.cc @@ -14,10 +14,10 @@ #include "paddle/phi/kernels/repeat_interleave_grad_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/cpu/index_select_impl.h" #include "paddle/phi/kernels/funcs/repeat_tensor2index_tensor.h" diff --git a/paddle/phi/kernels/cpu/sparse_weight_embedding_grad_kernel.cc b/paddle/phi/kernels/cpu/sparse_weight_embedding_grad_kernel.cc index d296aba66503b7..df67b2e66b5dbc 100644 --- a/paddle/phi/kernels/cpu/sparse_weight_embedding_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/sparse_weight_embedding_grad_kernel.cc @@ -14,10 +14,10 @@ #include "paddle/phi/kernels/sparse_weight_embedding_grad_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/embedding_util.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/sparse_weight_embedding_kernel.cc b/paddle/phi/kernels/cpu/sparse_weight_embedding_kernel.cc index 175b4a750a8203..081ac2b9d05bcf 100644 --- a/paddle/phi/kernels/cpu/sparse_weight_embedding_kernel.cc +++ b/paddle/phi/kernels/cpu/sparse_weight_embedding_kernel.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/common/data_type.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/embedding_kernel.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/embedding_util.h" diff --git a/paddle/phi/kernels/cpu/triangular_solve_kernel.cc b/paddle/phi/kernels/cpu/triangular_solve_kernel.cc index 06c897b2199845..50ecc73d004c23 100644 --- a/paddle/phi/kernels/cpu/triangular_solve_kernel.cc +++ b/paddle/phi/kernels/cpu/triangular_solve_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/triangular_solve_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/expand_kernel.h" diff --git a/paddle/phi/kernels/cpu/unique_consecutive_kernel.cc b/paddle/phi/kernels/cpu/unique_consecutive_kernel.cc index d0d674d06ee2bd..9cdbe84342d3c0 100644 --- a/paddle/phi/kernels/cpu/unique_consecutive_kernel.cc +++ b/paddle/phi/kernels/cpu/unique_consecutive_kernel.cc @@ -17,10 +17,10 @@ #include "paddle/phi/kernels/cpu/unique_consecutive_functor.h" #include "paddle/phi/kernels/unique_consecutive_kernel.h" +#include "paddle/common/data_type.h" +#include "paddle/common/errors.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/unique_kernel.cc b/paddle/phi/kernels/cpu/unique_kernel.cc index 1ea8452e1d1fa5..cbf495e970ea98 100644 --- a/paddle/phi/kernels/cpu/unique_kernel.cc +++ b/paddle/phi/kernels/cpu/unique_kernel.cc @@ -16,9 +16,9 @@ #include "paddle/phi/kernels/unique_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/unique_functor.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/activation_functor.h b/paddle/phi/kernels/funcs/activation_functor.h index 799bfa45c416b6..c6404d50466040 100644 --- a/paddle/phi/kernels/funcs/activation_functor.h +++ b/paddle/phi/kernels/funcs/activation_functor.h @@ -30,10 +30,10 @@ #include #include "paddle/common/bfloat16.h" +#include "paddle/common/enforce.h" #include "paddle/common/float16.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/extensions.h" diff --git a/paddle/phi/kernels/funcs/axis_utils.h b/paddle/phi/kernels/funcs/axis_utils.h index 368c4a9e14061c..41bbd4f048c6b4 100644 --- a/paddle/phi/kernels/funcs/axis_utils.h +++ b/paddle/phi/kernels/funcs/axis_utils.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/blas/blas_impl.cu.h b/paddle/phi/kernels/funcs/blas/blas_impl.cu.h index c15c7c25954bcb..59d8e6d9df2ebf 100644 --- a/paddle/phi/kernels/funcs/blas/blas_impl.cu.h +++ b/paddle/phi/kernels/funcs/blas/blas_impl.cu.h @@ -38,33 +38,33 @@ template <> struct CUBlas { template static void GEMM(ARGS... 
args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasSgemm(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasSgemm(args...)); } template static void AXPY(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasSaxpy(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasSaxpy(args...)); } template static void SCAL(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasSscal(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasSscal(args...)); } template static void VCOPY(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasScopy(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasScopy(args...)); } template static void GEMV(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasSgemv(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasSgemv(args...)); } template static void GEMM_BATCH(ARGS... args) { #if CUDA_VERSION >= 8000 - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasSgemmBatched(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasSgemmBatched(args...)); #else PADDLE_THROW(phi::errors::Unimplemented( "SgemmBatched is not supported on cuda <= 7.5")); @@ -75,7 +75,7 @@ struct CUBlas { static void GEMM_STRIDED_BATCH(ARGS... args) { #if CUDA_VERSION >= 8000 PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasSgemmStridedBatched(args...)); + common::dynload::cublasSgemmStridedBatched(args...)); #else PADDLE_THROW(phi::errors::Unimplemented( "SgemmStridedBatched is not supported on cuda <= 7.5")); @@ -109,23 +109,23 @@ struct CUBlas { VLOG(5) << "use_tensor_op_math: " << (dev_ctx->tensor_core_available() ? "True" : "False"); dev_ctx->TensorCoreCublasCallIfAvailable([&](cublasHandle_t handle) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasSgemmEx(handle, - transa, - transb, - m, - n, - k, - alpha, - A, - Atype, - lda, - B, - Btype, - ldb, - beta, - C, - Ctype, - ldc)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasSgemmEx(handle, + transa, + transb, + m, + n, + k, + alpha, + A, + Atype, + lda, + B, + Btype, + ldb, + beta, + C, + Ctype, + ldc)); }); #else PADDLE_THROW(phi::errors::Unimplemented( @@ -135,32 +135,32 @@ struct CUBlas { template static void TRSM(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasStrsm(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasStrsm(args...)); } template static void GETRF_BATCH(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasSgetrfBatched(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasSgetrfBatched(args...)); } template static void GETRI_BATCH(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasSgetriBatched(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasSgetriBatched(args...)); } template static void MATINV_BATCH(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasSmatinvBatched(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasSmatinvBatched(args...)); } template static void GETRS_BATCH(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasSgetrsBatched(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasSgetrsBatched(args...)); } template static void TRSM_BATCH(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasStrsmBatched(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasStrsmBatched(args...)); } }; @@ -168,33 +168,33 @@ template <> struct CUBlas { template static void GEMM(ARGS... 
args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasDgemm(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasDgemm(args...)); } template static void AXPY(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasDaxpy(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasDaxpy(args...)); } template static void SCAL(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasDscal(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasDscal(args...)); } template static void VCOPY(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasDcopy(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasDcopy(args...)); } template static void GEMV(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasDgemv(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasDgemv(args...)); } template static void GEMM_BATCH(ARGS... args) { #if CUDA_VERSION >= 8000 - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasDgemmBatched(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasDgemmBatched(args...)); #else PADDLE_THROW(phi::errors::Unimplemented( "DgemmBatched is not supported on cuda <= 7.5")); @@ -205,7 +205,7 @@ struct CUBlas { static void GEMM_STRIDED_BATCH(ARGS... args) { #if CUDA_VERSION >= 8000 PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasDgemmStridedBatched(args...)); + common::dynload::cublasDgemmStridedBatched(args...)); #else PADDLE_THROW(phi::errors::Unimplemented( "DgemmStridedBatched is not supported on cuda <= 7.5")); @@ -220,32 +220,32 @@ struct CUBlas { template static void TRSM(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasDtrsm(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasDtrsm(args...)); } template static void GETRF_BATCH(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasDgetrfBatched(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasDgetrfBatched(args...)); } template static void GETRI_BATCH(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasDgetriBatched(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasDgetriBatched(args...)); } template static void MATINV_BATCH(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasDmatinvBatched(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasDmatinvBatched(args...)); } template static void GETRS_BATCH(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasDgetrsBatched(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasDgetrsBatched(args...)); } template static void TRSM_BATCH(ARGS... 
args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasDtrsmBatched(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasDtrsmBatched(args...)); } }; @@ -268,20 +268,20 @@ struct CUBlas { float16 *C, int ldc) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasHgemm(handle, - transa, - transb, - m, - n, - k, - reinterpret_cast(alpha), - reinterpret_cast(A), - lda, - reinterpret_cast(B), - ldb, - reinterpret_cast(beta), - reinterpret_cast<__half *>(C), - ldc)); + common::dynload::cublasHgemm(handle, + transa, + transb, + m, + n, + k, + reinterpret_cast(alpha), + reinterpret_cast(A), + lda, + reinterpret_cast(B), + ldb, + reinterpret_cast(beta), + reinterpret_cast<__half *>(C), + ldc)); } #if defined(__NVCC__) @@ -319,26 +319,26 @@ struct CUBlas { thrust::device_vector C_ptr(C, C + batchCount); dev_ctx->TensorCoreCublasCallIfAvailable([&](cublasHandle_t handle) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasGemmBatchedEx(handle, - transa, - transb, - m, - n, - k, - alpha, - A_ptr.data().get(), - Atype, - lda, - B_ptr.data().get(), - Btype, - ldb, - beta, - C_ptr.data().get(), - Ctype, - ldc, - batchCount, - computeType, - algo)); + common::dynload::cublasGemmBatchedEx(handle, + transa, + transb, + m, + n, + k, + alpha, + A_ptr.data().get(), + Atype, + lda, + B_ptr.data().get(), + Btype, + ldb, + beta, + C_ptr.data().get(), + Ctype, + ldc, + batchCount, + computeType, + algo)); }); #else PADDLE_THROW(phi::errors::Unimplemented( @@ -366,7 +366,7 @@ struct CUBlas { long long int strideC, // NOLINT int batchCount) { #if CUDA_VERSION >= 8000 - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasHgemmStridedBatched( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasHgemmStridedBatched( handle, transa, transb, @@ -424,25 +424,25 @@ struct CUBlas { #endif // CUDA_VERSION >= 9000 dev_ctx->TensorCoreCublasCallIfAvailable([&](cublasHandle_t handle) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasGemmEx(handle, - transa, - transb, - m, - n, - k, - alpha, - A, - Atype, - lda, - B, - Btype, - ldb, - beta, - C, - Ctype, - ldc, - computeType, - algo)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasGemmEx(handle, + transa, + transb, + m, + n, + k, + alpha, + A, + Atype, + lda, + B, + Btype, + ldb, + beta, + C, + Ctype, + ldc, + computeType, + algo)); }); #else PADDLE_THROW(phi::errors::Unimplemented( @@ -465,7 +465,7 @@ struct CUBlas> { const phi::dtype::complex *beta, phi::dtype::complex *C, int ldc) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasCgemv( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasCgemv( handle, transa, m, @@ -487,7 +487,7 @@ struct CUBlas> { const int incX, phi::dtype::complex *Y, const int incY) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasCaxpy( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasCaxpy( handle, n, reinterpret_cast(alpha), @@ -516,7 +516,7 @@ struct CUBlas> { long long int strideC, // NOLINT int batchCount) { #if CUDA_VERSION >= 8000 - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasCgemmStridedBatched( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasCgemmStridedBatched( handle, transa, transb, @@ -555,7 +555,7 @@ struct CUBlas> { const phi::dtype::complex *beta, phi::dtype::complex *C, int ldc) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasCgemm( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasCgemm( handle, transa, transb, @@ -584,7 +584,7 @@ struct CUBlas> { int lda, phi::dtype::complex *B, int ldb) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasCtrsm( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasCtrsm( 
handle, side, uplo, @@ -632,25 +632,25 @@ struct CUBlas> { #endif // CUDA_VERSION >= 9000 dev_ctx->TensorCoreCublasCallIfAvailable([&](cublasHandle_t handle) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasGemmEx(handle, - transa, - transb, - m, - n, - k, - alpha, - A, - Atype, - lda, - B, - Btype, - ldb, - beta, - C, - Ctype, - ldc, - computeType, - algo)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasGemmEx(handle, + transa, + transb, + m, + n, + k, + alpha, + A, + Atype, + lda, + B, + Btype, + ldb, + beta, + C, + Ctype, + ldc, + computeType, + algo)); }); #else PADDLE_THROW(phi::errors::Unimplemented( @@ -671,7 +671,7 @@ struct CUBlas> { phi::dtype::complex **B, int ldb, int batch_size) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasCtrsmBatched( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasCtrsmBatched( handle, side, uplo, @@ -702,7 +702,7 @@ struct CUBlas> { const phi::dtype::complex *beta, phi::dtype::complex *C, int ldc) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasZgemv( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasZgemv( handle, transa, m, @@ -724,7 +724,7 @@ struct CUBlas> { const int incX, phi::dtype::complex *Y, const int incY) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasZaxpy( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasZaxpy( handle, n, reinterpret_cast(alpha), @@ -754,7 +754,7 @@ struct CUBlas> { long long int strideC, // NOLINT int batchCount) { #if CUDA_VERSION >= 8000 - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasZgemmStridedBatched( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasZgemmStridedBatched( handle, transa, transb, @@ -793,7 +793,7 @@ struct CUBlas> { const phi::dtype::complex *beta, phi::dtype::complex *C, int ldc) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasZgemm( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasZgemm( handle, transa, transb, @@ -822,7 +822,7 @@ struct CUBlas> { int lda, phi::dtype::complex *B, int ldb) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasZtrsm( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasZtrsm( handle, side, uplo, @@ -850,7 +850,7 @@ struct CUBlas> { phi::dtype::complex **B, int ldb, int batch_size) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasZtrsmBatched( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasZtrsmBatched( handle, side, uplo, @@ -899,25 +899,25 @@ struct CUBlas> { #endif // CUDA_VERSION >= 9000 dev_ctx->TensorCoreCublasCallIfAvailable([&](cublasHandle_t handle) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasGemmEx(handle, - transa, - transb, - m, - n, - k, - alpha, - A, - Atype, - lda, - B, - Btype, - ldb, - beta, - C, - Ctype, - ldc, - computeType, - algo)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasGemmEx(handle, + transa, + transb, + m, + n, + k, + alpha, + A, + Atype, + lda, + B, + Btype, + ldb, + beta, + C, + Ctype, + ldc, + computeType, + algo)); }); #else PADDLE_THROW(phi::errors::Unimplemented( @@ -1111,25 +1111,25 @@ inline void Blas::GEMM(CBLAS_TRANSPOSE transA, VLOG(5) << "use_tensor_op_math: " << (use_tensor_op_math ? 
"True" : "False"); context_.TensorCoreCublasCallIfAvailable([&](cublasHandle_t handle) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasGemmEx(handle, - cuTransB, - cuTransA, - N, - M, - K, - &h_alpha, - B, - CUDA_R_16BF, - ldb, - A, - CUDA_R_16BF, - lda, - &h_beta, - C, - CUDA_R_16BF, - N, - CUDA_R_32F, - algo)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasGemmEx(handle, + cuTransB, + cuTransA, + N, + M, + K, + &h_alpha, + B, + CUDA_R_16BF, + ldb, + A, + CUDA_R_16BF, + lda, + &h_beta, + C, + CUDA_R_16BF, + N, + CUDA_R_32F, + algo)); }); #else // raise error @@ -1443,25 +1443,25 @@ inline void Blas::GEMM(bool transA, VLOG(5) << "use_tensor_op_math: " << (use_tensor_op_math ? "True" : "False"); context_.TensorCoreCublasCallIfAvailable([&](cublasHandle_t handle) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasGemmEx(handle, - cuTransB, - cuTransA, - N, - M, - K, - &h_alpha, - B, - CUDA_R_16BF, - ldb, - A, - CUDA_R_16BF, - lda, - &h_beta, - C, - CUDA_R_16BF, - ldc, - CUDA_R_32F, - algo)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasGemmEx(handle, + cuTransB, + cuTransA, + N, + M, + K, + &h_alpha, + B, + CUDA_R_16BF, + ldb, + A, + CUDA_R_16BF, + lda, + &h_beta, + C, + CUDA_R_16BF, + ldc, + CUDA_R_32F, + algo)); }); #else // raise error @@ -1615,29 +1615,29 @@ void Blas::BatchedGEMM(CBLAS_TRANSPOSE transA, context_.TensorCoreCublasCallIfAvailable([&](cublasHandle_t handle) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasGemmStridedBatchedEx(handle, - cuTransB, - cuTransA, - N, - M, - K, - a, - B, - fp, - ldb, - strideB, - A, - fp, - lda, - strideA, - b, - C, - fp, - ldc, - strideC, - batchCount, - compute_type, - algo)); + common::dynload::cublasGemmStridedBatchedEx(handle, + cuTransB, + cuTransA, + N, + M, + K, + a, + B, + fp, + ldb, + strideB, + A, + fp, + lda, + strideA, + b, + C, + fp, + ldc, + strideC, + batchCount, + compute_type, + algo)); }); } else { #endif // CUDA_VERSION >= 9010 @@ -1707,29 +1707,29 @@ inline void Blas::BatchedGEMM(CBLAS_TRANSPOSE transA, context_.TensorCoreCublasCallIfAvailable([&](cublasHandle_t handle) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasGemmStridedBatchedEx(handle, - cuTransB, - cuTransA, - N, - M, - K, - &h_alpha, - B, - CUDA_R_16BF, - ldb, - strideB, - A, - CUDA_R_16BF, - lda, - strideA, - &h_beta, - C, - CUDA_R_16BF, - ldc, - strideC, - batchCount, - CUBLAS_COMPUTE_32F, - algo)); + common::dynload::cublasGemmStridedBatchedEx(handle, + cuTransB, + cuTransA, + N, + M, + K, + &h_alpha, + B, + CUDA_R_16BF, + ldb, + strideB, + A, + CUDA_R_16BF, + lda, + strideA, + &h_beta, + C, + CUDA_R_16BF, + ldc, + strideC, + batchCount, + CUBLAS_COMPUTE_32F, + algo)); }); #else // raise error @@ -1950,26 +1950,26 @@ inline void Blas::BatchedGEMM(CBLAS_TRANSPOSE transA, thrust::device_vector C_ptr(C, C + batchCount); context_.TensorCoreCublasCallIfAvailable([&](cublasHandle_t handle) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasGemmBatchedEx(handle, - cuTransB, - cuTransA, - N, - M, - K, - &f_alpha, - B_ptr.data().get(), - CUDA_R_16BF, - ldb, - A_ptr.data().get(), - CUDA_R_16BF, - lda, - &f_beta, - C_ptr.data().get(), - CUDA_R_16BF, - ldc, - batchCount, - CUDA_R_32F, - algo)); + common::dynload::cublasGemmBatchedEx(handle, + cuTransB, + cuTransA, + N, + M, + K, + &f_alpha, + B_ptr.data().get(), + CUDA_R_16BF, + ldb, + A_ptr.data().get(), + CUDA_R_16BF, + lda, + &f_beta, + C_ptr.data().get(), + CUDA_R_16BF, + ldc, + batchCount, + CUDA_R_32F, + algo)); }); #else // raise error diff --git a/paddle/phi/kernels/funcs/blas/blas_impl.h 
b/paddle/phi/kernels/funcs/blas/blas_impl.h index 11eff02fefe214..34f8c830892db5 100644 --- a/paddle/phi/kernels/funcs/blas/blas_impl.h +++ b/paddle/phi/kernels/funcs/blas/blas_impl.h @@ -113,27 +113,27 @@ template <> struct CBlas { template static void GEMM(ARGS... args) { - phi::dynload::cblas_sgemm(args...); + common::dynload::cblas_sgemm(args...); } template static float *GEMM_ALLOC(ARGS... args) { - return phi::dynload::cblas_sgemm_alloc(args...); + return common::dynload::cblas_sgemm_alloc(args...); } template static void GEMM_PACK(ARGS... args) { - phi::dynload::cblas_sgemm_pack(args...); + common::dynload::cblas_sgemm_pack(args...); } template static void GEMM_COMPUTE(ARGS... args) { - phi::dynload::cblas_sgemm_compute(args...); + common::dynload::cblas_sgemm_compute(args...); } template static void GEMM_FREE(ARGS... args) { - phi::dynload::cblas_sgemm_free(args...); + common::dynload::cblas_sgemm_free(args...); } #ifdef PADDLE_WITH_LIBXSMM @@ -145,93 +145,93 @@ struct CBlas { template static void AXPY(ARGS... args) { - phi::dynload::cblas_saxpy(args...); + common::dynload::cblas_saxpy(args...); } template static void VCOPY(ARGS... args) { - phi::dynload::cblas_scopy(args...); + common::dynload::cblas_scopy(args...); } template static void GEMV(ARGS... args) { - phi::dynload::cblas_sgemv(args...); + common::dynload::cblas_sgemv(args...); } template static float DOT(ARGS... args) { - return phi::dynload::cblas_sdot(args...); + return common::dynload::cblas_sdot(args...); } template static void SCAL(ARGS... args) { - phi::dynload::cblas_sscal(args...); + common::dynload::cblas_sscal(args...); } template static float ASUM(ARGS... args) { - return phi::dynload::cblas_sasum(args...); + return common::dynload::cblas_sasum(args...); } template static void GEMM_BATCH(ARGS... args) { - phi::dynload::cblas_sgemm_batch(args...); + common::dynload::cblas_sgemm_batch(args...); } template static void VADD(ARGS... args) { - phi::dynload::vsAdd(args...); + common::dynload::vsAdd(args...); } template static void VSUB(ARGS... args) { - phi::dynload::vsSub(args...); + common::dynload::vsSub(args...); } template static void VMUL(ARGS... args) { - phi::dynload::vsMul(args...); + common::dynload::vsMul(args...); } template static void VDIV(ARGS... args) { - phi::dynload::vsDiv(args...); + common::dynload::vsDiv(args...); } template static void VEXP(ARGS... args) { - phi::dynload::vsExp(args...); + common::dynload::vsExp(args...); } template static void VSQUARE(ARGS... args) { - phi::dynload::vsSqr(args...); + common::dynload::vsSqr(args...); } template static void VPOW(ARGS... args) { - phi::dynload::vsPowx(args...); + common::dynload::vsPowx(args...); } template static void VINV(ARGS... args) { - phi::dynload::vsInv(args...); + common::dynload::vsInv(args...); } template static void VMERF(ARGS... args) { - phi::dynload::vmsErf(args...); + common::dynload::vmsErf(args...); } #if !defined(_WIN32) template static void CSRMM(ARGS... args) { - phi::dynload::mkl_scsrmm(args...); + common::dynload::mkl_scsrmm(args...); } #endif template static void TRSM(ARGS... args) { - phi::dynload::cblas_strsm(args...); + common::dynload::cblas_strsm(args...); } }; @@ -239,27 +239,27 @@ template <> struct CBlas { template static void GEMM(ARGS... args) { - phi::dynload::cblas_dgemm(args...); + common::dynload::cblas_dgemm(args...); } template static double *GEMM_ALLOC(ARGS... 
args) { - return phi::dynload::cblas_dgemm_alloc(args...); + return common::dynload::cblas_dgemm_alloc(args...); } template static void GEMM_PACK(ARGS... args) { - phi::dynload::cblas_dgemm_pack(args...); + common::dynload::cblas_dgemm_pack(args...); } template static void GEMM_COMPUTE(ARGS... args) { - phi::dynload::cblas_dgemm_compute(args...); + common::dynload::cblas_dgemm_compute(args...); } template static void GEMM_FREE(ARGS... args) { - phi::dynload::cblas_dgemm_free(args...); + common::dynload::cblas_dgemm_free(args...); } #ifdef PADDLE_WITH_LIBXSMM @@ -271,93 +271,93 @@ struct CBlas { template static void AXPY(ARGS... args) { - phi::dynload::cblas_daxpy(args...); + common::dynload::cblas_daxpy(args...); } template static void VCOPY(ARGS... args) { - phi::dynload::cblas_dcopy(args...); + common::dynload::cblas_dcopy(args...); } template static void GEMV(ARGS... args) { - phi::dynload::cblas_dgemv(args...); + common::dynload::cblas_dgemv(args...); } template static double DOT(ARGS... args) { - return phi::dynload::cblas_ddot(args...); + return common::dynload::cblas_ddot(args...); } template static void SCAL(ARGS... args) { - phi::dynload::cblas_dscal(args...); + common::dynload::cblas_dscal(args...); } template static double ASUM(ARGS... args) { - return phi::dynload::cblas_dasum(args...); + return common::dynload::cblas_dasum(args...); } template static void GEMM_BATCH(ARGS... args) { - phi::dynload::cblas_dgemm_batch(args...); + common::dynload::cblas_dgemm_batch(args...); } template static void VADD(ARGS... args) { - phi::dynload::vdAdd(args...); + common::dynload::vdAdd(args...); } template static void VSUB(ARGS... args) { - phi::dynload::vdSub(args...); + common::dynload::vdSub(args...); } template static void VMUL(ARGS... args) { - phi::dynload::vdMul(args...); + common::dynload::vdMul(args...); } template static void VDIV(ARGS... args) { - phi::dynload::vdDiv(args...); + common::dynload::vdDiv(args...); } template static void VEXP(ARGS... args) { - phi::dynload::vdExp(args...); + common::dynload::vdExp(args...); } template static void VSQUARE(ARGS... args) { - phi::dynload::vdSqr(args...); + common::dynload::vdSqr(args...); } template static void VPOW(ARGS... args) { - phi::dynload::vdPowx(args...); + common::dynload::vdPowx(args...); } template static void VINV(ARGS... args) { - phi::dynload::vdInv(args...); + common::dynload::vdInv(args...); } template static void VMERF(ARGS... args) { - phi::dynload::vmdErf(args...); + common::dynload::vmdErf(args...); } #if !defined(_WIN32) template static void CSRMM(ARGS... args) { - phi::dynload::mkl_dcsrmm(args...); + common::dynload::mkl_dcsrmm(args...); } #endif template static void TRSM(ARGS... args) { - phi::dynload::cblas_dtrsm(args...); + common::dynload::cblas_dtrsm(args...); } }; @@ -370,12 +370,12 @@ struct CBlas> { const int incX, phi::dtype::complex *Y, const int incY) { - phi::dynload::cblas_caxpy(n, &alpha, X, incX, Y, incY); + common::dynload::cblas_caxpy(n, &alpha, X, incX, Y, incY); } template static void VCOPY(ARGS... args) { - phi::dynload::cblas_ccopy(args...); + common::dynload::cblas_ccopy(args...); } // the libmklml_intel.so paddle used has no vcAdd, vcSub, @@ -384,22 +384,22 @@ struct CBlas> { /* template static void VADD(ARGS... args) { - phi::dynload::vcAdd(args...); + common::dynload::vcAdd(args...); } template static void VSUB(ARGS... args) { - phi::dynload::vcSub(args...); + common::dynload::vcSub(args...); } template static void VMUL(ARGS... 
args) { - phi::dynload::vcMul(args...); + common::dynload::vcMul(args...); } template static void VDIV(ARGS... args) { - phi::dynload::vcDiv(args...); + common::dynload::vcDiv(args...); } */ @@ -458,7 +458,7 @@ struct CBlas> { const void *a_ = (const void *)(A); const void *x_ = (const void *)(X); void *y_ = static_cast(Y); - phi::dynload::cblas_cgemv( + common::dynload::cblas_cgemv( layout, trans, M, N, &alpha, a_, lda, x_, incx, &beta, y_, incy); } @@ -480,20 +480,20 @@ struct CBlas> { const void *a_ = (const void *)(A); const void *b_ = (const void *)(B); void *c_ = static_cast(C); - phi::dynload::cblas_cgemm(layout, - trans_a, - trans_b, - M, - N, - K, - &alpha, - a_, - lda, - b_, - ldb, - &beta, - c_, - ldc); + common::dynload::cblas_cgemm(layout, + trans_a, + trans_b, + M, + N, + K, + &alpha, + a_, + lda, + b_, + ldb, + &beta, + c_, + ldc); } static void TRSM(CBLAS_LAYOUT layout, @@ -510,7 +510,7 @@ struct CBlas> { int ldb) { const void *a_ = (const void *)(A); void *b_ = static_cast(B); - phi::dynload::cblas_ctrsm( + common::dynload::cblas_ctrsm( layout, side, uplo, trans_a, diag, M, N, &alpha, a_, lda, b_, ldb); } @@ -535,27 +535,27 @@ struct CBlas> { const void **B_void = (const void **)(&(*B)); void **C_void = reinterpret_cast(C); - phi::dynload::cblas_cgemm_batch(layout, - trans_a, - trans_b, - M, - N, - K, - alpha, - A_void, - lda, - B_void, - ldb, - beta, - C_void, - ldc, - group_count, - group_size); + common::dynload::cblas_cgemm_batch(layout, + trans_a, + trans_b, + M, + N, + K, + alpha, + A_void, + lda, + B_void, + ldb, + beta, + C_void, + ldc, + group_count, + group_size); } template static void GEMM_EX(ARGS... args) { - phi::dynload::cblas_cgemm_batch(args...); + common::dynload::cblas_cgemm_batch(args...); } }; @@ -568,12 +568,12 @@ struct CBlas> { const int incX, phi::dtype::complex *Y, const int incY) { - phi::dynload::cblas_zaxpy(n, &alpha, X, incX, Y, incY); + common::dynload::cblas_zaxpy(n, &alpha, X, incX, Y, incY); } template static void VCOPY(ARGS... args) { - phi::dynload::cblas_zcopy(args...); + common::dynload::cblas_zcopy(args...); } // the libmklml_intel.so paddle used has no vzAdd, vzSub, @@ -582,22 +582,22 @@ struct CBlas> { /* template static void VADD(ARGS... args) { - phi::dynload::vzAdd(args...); + common::dynload::vzAdd(args...); } template static void VSUB(ARGS... args) { - phi::dynload::vzSub(args...); + common::dynload::vzSub(args...); } template static void VMUL(ARGS... args) { - phi::dynload::vzMul(args...); + common::dynload::vzMul(args...); } template static void VDIV(ARGS... 
args) { - phi::dynload::vzDiv(args...); + common::dynload::vzDiv(args...); } */ @@ -656,7 +656,7 @@ struct CBlas> { const void *a_ = (const void *)(A); const void *x_ = (const void *)(X); void *y_ = static_cast(Y); - phi::dynload::cblas_zgemv( + common::dynload::cblas_zgemv( layout, trans, M, N, &alpha, a_, lda, x_, incx, &beta, y_, incy); } @@ -678,20 +678,20 @@ struct CBlas> { const void *a_ = (const void *)(A); const void *b_ = (const void *)(B); void *c_ = static_cast(C); - phi::dynload::cblas_zgemm(layout, - trans_a, - trans_b, - M, - N, - K, - &alpha, - a_, - lda, - b_, - ldb, - &beta, - c_, - ldc); + common::dynload::cblas_zgemm(layout, + trans_a, + trans_b, + M, + N, + K, + &alpha, + a_, + lda, + b_, + ldb, + &beta, + c_, + ldc); } static void TRSM(CBLAS_LAYOUT layout, @@ -708,7 +708,7 @@ struct CBlas> { int ldb) { const void *a_ = (const void *)(A); void *b_ = static_cast(B); - phi::dynload::cblas_ztrsm( + common::dynload::cblas_ztrsm( layout, side, uplo, trans_a, diag, M, N, &alpha, a_, lda, b_, ldb); } @@ -733,27 +733,27 @@ struct CBlas> { const void **B_void = (const void **)(&(*B)); void **C_void = reinterpret_cast(C); - phi::dynload::cblas_zgemm_batch(layout, - trans_a, - trans_b, - M, - N, - K, - alpha, - A_void, - lda, - B_void, - ldb, - beta, - C_void, - ldc, - group_count, - group_size); + common::dynload::cblas_zgemm_batch(layout, + trans_a, + trans_b, + M, + N, + K, + alpha, + A_void, + lda, + B_void, + ldb, + beta, + C_void, + ldc, + group_count, + group_size); } template static void GEMM_EX(ARGS... args) { - phi::dynload::cblas_zgemm_batch(args...); + common::dynload::cblas_zgemm_batch(args...); } }; diff --git a/paddle/phi/kernels/funcs/blas/blas_impl.hip.h b/paddle/phi/kernels/funcs/blas/blas_impl.hip.h index 224cf4d6cb4970..44890354f231c4 100644 --- a/paddle/phi/kernels/funcs/blas/blas_impl.hip.h +++ b/paddle/phi/kernels/funcs/blas/blas_impl.hip.h @@ -32,33 +32,33 @@ template <> struct CUBlas { template static void GEMM(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_sgemm(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_sgemm(args...)); } template static void AXPY(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_saxpy(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_saxpy(args...)); } template static void SCAL(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_sscal(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_sscal(args...)); } template static void VCOPY(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_scopy(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_scopy(args...)); } template static void GEMV(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_sgemv(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_sgemv(args...)); } template static void GEMM_STRIDED_BATCH(ARGS... args) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::rocblas_sgemm_strided_batched(args...)); + common::dynload::rocblas_sgemm_strided_batched(args...)); } // HIP not supportted, refer to the doc here: @@ -71,7 +71,7 @@ struct CUBlas { template static void TRSM(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_strsm(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_strsm(args...)); } template @@ -103,33 +103,33 @@ template <> struct CUBlas { template static void GEMM(ARGS... 
args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_dgemm(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_dgemm(args...)); } template static void AXPY(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_daxpy(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_daxpy(args...)); } template static void SCAL(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_dscal(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_dscal(args...)); } template static void VCOPY(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_dcopy(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_dcopy(args...)); } template static void GEMV(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_dgemv(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_dgemv(args...)); } template static void GEMM_STRIDED_BATCH(ARGS... args) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::rocblas_dgemm_strided_batched(args...)); + common::dynload::rocblas_dgemm_strided_batched(args...)); } template @@ -140,7 +140,7 @@ struct CUBlas { template static void TRSM(ARGS... args) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_dtrsm(args...)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_dtrsm(args...)); } template @@ -186,7 +186,7 @@ struct CUBlas { const float16 *beta, float16 *C, int ldc) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_hgemm( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_hgemm( handle, transa, transb, @@ -221,7 +221,7 @@ struct CUBlas { int ldc, long long int strideC, // NOLINT int batchCount) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_hgemm_strided_batched( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_hgemm_strided_batched( handle, transa, transb, @@ -265,30 +265,30 @@ struct CUBlas { rocblas_datatype computeType) { rocblas_gemm_algo algo = rocblas_gemm_algo_standard; dev_ctx->TensorCoreCublasCallIfAvailable([&](rocblas_handle handle) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_gemm_ex(handle, - transa, - transb, - m, - n, - k, - alpha, - A, - Atype, - lda, - B, - Btype, - ldb, - beta, - C, - Ctype, - ldc, - C, - Ctype, - ldc, - computeType, - algo, - 0, - 0)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_gemm_ex(handle, + transa, + transb, + m, + n, + k, + alpha, + A, + Atype, + lda, + B, + Btype, + ldb, + beta, + C, + Ctype, + ldc, + C, + Ctype, + ldc, + computeType, + algo, + 0, + 0)); }); } }; @@ -307,7 +307,7 @@ struct CUBlas> { const phi::dtype::complex *beta, phi::dtype::complex *C, int ldc) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_cgemv( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_cgemv( handle, transa, m, @@ -329,7 +329,7 @@ struct CUBlas> { const int incX, phi::dtype::complex *Y, const int incY) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_caxpy( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_caxpy( handle, n, reinterpret_cast(alpha), @@ -357,7 +357,7 @@ struct CUBlas> { int ldc, long long int strideC, // NOLINT int batchCount) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_cgemm_strided_batched( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_cgemm_strided_batched( handle, transa, transb, @@ -392,7 +392,7 @@ struct CUBlas> { const phi::dtype::complex *beta, phi::dtype::complex *C, int ldc) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_cgemm( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_cgemm( handle, transa, transb, @@ -432,30 +432,30 @@ struct CUBlas> { 
rocblas_datatype computeType) { rocblas_gemm_algo algo = rocblas_gemm_algo_standard; dev_ctx->TensorCoreCublasCallIfAvailable([&](rocblas_handle handle) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_gemm_ex(handle, - transa, - transb, - m, - n, - k, - alpha, - A, - Atype, - lda, - B, - Btype, - ldb, - beta, - C, - Ctype, - ldc, - C, - Ctype, - ldc, - computeType, - algo, - 0, - 0)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_gemm_ex(handle, + transa, + transb, + m, + n, + k, + alpha, + A, + Atype, + lda, + B, + Btype, + ldb, + beta, + C, + Ctype, + ldc, + C, + Ctype, + ldc, + computeType, + algo, + 0, + 0)); }); } }; @@ -474,7 +474,7 @@ struct CUBlas<phi::dtype::complex<double>> { const phi::dtype::complex<double> *beta, phi::dtype::complex<double> *C, int ldc) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_zgemv( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_zgemv( handle, transa, m, @@ -496,7 +496,7 @@ struct CUBlas<phi::dtype::complex<double>> { const int incX, phi::dtype::complex<double> *Y, const int incY) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_zaxpy( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_zaxpy( handle, n, reinterpret_cast<const rocblas_double_complex *>(alpha), @@ -525,7 +525,7 @@ struct CUBlas<phi::dtype::complex<double>> { int ldc, long long int strideC, // NOLINT int batchCount) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_zgemm_strided_batched( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_zgemm_strided_batched( handle, transa, transb, @@ -560,7 +560,7 @@ struct CUBlas<phi::dtype::complex<double>> { const phi::dtype::complex<double> *beta, phi::dtype::complex<double> *C, int ldc) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_zgemm( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_zgemm( handle, transa, transb, @@ -600,30 +600,30 @@ struct CUBlas<phi::dtype::complex<double>> { rocblas_datatype computeType) { rocblas_gemm_algo algo = rocblas_gemm_algo_standard; dev_ctx->TensorCoreCublasCallIfAvailable([&](rocblas_handle handle) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_gemm_ex(handle, - transa, - transb, - m, - n, - k, - alpha, - A, - Atype, - lda, - B, - Btype, - ldb, - beta, - C, - Ctype, - ldc, - C, - Ctype, - ldc, - computeType, - algo, - 0, - 0)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_gemm_ex(handle, + transa, + transb, + m, + n, + k, + alpha, + A, + Atype, + lda, + B, + Btype, + ldb, + beta, + C, + Ctype, + ldc, + C, + Ctype, + ldc, + computeType, + algo, + 0, + 0)); }); } }; @@ -761,30 +761,30 @@ inline void Blas<phi::GPUContext>::GEMM(CBLAS_TRANSPOSE transA, context_.TensorCoreCublasCallIfAvailable([&](rocblas_handle handle) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::rocblas_gemm_ex(handle, - cuTransB, - cuTransA, - N, - M, - K, - &h_alpha, - B, - rocblas_datatype_bf16_r, - ldb, - A, - rocblas_datatype_bf16_r, - lda, - &h_beta, - C, - rocblas_datatype_bf16_r, - N, - C, - rocblas_datatype_bf16_r, - N, - rocblas_datatype_f32_r, - algo, - 0, - 0)); + common::dynload::rocblas_gemm_ex(handle, + cuTransB, + cuTransA, + N, + M, + K, + &h_alpha, + B, + rocblas_datatype_bf16_r, + ldb, + A, + rocblas_datatype_bf16_r, + lda, + &h_beta, + C, + rocblas_datatype_bf16_r, + N, + C, + rocblas_datatype_bf16_r, + N, + rocblas_datatype_f32_r, + algo, + 0, + 0)); }); } @@ -1017,30 +1017,30 @@ inline void Blas<phi::GPUContext>::GEMM(bool transA, context_.TensorCoreCublasCallIfAvailable([&](rocblas_handle handle) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::rocblas_gemm_ex(handle, - cuTransB, - cuTransA, - N, - M, - K, - &h_alpha, - B, - rocblas_datatype_bf16_r, - ldb, - A, - rocblas_datatype_bf16_r, - lda, - &h_beta, - C, - rocblas_datatype_bf16_r, - ldc, - C, - rocblas_datatype_bf16_r, - ldc, -
rocblas_datatype_f32_r, - algo, - 0, - 0)); + common::dynload::rocblas_gemm_ex(handle, + cuTransB, + cuTransA, + N, + M, + K, + &h_alpha, + B, + rocblas_datatype_bf16_r, + ldb, + A, + rocblas_datatype_bf16_r, + lda, + &h_beta, + C, + rocblas_datatype_bf16_r, + ldc, + C, + rocblas_datatype_bf16_r, + ldc, + rocblas_datatype_f32_r, + algo, + 0, + 0)); }); } @@ -1201,7 +1201,7 @@ inline void Blas::BatchedGEMM(CBLAS_TRANSPOSE transA, : rocblas_operation_transpose; const int64_t strideC = M * N; context_.CublasCall([&](rocblas_handle handle) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::rocblas_hgemm_strided_batched( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_hgemm_strided_batched( handle, cuTransB, cuTransA, @@ -1254,24 +1254,24 @@ inline void Blas::BatchedGEMM(CBLAS_TRANSPOSE transA, const int64_t strideC = M * N; context_.CublasCall([&](rocblas_handle handle) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::rocblas_sgemm_strided_batched(handle, - cuTransB, - cuTransA, - N, - M, - K, - &alpha, - B, - ldb, - strideB, - A, - lda, - strideA, - &beta, - C, - ldc, - strideC, - batchCount)); + common::dynload::rocblas_sgemm_strided_batched(handle, + cuTransB, + cuTransA, + N, + M, + K, + &alpha, + B, + ldb, + strideB, + A, + lda, + strideA, + &beta, + C, + ldc, + strideC, + batchCount)); }); } @@ -1304,24 +1304,24 @@ inline void Blas::BatchedGEMM(CBLAS_TRANSPOSE transA, const int64_t strideC = M * N; context_.CublasCall([&](rocblas_handle handle) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::rocblas_dgemm_strided_batched(handle, - cuTransB, - cuTransA, - N, - M, - K, - &alpha, - B, - ldb, - strideB, - A, - lda, - strideA, - &beta, - C, - ldc, - strideC, - batchCount)); + common::dynload::rocblas_dgemm_strided_batched(handle, + cuTransB, + cuTransA, + N, + M, + K, + &alpha, + B, + ldb, + strideB, + A, + lda, + strideA, + &beta, + C, + ldc, + strideC, + batchCount)); }); } @@ -1355,36 +1355,36 @@ inline void Blas::BatchedGEMM(CBLAS_TRANSPOSE transA, rocblas_gemm_algo algo = rocblas_gemm_algo_standard; context_.TensorCoreCublasCallIfAvailable([&](rocblas_handle handle) { - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::rocblas_gemm_strided_batched_ex(handle, - cuTransB, - cuTransA, - N, - M, - K, - &h_alpha, - B, - rocblas_datatype_bf16_r, - ldb, - strideB, - A, - rocblas_datatype_bf16_r, - lda, - strideA, - &h_beta, - C, - rocblas_datatype_bf16_r, - ldc, - strideC, - C, - rocblas_datatype_bf16_r, - ldc, - strideC, - batchCount, - rocblas_datatype_f32_r, - algo, - 0, - 0)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::rocblas_gemm_strided_batched_ex( + handle, + cuTransB, + cuTransA, + N, + M, + K, + &h_alpha, + B, + rocblas_datatype_bf16_r, + ldb, + strideB, + A, + rocblas_datatype_bf16_r, + lda, + strideA, + &h_beta, + C, + rocblas_datatype_bf16_r, + ldc, + strideC, + C, + rocblas_datatype_bf16_r, + ldc, + strideC, + batchCount, + rocblas_datatype_f32_r, + algo, + 0, + 0)); }); } diff --git a/paddle/phi/kernels/funcs/check_numerics_utils.h b/paddle/phi/kernels/funcs/check_numerics_utils.h index 473d7994058a8d..7e9514dab43be3 100644 --- a/paddle/phi/kernels/funcs/check_numerics_utils.h +++ b/paddle/phi/kernels/funcs/check_numerics_utils.h @@ -19,9 +19,9 @@ #endif #include +#include "paddle/common/enforce.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/core/enforce.h" #ifdef _WIN32 #include diff --git a/paddle/phi/kernels/funcs/concat_and_split_functor.h b/paddle/phi/kernels/funcs/concat_and_split_functor.h index 
9e3f663cb419c7..f2beeb96ff473e 100644 --- a/paddle/phi/kernels/funcs/concat_and_split_functor.h +++ b/paddle/phi/kernels/funcs/concat_and_split_functor.h @@ -16,13 +16,13 @@ limitations under the License. */ #include +#include "paddle/common/data_type.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/xpu/xpu_context.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/device_context.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/concat_funcs.h b/paddle/phi/kernels/funcs/concat_funcs.h index db965c2ef9b654..c888a9d4e0d46a 100644 --- a/paddle/phi/kernels/funcs/concat_funcs.h +++ b/paddle/phi/kernels/funcs/concat_funcs.h @@ -14,8 +14,8 @@ #pragma once -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/cpu_vec.h b/paddle/phi/kernels/funcs/cpu_vec.h index 6774cd391dd5d6..e6d08533e31c1f 100644 --- a/paddle/phi/kernels/funcs/cpu_vec.h +++ b/paddle/phi/kernels/funcs/cpu_vec.h @@ -17,8 +17,8 @@ limitations under the License. */ #include #include +#include "paddle/common/enforce.h" #include "paddle/phi/backends/cpu/cpu_info.h" -#include "paddle/phi/core/enforce.h" #ifdef PADDLE_WITH_MKLML #include "paddle/phi/backends/dynload/mklml.h" @@ -60,23 +60,23 @@ inline void vec_exp(const int n, const float* x, float* y) { y[i] = std::exp(x[i]); } } else { - phi::dynload::vsExp(n, x, y); + common::dynload::vsExp(n, x, y); } } template <> inline void vec_exp(const int n, const double* x, double* y) { - phi::dynload::vdExp(n, x, y); + common::dynload::vdExp(n, x, y); } template <> inline void vec_scal(const int n, const float a, float* x) { - phi::dynload::cblas_sscal(n, a, x, 1); + common::dynload::cblas_sscal(n, a, x, 1); } template <> inline void vec_scal(const int n, const double a, double* x) { - phi::dynload::cblas_dscal(n, a, x, 1); + common::dynload::cblas_dscal(n, a, x, 1); } #endif diff --git a/paddle/phi/kernels/funcs/cross_entropy.cc b/paddle/phi/kernels/funcs/cross_entropy.cc index cf53e9ea65efcc..e1949cc26fb7b9 100644 --- a/paddle/phi/kernels/funcs/cross_entropy.cc +++ b/paddle/phi/kernels/funcs/cross_entropy.cc @@ -14,8 +14,8 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/cross_entropy.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/cross_entropy.cu b/paddle/phi/kernels/funcs/cross_entropy.cu index 20a15f9e944fef..94a58912a1746d 100644 --- a/paddle/phi/kernels/funcs/cross_entropy.cu +++ b/paddle/phi/kernels/funcs/cross_entropy.cu @@ -14,11 +14,11 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/funcs/cross_entropy.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/cublaslt.h b/paddle/phi/kernels/funcs/cublaslt.h index 6278f159df075d..f0434def707bc6 100644 --- a/paddle/phi/kernels/funcs/cublaslt.h +++ b/paddle/phi/kernels/funcs/cublaslt.h @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/phi/backends/dynload/cublasLt.h" #include "paddle/phi/core/dense_tensor.h" -namespace dyl = phi::dynload; +namespace dyl = common::dynload; namespace phi { diff --git a/paddle/phi/kernels/funcs/cufft_util.h b/paddle/phi/kernels/funcs/cufft_util.h index 3a4a3ef5e59149..80b4f378d6f0da 100644 --- a/paddle/phi/kernels/funcs/cufft_util.h +++ b/paddle/phi/kernels/funcs/cufft_util.h @@ -15,9 +15,9 @@ #pragma once #include +#include "paddle/common/ddim.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/dynload/cufft.h" -#include "paddle/phi/core/ddim.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/fft.h" #include "paddle/phi/kernels/funcs/fft_key.h" @@ -29,7 +29,7 @@ namespace detail { class CuFFTHandle { public: CuFFTHandle() { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cufftCreate(&handle_)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cufftCreate(&handle_)); } CuFFTHandle(const CuFFTHandle& other) = delete; @@ -41,7 +41,7 @@ class CuFFTHandle { ::cufftHandle& get() { return handle_; } const ::cufftHandle& get() const { return handle_; } - ~CuFFTHandle() { phi::dynload::cufftDestroy(handle_); } + ~CuFFTHandle() { common::dynload::cufftDestroy(handle_); } private: ::cufftHandle handle_; @@ -108,23 +108,23 @@ class FFTConfig { // disable auto allocation of workspace to use allocator from the framework PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cufftSetAutoAllocation(plan(), /* autoAllocate */ 0)); + common::dynload::cufftSetAutoAllocation(plan(), /* autoAllocate */ 0)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cufftXtMakePlanMany(plan(), - signal_ndim, - signal_sizes.data(), - /* inembed */ nullptr, - /* base_istride */ 1L, - /* idist */ 1L, - itype, - /* onembed */ nullptr, - /* base_ostride */ 1L, - /* odist */ 1L, - otype, - batch_size, - &ws_size_, - exec_type)); + common::dynload::cufftXtMakePlanMany(plan(), + signal_ndim, + signal_sizes.data(), + /* inembed */ nullptr, + /* base_istride */ 1L, + /* idist */ 1L, + itype, + /* onembed */ nullptr, + /* base_ostride */ 1L, + /* odist */ 1L, + otype, + batch_size, + &ws_size_, + exec_type)); } FFTConfig(const FFTConfig& other) = delete; @@ -151,7 +151,7 @@ static void exec_plan(const FFTConfig& config, void* out_data, bool forward) { auto& plan = config.plan(); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cufftXtExec( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cufftXtExec( plan, in_data, out_data, forward ? CUFFT_FORWARD : CUFFT_INVERSE)); } diff --git a/paddle/phi/kernels/funcs/cumprod.h b/paddle/phi/kernels/funcs/cumprod.h index 4eefd4559c33a2..fad43f4acef72a 100644 --- a/paddle/phi/kernels/funcs/cumprod.h +++ b/paddle/phi/kernels/funcs/cumprod.h @@ -13,7 +13,7 @@ // limitations under the License. 
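The CuFFTHandle class touched in the hunk above follows a standard RAII shape: the library handle is created in the constructor, destroyed in the destructor, and copying is deleted so the handle cannot be released twice. A minimal self-contained sketch of that shape follows; FakeHandle, fake_create, and fake_destroy are illustrative stand-ins, not real cuFFT/hipFFT API.

struct FakeHandle { int id; };
inline void fake_create(FakeHandle* h) { h->id = 1; }  // stand-in for cufftCreate
inline void fake_destroy(FakeHandle h) { (void)h; }    // stand-in for cufftDestroy

class RaiiFftHandle {
 public:
  RaiiFftHandle() { fake_create(&handle_); }       // acquire on construction
  RaiiFftHandle(const RaiiFftHandle&) = delete;    // non-copyable, as in CuFFTHandle
  RaiiFftHandle& operator=(const RaiiFftHandle&) = delete;
  ~RaiiFftHandle() { fake_destroy(handle_); }      // release on destruction
  FakeHandle& get() { return handle_; }

 private:
  FakeHandle handle_;
};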
#pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/dense_tensor.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/detail/strided_memcpy.h b/paddle/phi/kernels/funcs/detail/strided_memcpy.h index 0cd07fdfd0e1ae..03e3bdde05ad09 100644 --- a/paddle/phi/kernels/funcs/detail/strided_memcpy.h +++ b/paddle/phi/kernels/funcs/detail/strided_memcpy.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include "paddle/common/ddim.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/device_context.h" #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) diff --git a/paddle/phi/kernels/funcs/dims_simplifier.h b/paddle/phi/kernels/funcs/dims_simplifier.h index 35621ed0abddb3..57056a19963c1e 100644 --- a/paddle/phi/kernels/funcs/dims_simplifier.h +++ b/paddle/phi/kernels/funcs/dims_simplifier.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "glog/logging.h" diff --git a/paddle/phi/kernels/funcs/elementwise_functor.h b/paddle/phi/kernels/funcs/elementwise_functor.h index 5112bf3f35da49..80dfed1544340b 100644 --- a/paddle/phi/kernels/funcs/elementwise_functor.h +++ b/paddle/phi/kernels/funcs/elementwise_functor.h @@ -16,10 +16,10 @@ limitations under the License. */ #include "paddle/common/bfloat16.h" #include "paddle/common/complex.h" +#include "paddle/common/enforce.h" #include "paddle/common/float16.h" #include "paddle/common/hostdevice.h" #include "paddle/common/macros.h" -#include "paddle/phi/core/enforce.h" #if defined(__xpu__) #include diff --git a/paddle/phi/kernels/funcs/elementwise_utils.h b/paddle/phi/kernels/funcs/elementwise_utils.h index 3790044346dc42..5e3ae5a5d8ac7c 100644 --- a/paddle/phi/kernels/funcs/elementwise_utils.h +++ b/paddle/phi/kernels/funcs/elementwise_utils.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once +#include "paddle/common/enforce.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/fft.cc b/paddle/phi/kernels/funcs/fft.cc index 31ea37f5b5037e..a5bd2e1281ffce 100644 --- a/paddle/phi/kernels/funcs/fft.cc +++ b/paddle/phi/kernels/funcs/fft.cc @@ -113,18 +113,18 @@ void exec_fft(const phi::CPUContext& ctx, const FFTTransformType fft_type = GetFFTTransformType(x.dtype(), out->type()); if (fft_type == FFTTransformType::C2R && forward) { ConjKernel(ctx, collapsed_input, &collapsed_input); - MKL_DFTI_CHECK(phi::dynload::DftiComputeBackward( + MKL_DFTI_CHECK(common::dynload::DftiComputeBackward( desc.get(), collapsed_input.data(), collapsed_output.data())); } else if (fft_type == FFTTransformType::R2C && !forward) { - MKL_DFTI_CHECK(phi::dynload::DftiComputeForward( + MKL_DFTI_CHECK(common::dynload::DftiComputeForward( desc.get(), collapsed_input.data(), collapsed_output.data())); ConjKernel(ctx, collapsed_output, &collapsed_output); } else { if (forward) { - MKL_DFTI_CHECK(phi::dynload::DftiComputeForward( + MKL_DFTI_CHECK(common::dynload::DftiComputeForward( desc.get(), collapsed_input.data(), collapsed_output.data())); } else { - MKL_DFTI_CHECK(phi::dynload::DftiComputeBackward( + MKL_DFTI_CHECK(common::dynload::DftiComputeBackward( desc.get(), collapsed_input.data(), collapsed_output.data())); } } diff --git a/paddle/phi/kernels/funcs/fft.cu b/paddle/phi/kernels/funcs/fft.cu index e13a79b335ac0e..2010f7bdab0130 100644 --- a/paddle/phi/kernels/funcs/fft.cu +++ b/paddle/phi/kernels/funcs/fft.cu @@ -17,8 +17,8 @@ #include "paddle/phi/kernels/funcs/fft.h" #include "paddle/phi/kernels/funcs/fft_cache.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/assign_kernel.h" #include "paddle/phi/kernels/complex_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" @@ -93,7 +93,7 @@ bool has_large_prime_factor(int64_t n) { inline bool use_cache(const int64_t* signal_size) { bool using_cache = true; int cufft_version; - phi::dynload::cufftGetVersion(&cufft_version); + common::dynload::cufftGetVersion(&cufft_version); if (10300 <= cufft_version && cufft_version <= 10400) { using_cache = std::none_of( signal_size + 1, signal_size + kMaxDataNdim, [](int64_t dim_size) { @@ -190,14 +190,14 @@ void exec_fft(const phi::GPUContext& ctx, // prepare cufft for execution #if defined(PADDLE_WITH_CUDA) PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cufftSetStream(config->plan(), ctx.stream())); - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cufftSetWorkArea(config->plan(), workspace_tensor.data())); + common::dynload::cufftSetStream(config->plan(), ctx.stream())); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cufftSetWorkArea( + config->plan(), workspace_tensor.data())); #elif defined(PADDLE_WITH_HIP) PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::hipfftSetStream(config->plan(), ctx.stream())); - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::hipfftSetWorkArea(config->plan(), workspace_tensor.data())); + common::dynload::hipfftSetStream(config->plan(), ctx.stream())); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::hipfftSetWorkArea( + config->plan(), workspace_tensor.data())); #endif // execution of fft plan diff --git a/paddle/phi/kernels/funcs/fft.h b/paddle/phi/kernels/funcs/fft.h index 3f9e1191ebb3e6..86aa6e1f8cb729 100644 --- a/paddle/phi/kernels/funcs/fft.h +++ b/paddle/phi/kernels/funcs/fft.h @@ -15,9 +15,9 @@ #pragma once 
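Every hunk in this patch applies the same mechanical rewrite: phi::dynload::X becomes common::dynload::X with the argument list untouched. A hypothetical compatibility shim, not part of this diff, sketches how the old spelling could be kept compiling during such a migration via a namespace alias; dummy_exp is a made-up stand-in for a dynloaded symbol such as vsExp.

#include <cmath>

namespace common {
namespace dynload {
// Made-up stand-in for a dynamically loaded vendor routine.
inline void dummy_exp(int n, const float* x, float* y) {
  for (int i = 0; i < n; ++i) y[i] = std::exp(x[i]);
}
}  // namespace dynload
}  // namespace common

namespace phi {
// Alias: the legacy phi::dynload spelling forwards to common::dynload.
namespace dynload = ::common::dynload;
}  // namespace phi

int main() {
  float x = 0.0f, y = -1.0f;
  phi::dynload::dummy_exp(1, &x, &y);  // resolves to common::dynload::dummy_exp
  return (y == 1.0f) ? 0 : 1;          // exp(0) == 1 exactly
}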
#include +#include "paddle/common/data_type.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/fft_key.h b/paddle/phi/kernels/funcs/fft_key.h index 5893cfc6ba019f..2531ff25310fa9 100644 --- a/paddle/phi/kernels/funcs/fft_key.h +++ b/paddle/phi/kernels/funcs/fft_key.h @@ -13,7 +13,7 @@ // limitations under the License. #pragma once -#include "paddle/phi/core/utils/data_type.h" +#include "paddle/common/data_type.h" #include "paddle/phi/kernels/funcs/fft.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/fused_gemm_epilogue.h b/paddle/phi/kernels/funcs/fused_gemm_epilogue.h index df55625eada6d1..0944980da7218f 100644 --- a/paddle/phi/kernels/funcs/fused_gemm_epilogue.h +++ b/paddle/phi/kernels/funcs/fused_gemm_epilogue.h @@ -27,6 +27,7 @@ limitations under the License. */ #if CUDA_VERSION >= 11060 #include "glog/logging.h" +#include "paddle/common/enforce.h" #include "paddle/common/float16.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/backends/dynload/cublasLt.h" @@ -34,7 +35,6 @@ limitations under the License. */ #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/scope_guard.h" #include "paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h" #include "paddle/utils/flags.h" @@ -90,9 +90,9 @@ class GemmEpilogueAlgoCache { cublasLtMatmulPreference_t preference; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasLtMatmulPreferenceCreate(&preference)); + common::dynload::cublasLtMatmulPreferenceCreate(&preference)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasLtMatmulPreferenceSetAttribute( + common::dynload::cublasLtMatmulPreferenceSetAttribute( preference, CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES, &workspace_size, @@ -101,17 +101,17 @@ class GemmEpilogueAlgoCache { int returned_results = 0; std::vector heuristic_results( requested_algo_count_); - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasLtMatmulAlgoGetHeuristic(lt_handle, - op_desc, - a_desc, - b_desc, - c_desc, - c_desc, - preference, - requested_algo_count_, - heuristic_results.data(), - &returned_results)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulAlgoGetHeuristic( + lt_handle, + op_desc, + a_desc, + b_desc, + c_desc, + c_desc, + preference, + requested_algo_count_, + heuristic_results.data(), + &returned_results)); PADDLE_ENFORCE_GT( returned_results, @@ -119,7 +119,7 @@ class GemmEpilogueAlgoCache { phi::errors::Unavailable("No GEMM epilogue algorithm support!")); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasLtMatmulPreferenceDestroy(preference)); + common::dynload::cublasLtMatmulPreferenceDestroy(preference)); int best_algo_idx = -1; float best_algo_time = 0; @@ -127,23 +127,23 @@ class GemmEpilogueAlgoCache { // Run 100 times for warmup int warmup_algo_idx = 0; for (int t = 0; t < 100; t++) { - cublasStatus_t status = - phi::dynload::cublasLtMatmul(lt_handle, - op_desc, - alpha, - a, - a_desc, - b, - b_desc, - beta, - c, - c_desc, - c, - c_desc, - &heuristic_results[warmup_algo_idx].algo, - workspace, - workspace_size, - stream); + cublasStatus_t status = common::dynload::cublasLtMatmul( + lt_handle, + op_desc, + alpha, + a, + a_desc, + b, + b_desc, + beta, + c, + c_desc, + c, + c_desc, + &heuristic_results[warmup_algo_idx].algo, + workspace, + workspace_size, + stream); if (status != 
CUBLAS_STATUS_SUCCESS) { t = -1; warmup_algo_idx += 1; @@ -165,22 +165,22 @@ class GemmEpilogueAlgoCache { PADDLE_ENFORCE_GPU_SUCCESS(cudaEventRecord(start_event, stream)); cublasStatus_t status = - phi::dynload::cublasLtMatmul(lt_handle, - op_desc, - alpha, - a, - a_desc, - b, - b_desc, - beta, - c, - c_desc, - c, - c_desc, - &heuristic_results[algo_idx].algo, - workspace, - workspace_size, - stream); + common::dynload::cublasLtMatmul(lt_handle, + op_desc, + alpha, + a, + a_desc, + b, + b_desc, + beta, + c, + c_desc, + c, + c_desc, + &heuristic_results[algo_idx].algo, + workspace, + workspace_size, + stream); PADDLE_ENFORCE_GPU_SUCCESS(cudaEventRecord(stop_event, stream)); PADDLE_ENFORCE_GPU_SUCCESS(cudaEventSynchronize(stop_event)); @@ -236,7 +236,7 @@ class GemmEpilogueAlgoCache { int trans_a, trans_b; uint32_t epilogue; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescGetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescGetAttribute( desc, CUBLASLT_MATMUL_DESC_TRANSA, &trans_a, @@ -244,7 +244,7 @@ class GemmEpilogueAlgoCache { &size_to_write)); HashValue_(seed, hash_fn, static_cast(trans_a)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescGetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescGetAttribute( desc, CUBLASLT_MATMUL_DESC_TRANSB, &trans_b, @@ -252,7 +252,7 @@ class GemmEpilogueAlgoCache { &size_to_write)); HashValue_(seed, hash_fn, static_cast(trans_b)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescGetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescGetAttribute( desc, CUBLASLT_MATMUL_DESC_EPILOGUE, &epilogue, @@ -270,40 +270,54 @@ class GemmEpilogueAlgoCache { uint64_t row, col; int64_t ld, batch_offset; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutGetAttribute( - desc, - CUBLASLT_MATRIX_LAYOUT_TYPE, - &dtype, - sizeof(dtype), - &size_to_write)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cublasLtMatrixLayoutGetAttribute( + desc, + CUBLASLT_MATRIX_LAYOUT_TYPE, + &dtype, + sizeof(dtype), + &size_to_write)); HashValue_(seed, hash_fn, static_cast(dtype)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutGetAttribute( - desc, - CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT, - &batch, - sizeof(batch), - &size_to_write)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cublasLtMatrixLayoutGetAttribute( + desc, + CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT, + &batch, + sizeof(batch), + &size_to_write)); HashValue_(seed, hash_fn, static_cast(batch)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutGetAttribute( - desc, CUBLASLT_MATRIX_LAYOUT_ROWS, &row, sizeof(row), &size_to_write)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cublasLtMatrixLayoutGetAttribute( + desc, + CUBLASLT_MATRIX_LAYOUT_ROWS, + &row, + sizeof(row), + &size_to_write)); HashValue_(seed, hash_fn, static_cast(row)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutGetAttribute( - desc, CUBLASLT_MATRIX_LAYOUT_COLS, &col, sizeof(col), &size_to_write)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cublasLtMatrixLayoutGetAttribute( + desc, + CUBLASLT_MATRIX_LAYOUT_COLS, + &col, + sizeof(col), + &size_to_write)); HashValue_(seed, hash_fn, static_cast(col)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutGetAttribute( - desc, CUBLASLT_MATRIX_LAYOUT_LD, &ld, sizeof(ld), &size_to_write)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cublasLtMatrixLayoutGetAttribute( + desc, CUBLASLT_MATRIX_LAYOUT_LD, &ld, sizeof(ld), 
&size_to_write)); HashValue_(seed, hash_fn, static_cast(ld)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutGetAttribute( - desc, - CUBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET, - &batch_offset, - sizeof(batch_offset), - &size_to_write)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cublasLtMatrixLayoutGetAttribute( + desc, + CUBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET, + &batch_offset, + sizeof(batch_offset), + &size_to_write)); HashValue_(seed, hash_fn, static_cast(batch_offset)); } @@ -365,24 +379,24 @@ void ComputeFusedGemmEpilogueForward(const phi::GPUContext& dev_ctx, } cublasLtMatmulDesc_t operation_desc = NULL; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescCreate( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescCreate( &operation_desc, compute_type, scale_type)); cublasOperation_t transx = trans_x ? CUBLAS_OP_T : CUBLAS_OP_N; cublasOperation_t transy = trans_y ? CUBLAS_OP_T : CUBLAS_OP_N; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescSetAttribute( operation_desc, CUBLASLT_MATMUL_DESC_TRANSB, &transx, sizeof(transx))); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescSetAttribute( operation_desc, CUBLASLT_MATMUL_DESC_TRANSA, &transy, sizeof(transy))); cublasLtEpilogue_t epiloque_func = GetEpilogueType(activation, enable_auxiliary); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescSetAttribute( operation_desc, CUBLASLT_MATMUL_DESC_EPILOGUE, &epiloque_func, sizeof(epiloque_func))); const T* bias_data = bias->data(); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescSetAttribute( operation_desc, CUBLASLT_MATMUL_DESC_BIAS_POINTER, &bias_data, @@ -404,13 +418,13 @@ void ComputeFusedGemmEpilogueForward(const phi::GPUContext& dev_ctx, void* aux_data = reserve_space->data(); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescSetAttribute( operation_desc, CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER, &aux_data, sizeof(aux_data))); int64_t aux_ld = N; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescSetAttribute( operation_desc, CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD, &aux_ld, @@ -419,21 +433,21 @@ void ComputeFusedGemmEpilogueForward(const phi::GPUContext& dev_ctx, cublasLtMatrixLayout_t x_desc = NULL, y_desc = NULL, out_desc = NULL; if (trans_x) { - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasLtMatrixLayoutCreate(&x_desc, mat_type, M, K, M)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatrixLayoutCreate( + &x_desc, mat_type, M, K, M)); } else { - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasLtMatrixLayoutCreate(&x_desc, mat_type, K, M, K)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatrixLayoutCreate( + &x_desc, mat_type, K, M, K)); } if (trans_y) { - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasLtMatrixLayoutCreate(&y_desc, mat_type, K, N, K)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatrixLayoutCreate( + &y_desc, mat_type, K, N, K)); } else { - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasLtMatrixLayoutCreate(&y_desc, mat_type, N, K, N)); + 
PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatrixLayoutCreate( + &y_desc, mat_type, N, K, N)); } - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasLtMatrixLayoutCreate(&out_desc, mat_type, N, M, N)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatrixLayoutCreate( + &out_desc, mat_type, N, M, N)); cublasLtHandle_t lt_handle = dev_ctx.cublaslt_handle(); // NOTE(zengjinle): I do not know whether the 4MB workspace size is @@ -464,29 +478,31 @@ void ComputeFusedGemmEpilogueForward(const phi::GPUContext& dev_ctx, stream, workspace->ptr(), workspace_size); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmul(lt_handle, - operation_desc, - &alpha, - y_data, - y_desc, - x_data, - x_desc, - &beta, - out_data, - out_desc, - out_data, - out_desc, - algo, - workspace->ptr(), - workspace_size, - stream)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmul(lt_handle, + operation_desc, + &alpha, + y_data, + y_desc, + x_data, + x_desc, + &beta, + out_data, + out_desc, + out_data, + out_desc, + algo, + workspace->ptr(), + workspace_size, + stream)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasLtMatmulDescDestroy(operation_desc)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutDestroy(y_desc)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutDestroy(x_desc)); + common::dynload::cublasLtMatmulDescDestroy(operation_desc)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasLtMatrixLayoutDestroy(out_desc)); + common::dynload::cublasLtMatrixLayoutDestroy(y_desc)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cublasLtMatrixLayoutDestroy(x_desc)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cublasLtMatrixLayoutDestroy(out_desc)); } struct BwdFusedEpilogueSetter { @@ -671,18 +687,18 @@ void ComputeFusedGemmEpilogueBackwardImplDev( for (auto desc : descs) { if (desc) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasLtMatrixLayoutDestroy(desc)); + common::dynload::cublasLtMatrixLayoutDestroy(desc)); } } if (dx_operation_desc) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasLtMatmulDescDestroy(dx_operation_desc)); + common::dynload::cublasLtMatmulDescDestroy(dx_operation_desc)); } if (dy_operation_desc) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cublasLtMatmulDescDestroy(dy_operation_desc)); + common::dynload::cublasLtMatmulDescDestroy(dy_operation_desc)); } }); @@ -700,16 +716,16 @@ void ComputeFusedGemmEpilogueBackwardImplDev( if (TransX) { dx_dout_desc = &dout_trans_desc; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutCreate( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatrixLayoutCreate( dx_dout_desc, mat_type, z_row, z_col, z_row)); } else { dx_dout_desc = &dout_desc; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutCreate( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatrixLayoutCreate( dx_dout_desc, mat_type, z_col, z_row, z_col)); } dx_y_desc = &y_trans_desc; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutCreate( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatrixLayoutCreate( dx_y_desc, mat_type, y_col, y_row, y_col)); auto& a_desc = kXGradAIsDZ ? 
(*dx_dout_desc) : (*dx_y_desc); @@ -717,21 +733,21 @@ void ComputeFusedGemmEpilogueBackwardImplDev( auto a_trans = BoolToCuBlasEnum(Trait::kXGradATrans); auto b_trans = BoolToCuBlasEnum(Trait::kXGradBTrans); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutCreate( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatrixLayoutCreate( &dx_desc, phi::backends::gpu::ToCudaDataType(), x_col, x_row, x_col)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescCreate( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescCreate( &dx_operation_desc, compute_type, scale_type)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescSetAttribute( dx_operation_desc, CUBLASLT_MATMUL_DESC_TRANSB, &a_trans, sizeof(a_trans))); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescSetAttribute( dx_operation_desc, CUBLASLT_MATMUL_DESC_TRANSA, &b_trans, @@ -739,7 +755,7 @@ void ComputeFusedGemmEpilogueBackwardImplDev( cublasLtEpilogue_t epiloque_func_for_dx = GetEpilogueGradType(activation_grad); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescSetAttribute( dx_operation_desc, CUBLASLT_MATMUL_DESC_EPILOGUE, &epiloque_func_for_dx, @@ -747,17 +763,19 @@ void ComputeFusedGemmEpilogueBackwardImplDev( if (activation_grad != "none") { auto* aux_data = reserve_space->data(); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( - dx_operation_desc, - CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER, - &aux_data, - sizeof(aux_data))); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cublasLtMatmulDescSetAttribute( + dx_operation_desc, + CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_POINTER, + &aux_data, + sizeof(aux_data))); int64_t aux_ld = TransX ? 
M : K; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( - dx_operation_desc, - CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD, - &aux_ld, - sizeof(aux_ld))); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cublasLtMatmulDescSetAttribute( + dx_operation_desc, + CUBLASLT_MATMUL_DESC_EPILOGUE_AUX_LD, + &aux_ld, + sizeof(aux_ld))); } auto dx_workspace = memory_utils::Alloc( @@ -786,22 +804,23 @@ void ComputeFusedGemmEpilogueBackwardImplDev( dx_workspace->ptr(), workspace_size); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmul(lt_handle, - dx_operation_desc, - &alpha, - b_data, - b_desc, - a_data, - a_desc, - &beta_dx, - dx_data, - dx_desc, - dx_data, - dx_desc, - algo, - dx_workspace->ptr(), - workspace_size, - stream)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cublasLtMatmul(lt_handle, + dx_operation_desc, + &alpha, + b_data, + b_desc, + a_data, + a_desc, + &beta_dx, + dx_data, + dx_desc, + dx_data, + dx_desc, + algo, + dx_workspace->ptr(), + workspace_size, + stream)); } // dy = func(dout, x) @@ -812,19 +831,19 @@ void ComputeFusedGemmEpilogueBackwardImplDev( if (TransX) { dy_dout_desc = &dout_trans_desc; if (dout_trans_desc == nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutCreate( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatrixLayoutCreate( dy_dout_desc, mat_type, z_row, z_col, z_row)); } } else { dy_dout_desc = &dout_desc; if (dout_desc == nullptr) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutCreate( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatrixLayoutCreate( dy_dout_desc, mat_type, z_col, z_row, z_col)); } } dy_x_desc = &x_trans_desc; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutCreate( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatrixLayoutCreate( dy_x_desc, mat_type, x_col, x_row, x_col)); auto& a_desc = kYGradAIsDZ ? 
(*dy_dout_desc) : (*dy_x_desc); @@ -832,22 +851,22 @@ void ComputeFusedGemmEpilogueBackwardImplDev( auto a_trans = BoolToCuBlasEnum(Trait::kYGradATrans); auto b_trans = BoolToCuBlasEnum(Trait::kYGradBTrans); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatrixLayoutCreate( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatrixLayoutCreate( &dy_desc, phi::backends::gpu::ToCudaDataType(), y_col, y_row, y_col)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescCreate( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescCreate( &dy_operation_desc, compute_type, scale_type)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescSetAttribute( dy_operation_desc, CUBLASLT_MATMUL_DESC_TRANSB, &a_trans, sizeof(a_trans))); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescSetAttribute( dy_operation_desc, CUBLASLT_MATMUL_DESC_TRANSA, &b_trans, @@ -864,7 +883,7 @@ void ComputeFusedGemmEpilogueBackwardImplDev( } } - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cublasLtMatmulDescSetAttribute( dy_operation_desc, CUBLASLT_MATMUL_DESC_EPILOGUE, &epiloque_func_for_dy, @@ -873,11 +892,12 @@ void ComputeFusedGemmEpilogueBackwardImplDev( if (dbias) { auto* dbias_data = dev_ctx.Alloc(dbias, dbias->numel() * sizeof(DYT)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmulDescSetAttribute( - dy_operation_desc, - CUBLASLT_MATMUL_DESC_BIAS_POINTER, - &dbias_data, - sizeof(dbias_data))); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cublasLtMatmulDescSetAttribute( + dy_operation_desc, + CUBLASLT_MATMUL_DESC_BIAS_POINTER, + &dbias_data, + sizeof(dbias_data))); } auto dy_workspace = memory_utils::Alloc( @@ -905,22 +925,23 @@ void ComputeFusedGemmEpilogueBackwardImplDev( dy_workspace->ptr(), workspace_size); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cublasLtMatmul(lt_handle, - dy_operation_desc, - &alpha, - b_data, - b_desc, - a_data, - a_desc, - &beta_dy, - dy_data, - dy_desc, - dy_data, - dy_desc, - algo, - dy_workspace->ptr(), - workspace_size, - stream)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cublasLtMatmul(lt_handle, + dy_operation_desc, + &alpha, + b_data, + b_desc, + a_data, + a_desc, + &beta_dy, + dy_data, + dy_desc, + dy_data, + dy_desc, + algo, + dy_workspace->ptr(), + workspace_size, + stream)); } } diff --git a/paddle/phi/kernels/funcs/gather.h b/paddle/phi/kernels/funcs/gather.h index 7afc6280d374d9..87f85efc956436 100644 --- a/paddle/phi/kernels/funcs/gather.h +++ b/paddle/phi/kernels/funcs/gather.h @@ -18,9 +18,9 @@ limitations under the License. 
*/ #include #include +#include "paddle/common/ddim.h" #include "paddle/common/macros.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/gpc.cc b/paddle/phi/kernels/funcs/gpc.cc index cd02f276392086..038ffde44605f6 100644 --- a/paddle/phi/kernels/funcs/gpc.cc +++ b/paddle/phi/kernels/funcs/gpc.cc @@ -26,7 +26,7 @@ #include "paddle/phi/kernels/funcs/gpc.h" #include -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/gru_compute.h b/paddle/phi/kernels/funcs/gru_compute.h index 7e53c88b7394a0..a0529d7f75858c 100644 --- a/paddle/phi/kernels/funcs/gru_compute.h +++ b/paddle/phi/kernels/funcs/gru_compute.h @@ -11,8 +11,8 @@ limitations under the License. */ #pragma once +#include "paddle/common/enforce.h" #include "paddle/phi/backends/all_context.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/detail/activation_functions.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/hipfft_util.h b/paddle/phi/kernels/funcs/hipfft_util.h index 74ca06fcf17f04..a3baf5cb3f26d6 100644 --- a/paddle/phi/kernels/funcs/hipfft_util.h +++ b/paddle/phi/kernels/funcs/hipfft_util.h @@ -15,8 +15,8 @@ #pragma once #include +#include "paddle/common/enforce.h" #include "paddle/phi/backends/dynload/hipfft.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/fft.h" #include "paddle/phi/kernels/funcs/fft_key.h" @@ -28,7 +28,7 @@ namespace detail { class HIPFFTHandle { public: HIPFFTHandle() { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::hipfftCreate(&handle_)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::hipfftCreate(&handle_)); } HIPFFTHandle(const HIPFFTHandle& other) = delete; @@ -40,7 +40,7 @@ class HIPFFTHandle { ::hipfftHandle& get() { return handle_; } const ::hipfftHandle& get() const { return handle_; } - ~HIPFFTHandle() { phi::dynload::hipfftDestroy(handle_); } + ~HIPFFTHandle() { common::dynload::hipfftDestroy(handle_); } private: ::hipfftHandle handle_; @@ -88,20 +88,20 @@ class FFTConfig { // disable auto allocation of workspace to use allocator from the framework PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::hipfftSetAutoAllocation(plan(), /* autoAllocate */ 0)); + common::dynload::hipfftSetAutoAllocation(plan(), /* autoAllocate */ 0)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::hipfftMakePlanMany(plan(), - signal_ndim, - signal_sizes.data(), - /* inembed */ nullptr, - /* base_istride */ 1, - /* idist */ 1, - /* onembed */ nullptr, - /* base_ostride */ 1, - /* odist */ 1, - exec_type, - batch_size, - &ws_size_)); + common::dynload::hipfftMakePlanMany(plan(), + signal_ndim, + signal_sizes.data(), + /* inembed */ nullptr, + /* base_istride */ 1, + /* idist */ 1, + /* onembed */ nullptr, + /* base_ostride */ 1, + /* odist */ 1, + exec_type, + batch_size, + &ws_size_)); } const hipfftHandle& plan() const { return plan_.get(); } @@ -127,7 +127,7 @@ static void exec_plan(const FFTConfig& config, if (value_type == DataType::FLOAT32) { switch (config.transform_type()) { case FFTTransformType::C2C: { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::hipfftExecC2C( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::hipfftExecC2C( plan, static_cast(in_data), static_cast(out_data), @@ -135,24 +135,24 @@ static void exec_plan(const FFTConfig& config, return; } case FFTTransformType::R2C: { - PADDLE_ENFORCE_GPU_SUCCESS( - 
phi::dynload::hipfftExecR2C(plan, - static_cast(in_data), - static_cast(out_data))); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::hipfftExecR2C( + plan, + static_cast(in_data), + static_cast(out_data))); return; } case FFTTransformType::C2R: { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::hipfftExecC2R(plan, - static_cast(in_data), - static_cast(out_data))); + common::dynload::hipfftExecC2R(plan, + static_cast(in_data), + static_cast(out_data))); return; } } } else if (value_type == DataType::FLOAT64) { switch (config.transform_type()) { case FFTTransformType::C2C: { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::hipfftExecZ2Z( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::hipfftExecZ2Z( plan, static_cast(in_data), static_cast(out_data), @@ -160,14 +160,14 @@ static void exec_plan(const FFTConfig& config, return; } case FFTTransformType::R2C: { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::hipfftExecD2Z( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::hipfftExecD2Z( plan, static_cast(in_data), static_cast(out_data))); return; } case FFTTransformType::C2R: { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::hipfftExecZ2D( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::hipfftExecZ2D( plan, static_cast(in_data), static_cast(out_data))); diff --git a/paddle/phi/kernels/funcs/im2col.h b/paddle/phi/kernels/funcs/im2col.h index 73b2866924d1e9..683a1615ee3486 100644 --- a/paddle/phi/kernels/funcs/im2col.h +++ b/paddle/phi/kernels/funcs/im2col.h @@ -16,10 +16,10 @@ limitations under the License. */ #include +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/inclusive_scan.h b/paddle/phi/kernels/funcs/inclusive_scan.h index 265febd306f334..413f9d5eae67cb 100644 --- a/paddle/phi/kernels/funcs/inclusive_scan.h +++ b/paddle/phi/kernels/funcs/inclusive_scan.h @@ -25,9 +25,9 @@ namespace cub = hipcub; #include #include +#include "paddle/common/enforce.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/common/type_traits.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/for_range.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/interpolate_function.h b/paddle/phi/kernels/funcs/interpolate_function.h index 23731285926da4..91e797cda90920 100644 --- a/paddle/phi/kernels/funcs/interpolate_function.h +++ b/paddle/phi/kernels/funcs/interpolate_function.h @@ -14,9 +14,9 @@ #pragma once +#include "paddle/common/ddim.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #if defined(__NVCC__) || defined(__HIPCC__) diff --git a/paddle/phi/kernels/funcs/jit/benchmark.cc b/paddle/phi/kernels/funcs/jit/benchmark.cc index 894a711ddec6d7..545cbc079c46ae 100644 --- a/paddle/phi/kernels/funcs/jit/benchmark.cc +++ b/paddle/phi/kernels/funcs/jit/benchmark.cc @@ -16,10 +16,10 @@ #include #include "glog/logging.h" +#include "paddle/common/enforce.h" #include "paddle/phi/api/profiler/device_tracer.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/jit/kernels.h" #include "paddle/utils/flags.h" diff --git a/paddle/phi/kernels/funcs/jit/gen/act.h b/paddle/phi/kernels/funcs/jit/gen/act.h index 098bfe19728be3..9019af68176cf1 100644 --- 
a/paddle/phi/kernels/funcs/jit/gen/act.h +++ b/paddle/phi/kernels/funcs/jit/gen/act.h @@ -17,7 +17,7 @@ #include #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/kernels/funcs/jit/gen/jitcode.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/jit/gen/adam.h b/paddle/phi/kernels/funcs/jit/gen/adam.h index 5c432e03ec9214..be56661ff55338 100644 --- a/paddle/phi/kernels/funcs/jit/gen/adam.h +++ b/paddle/phi/kernels/funcs/jit/gen/adam.h @@ -17,7 +17,7 @@ #include #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/kernels/funcs/jit/gen/jitcode.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/jit/gen/adamw.h b/paddle/phi/kernels/funcs/jit/gen/adamw.h index dab90e0e0f69e1..9e0caa549e6a67 100644 --- a/paddle/phi/kernels/funcs/jit/gen/adamw.h +++ b/paddle/phi/kernels/funcs/jit/gen/adamw.h @@ -17,7 +17,7 @@ #include #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/kernels/funcs/jit/gen/jitcode.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/jit/gen/blas.h b/paddle/phi/kernels/funcs/jit/gen/blas.h index a046634440ea81..5c889e246077a3 100644 --- a/paddle/phi/kernels/funcs/jit/gen/blas.h +++ b/paddle/phi/kernels/funcs/jit/gen/blas.h @@ -17,7 +17,7 @@ #include #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/kernels/funcs/jit/gen/jitcode.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/jit/gen/embseqpool.h b/paddle/phi/kernels/funcs/jit/gen/embseqpool.h index 8e201b7538ebe1..86464b74b632e1 100644 --- a/paddle/phi/kernels/funcs/jit/gen/embseqpool.h +++ b/paddle/phi/kernels/funcs/jit/gen/embseqpool.h @@ -17,7 +17,7 @@ #include #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/kernels/funcs/jit/gen/jitcode.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/jit/gen/matmul.h b/paddle/phi/kernels/funcs/jit/gen/matmul.h index dcbec14250d86a..62eda7742eae16 100644 --- a/paddle/phi/kernels/funcs/jit/gen/matmul.h +++ b/paddle/phi/kernels/funcs/jit/gen/matmul.h @@ -20,7 +20,7 @@ #include #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/kernels/funcs/jit/gen/jitcode.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/jit/gen/seqpool.h b/paddle/phi/kernels/funcs/jit/gen/seqpool.h index 260fd6cde88ffa..83a75cb8f81f13 100644 --- a/paddle/phi/kernels/funcs/jit/gen/seqpool.h +++ b/paddle/phi/kernels/funcs/jit/gen/seqpool.h @@ -17,7 +17,7 @@ #include #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/kernels/funcs/jit/gen/jitcode.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/jit/gen/sgd.h b/paddle/phi/kernels/funcs/jit/gen/sgd.h index 4f9617ccdafb2e..b58b21e15cce7f 100644 --- a/paddle/phi/kernels/funcs/jit/gen/sgd.h +++ b/paddle/phi/kernels/funcs/jit/gen/sgd.h @@ -17,7 +17,7 @@ #include #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/kernels/funcs/jit/gen/jitcode.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/jit/gen/vbroadcast.h b/paddle/phi/kernels/funcs/jit/gen/vbroadcast.h index b1cf8521dd76c4..9aa7b8456eccae 100644 --- a/paddle/phi/kernels/funcs/jit/gen/vbroadcast.h +++ 
b/paddle/phi/kernels/funcs/jit/gen/vbroadcast.h @@ -17,7 +17,7 @@ #include #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/kernels/funcs/jit/gen/jitcode.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/jit/gen_base.cc b/paddle/phi/kernels/funcs/jit/gen_base.cc index 3758aaf4cace8d..bacfcd59877329 100644 --- a/paddle/phi/kernels/funcs/jit/gen_base.cc +++ b/paddle/phi/kernels/funcs/jit/gen_base.cc @@ -16,8 +16,8 @@ #include +#include "paddle/common/enforce.h" #include "paddle/phi/backends/cpu/cpu_info.h" -#include "paddle/phi/core/enforce.h" #ifdef _WIN32 #define posix_memalign_free _aligned_free diff --git a/paddle/phi/kernels/funcs/jit/helper.cc b/paddle/phi/kernels/funcs/jit/helper.cc index c135d6ee3177dd..8f5f64b48ef69f 100644 --- a/paddle/phi/kernels/funcs/jit/helper.cc +++ b/paddle/phi/kernels/funcs/jit/helper.cc @@ -16,7 +16,7 @@ #include -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" namespace phi { namespace jit { diff --git a/paddle/phi/kernels/funcs/jit/helper.h b/paddle/phi/kernels/funcs/jit/helper.h index 7e3394dffd4a2a..69e647494d45ce 100644 --- a/paddle/phi/kernels/funcs/jit/helper.h +++ b/paddle/phi/kernels/funcs/jit/helper.h @@ -22,8 +22,8 @@ #include // for std::move #include +#include "paddle/common/enforce.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/jit/gen_base.h" #include "paddle/phi/kernels/funcs/jit/kernel_base.h" #include "paddle/phi/kernels/funcs/jit/kernel_key.h" diff --git a/paddle/phi/kernels/funcs/jit/more/mkl/mkl.cc b/paddle/phi/kernels/funcs/jit/more/mkl/mkl.cc index deaeba0224fb98..2523315fa29713 100644 --- a/paddle/phi/kernels/funcs/jit/more/mkl/mkl.cc +++ b/paddle/phi/kernels/funcs/jit/more/mkl/mkl.cc @@ -29,20 +29,20 @@ void MatMul(const float* a, const float* b, float* c, const matmul_attr_t* attr) { - phi::dynload::cblas_sgemm(CblasRowMajor, - CblasNoTrans, - CblasNoTrans, - attr->m, - attr->n, - attr->k, - 1.f, - a, - attr->k, - b, - attr->n, - 0.f, - c, - attr->n); + common::dynload::cblas_sgemm(CblasRowMajor, + CblasNoTrans, + CblasNoTrans, + attr->m, + attr->n, + attr->k, + 1.f, + a, + attr->k, + b, + attr->n, + 0.f, + c, + attr->n); } template <> @@ -50,46 +50,46 @@ void MatMul(const double* a, const double* b, double* c, const matmul_attr_t* attr) { - phi::dynload::cblas_dgemm(CblasRowMajor, - CblasNoTrans, - CblasNoTrans, - attr->m, - attr->n, - attr->k, - 1.0, - a, - attr->k, - b, - attr->n, - 0.0, - c, - attr->n); + common::dynload::cblas_dgemm(CblasRowMajor, + CblasNoTrans, + CblasNoTrans, + attr->m, + attr->n, + attr->k, + 1.0, + a, + attr->k, + b, + attr->n, + 0.0, + c, + attr->n); } template <> void VMul(const float* x, const float* y, float* z, int n) { - phi::dynload::vsMul(n, x, y, z); + common::dynload::vsMul(n, x, y, z); } template <> void VMul(const double* x, const double* y, double* z, int n) { - phi::dynload::vdMul(n, x, y, z); + common::dynload::vdMul(n, x, y, z); } template <> void VAdd(const float* x, const float* y, float* z, int n) { - phi::dynload::vsAdd(n, x, y, z); + common::dynload::vsAdd(n, x, y, z); } template <> void VAdd(const double* x, const double* y, double* z, int n) { - phi::dynload::vdAdd(n, x, y, z); + common::dynload::vdAdd(n, x, y, z); } template <> void VScal(const float* a, const float* x, float* y, int n) { if (x == y) { - phi::dynload::cblas_sscal(n, *a, y, 1); + common::dynload::cblas_sscal(n, *a, y, 1); } else { refer::VScal(a, x, 
y, n); } @@ -98,7 +98,7 @@ void VScal(const float* a, const float* x, float* y, int n) { template <> void VScal(const double* a, const double* x, double* y, int n) { if (x == y) { - phi::dynload::cblas_dscal(n, *a, y, 1); + common::dynload::cblas_dscal(n, *a, y, 1); } else { refer::VScal(a, x, y, n); } @@ -106,52 +106,52 @@ void VScal(const double* a, const double* x, double* y, int n) { template <> void VExp(const float* x, float* y, int n) { - phi::dynload::vsExp(n, x, y); + common::dynload::vsExp(n, x, y); } template <> void VExp(const double* x, double* y, int n) { - phi::dynload::vdExp(n, x, y); + common::dynload::vdExp(n, x, y); } template <> void VSquare(const float* x, float* y, int n) { - phi::dynload::vsSqr(n, x, y); + common::dynload::vsSqr(n, x, y); } template <> void VSquare(const double* x, double* y, int n) { - phi::dynload::vdSqr(n, x, y); + common::dynload::vdSqr(n, x, y); } template <> void VCopy(const float* x, float* y, int n) { - phi::dynload::cblas_scopy(n, x, 1, y, 1); + common::dynload::cblas_scopy(n, x, 1, y, 1); } template <> void VCopy(const double* x, double* y, int n) { - phi::dynload::cblas_dcopy(n, x, 1, y, 1); + common::dynload::cblas_dcopy(n, x, 1, y, 1); } template <> void VAXPY(float a, const float* x, float* y, int n) { - phi::dynload::cblas_saxpy(n, a, x, 1, y, 1); + common::dynload::cblas_saxpy(n, a, x, 1, y, 1); } template <> void VAXPY(double a, const double* x, double* y, int n) { - phi::dynload::cblas_daxpy(n, a, x, 1, y, 1); + common::dynload::cblas_daxpy(n, a, x, 1, y, 1); } template <> void ASum(const float* x, float* res, int n) { - res[0] = phi::dynload::cblas_sasum(n, x, 1); + res[0] = common::dynload::cblas_sasum(n, x, 1); } template <> void ASum(const double* x, double* res, int n) { - res[0] = phi::dynload::cblas_dasum(n, x, 1); + res[0] = common::dynload::cblas_dasum(n, x, 1); } // TODO(TJ): tuning me carefully on AVX, AVX2 and AVX512 diff --git a/paddle/phi/kernels/funcs/jit/more/mkl/mkl.h b/paddle/phi/kernels/funcs/jit/more/mkl/mkl.h index 017fd7980039dc..069f62fe22f5eb 100644 --- a/paddle/phi/kernels/funcs/jit/more/mkl/mkl.h +++ b/paddle/phi/kernels/funcs/jit/more/mkl/mkl.h @@ -18,7 +18,7 @@ #include #include -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/kernels/funcs/jit/kernel_base.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/jit/refer/refer.h b/paddle/phi/kernels/funcs/jit/refer/refer.h index c7c3835f890682..781b417064c0fc 100644 --- a/paddle/phi/kernels/funcs/jit/refer/refer.h +++ b/paddle/phi/kernels/funcs/jit/refer/refer.h @@ -18,7 +18,7 @@ #include #include -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/kernels/funcs/jit/helper.h" #include "paddle/phi/kernels/funcs/jit/kernel_base.h" diff --git a/paddle/phi/kernels/funcs/jit/test.cc b/paddle/phi/kernels/funcs/jit/test.cc index d388d95975cff9..f80a5b49cba47b 100644 --- a/paddle/phi/kernels/funcs/jit/test.cc +++ b/paddle/phi/kernels/funcs/jit/test.cc @@ -18,9 +18,9 @@ limitations under the License. 
*/ #include "glog/logging.h" #include "gtest/gtest.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/cpu/cpu_info.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/jit/kernels.h" #include "paddle/utils/flags.h" diff --git a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h index 1a52e57e45f236..6a82875819161b 100644 --- a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h +++ b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h @@ -26,11 +26,11 @@ namespace cub = hipcub; #include "glog/logging.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/lstm_compute.h b/paddle/phi/kernels/funcs/lstm_compute.h index 56cd975d848caa..275f20872a9cc1 100644 --- a/paddle/phi/kernels/funcs/lstm_compute.h +++ b/paddle/phi/kernels/funcs/lstm_compute.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once +#include "paddle/common/enforce.h" #include "paddle/phi/backends/all_context.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/detail/activation_functions.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/math_function.h b/paddle/phi/kernels/funcs/math_function.h index 5390d77c876f5f..9cd0db530dc4f0 100644 --- a/paddle/phi/kernels/funcs/math_function.h +++ b/paddle/phi/kernels/funcs/math_function.h @@ -17,10 +17,10 @@ limitations under the License. */ #include #include +#include "paddle/common/data_type.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/utils/data_type.h" #ifdef PADDLE_WITH_XPU #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" diff --git a/paddle/phi/kernels/funcs/matrix_inverse.h b/paddle/phi/kernels/funcs/matrix_inverse.h index f0cd265a546481..0b42785e5f6681 100644 --- a/paddle/phi/kernels/funcs/matrix_inverse.h +++ b/paddle/phi/kernels/funcs/matrix_inverse.h @@ -18,9 +18,9 @@ limitations under the License. */ #include "Eigen/Core" #include "Eigen/LU" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/matrix_solve.h b/paddle/phi/kernels/funcs/matrix_solve.h index 3856c06c1b25fc..a21a5eb4ec8da4 100644 --- a/paddle/phi/kernels/funcs/matrix_solve.h +++ b/paddle/phi/kernels/funcs/matrix_solve.h @@ -18,9 +18,9 @@ limitations under the License. 
*/ #include "Eigen/Core" #include "Eigen/LU" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/mkl_fft_utils.h b/paddle/phi/kernels/funcs/mkl_fft_utils.h index dbc0678ab7ae5a..ba85f6f9107aa2 100644 --- a/paddle/phi/kernels/funcs/mkl_fft_utils.h +++ b/paddle/phi/kernels/funcs/mkl_fft_utils.h @@ -23,18 +23,18 @@ namespace phi { namespace funcs { namespace detail { -#define MKL_DFTI_CHECK(expr) \ - do { \ - MKL_LONG status = (expr); \ - if (!phi::dynload::DftiErrorClass(status, DFTI_NO_ERROR)) \ - PADDLE_THROW( \ - phi::errors::External(phi::dynload::DftiErrorMessage(status))); \ +#define MKL_DFTI_CHECK(expr) \ + do { \ + MKL_LONG status = (expr); \ + if (!common::dynload::DftiErrorClass(status, DFTI_NO_ERROR)) \ + PADDLE_THROW( \ + phi::errors::External(common::dynload::DftiErrorMessage(status))); \ } while (0); struct DftiDescriptorDeleter { void operator()(DFTI_DESCRIPTOR_HANDLE handle) { if (handle != nullptr) { - MKL_DFTI_CHECK(phi::dynload::DftiFreeDescriptor(&handle)); + MKL_DFTI_CHECK(common::dynload::DftiFreeDescriptor(&handle)); } } }; @@ -52,7 +52,7 @@ class DftiDescriptor { "DftiDescriptor has already been initialized.")); DFTI_DESCRIPTOR* raw_desc; - MKL_DFTI_CHECK(phi::dynload::DftiCreateDescriptorX( + MKL_DFTI_CHECK(common::dynload::DftiCreateDescriptorX( &raw_desc, precision, signal_type, signal_ndim, sizes)); desc_.reset(raw_desc); } @@ -105,20 +105,20 @@ static DftiDescriptor plan_mkl_fft(const DataType in_dtype, descriptor.init(precision, domain, signal_ndim, fft_sizes.data() + 1); // placement inplace or not inplace - MKL_DFTI_CHECK(phi::dynload::DftiSetValue( + MKL_DFTI_CHECK(common::dynload::DftiSetValue( descriptor.get(), DFTI_PLACEMENT, DFTI_NOT_INPLACE)); // number of transformations const MKL_LONG batch_size = fft_sizes[0]; - MKL_DFTI_CHECK(phi::dynload::DftiSetValue( + MKL_DFTI_CHECK(common::dynload::DftiSetValue( descriptor.get(), DFTI_NUMBER_OF_TRANSFORMS, batch_size)); // input & output distance const MKL_LONG idist = in_strides[0]; const MKL_LONG odist = out_strides[0]; - MKL_DFTI_CHECK( - phi::dynload::DftiSetValue(descriptor.get(), DFTI_INPUT_DISTANCE, idist)); - MKL_DFTI_CHECK(phi::dynload::DftiSetValue( + MKL_DFTI_CHECK(common::dynload::DftiSetValue( + descriptor.get(), DFTI_INPUT_DISTANCE, idist)); + MKL_DFTI_CHECK(common::dynload::DftiSetValue( descriptor.get(), DFTI_OUTPUT_DISTANCE, odist)); // input & output stride @@ -128,14 +128,14 @@ static DftiDescriptor plan_mkl_fft(const DataType in_dtype, mkl_in_stride[i] = in_strides[i]; mkl_out_stride[i] = out_strides[i]; } - MKL_DFTI_CHECK(phi::dynload::DftiSetValue( + MKL_DFTI_CHECK(common::dynload::DftiSetValue( descriptor.get(), DFTI_INPUT_STRIDES, mkl_in_stride.data())); - MKL_DFTI_CHECK(phi::dynload::DftiSetValue( + MKL_DFTI_CHECK(common::dynload::DftiSetValue( descriptor.get(), DFTI_OUTPUT_STRIDES, mkl_out_stride.data())); // conjugate even storage if (!(fft_type == FFTTransformType::C2C)) { - MKL_DFTI_CHECK(phi::dynload::DftiSetValue( + MKL_DFTI_CHECK(common::dynload::DftiSetValue( descriptor.get(), DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX)); } @@ -158,12 +158,12 @@ static DftiDescriptor plan_mkl_fft(const DataType in_dtype, return DFTI_BACKWARD_SCALE; } }(); - MKL_DFTI_CHECK( - phi::dynload::DftiSetValue(descriptor.get(), scale_direction, scale)); + MKL_DFTI_CHECK(common::dynload::DftiSetValue( + descriptor.get(), 
scale_direction, scale)); } // commit the descriptor - MKL_DFTI_CHECK(phi::dynload::DftiCommitDescriptor(descriptor.get())); + MKL_DFTI_CHECK(common::dynload::DftiCommitDescriptor(descriptor.get())); return descriptor; } diff --git a/paddle/phi/kernels/funcs/multinomial_functor.h b/paddle/phi/kernels/funcs/multinomial_functor.h index 05a5a0faf67746..40428820a88998 100644 --- a/paddle/phi/kernels/funcs/multinomial_functor.h +++ b/paddle/phi/kernels/funcs/multinomial_functor.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once +#include "paddle/common/enforce.h" #include "paddle/phi/core/device_context.h" -#include "paddle/phi/core/enforce.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/norm_utils.h b/paddle/phi/kernels/funcs/norm_utils.h index 5c898549b353ea..7df4acc980ab41 100644 --- a/paddle/phi/kernels/funcs/norm_utils.h +++ b/paddle/phi/kernels/funcs/norm_utils.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include "paddle/common/ddim.h" #include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/padding.h b/paddle/phi/kernels/funcs/padding.h index d6faa5f824c0d5..fc83dfe53cda60 100644 --- a/paddle/phi/kernels/funcs/padding.h +++ b/paddle/phi/kernels/funcs/padding.h @@ -16,8 +16,8 @@ limitations under the License. */ #include #include +#include "paddle/common/enforce.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" diff --git a/paddle/phi/kernels/funcs/range_function.h b/paddle/phi/kernels/funcs/range_function.h index 5ace32f46ace17..e8d805833440ae 100644 --- a/paddle/phi/kernels/funcs/range_function.h +++ b/paddle/phi/kernels/funcs/range_function.h @@ -13,7 +13,7 @@ // limitations under the License. #pragma once -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/reduce_function.h b/paddle/phi/kernels/funcs/reduce_function.h index 93d4433ef6877b..75fe2f585a9f50 100644 --- a/paddle/phi/kernels/funcs/reduce_function.h +++ b/paddle/phi/kernels/funcs/reduce_function.h @@ -58,8 +58,8 @@ using dim3 = phi::kps::dim3; #endif #include "paddle/common/array.h" +#include "paddle/common/enforce.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_utils.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" diff --git a/paddle/phi/kernels/funcs/scatter.h b/paddle/phi/kernels/funcs/scatter.h index 5934f57b47ddec..6a30012d73e7cf 100644 --- a/paddle/phi/kernels/funcs/scatter.h +++ b/paddle/phi/kernels/funcs/scatter.h @@ -17,8 +17,8 @@ limitations under the License. 
*/ #include #include +#include "paddle/common/ddim.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/eigen/common.h" diff --git a/paddle/phi/kernels/funcs/select_impl.cu.h b/paddle/phi/kernels/funcs/select_impl.cu.h index 96b7942cf27094..a756997dbfa53c 100644 --- a/paddle/phi/kernels/funcs/select_impl.cu.h +++ b/paddle/phi/kernels/funcs/select_impl.cu.h @@ -25,9 +25,9 @@ namespace cub = hipcub; #endif #include +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/primitive/kernel_primitives.h" diff --git a/paddle/phi/kernels/funcs/selected_rows_functor.cc b/paddle/phi/kernels/funcs/selected_rows_functor.cc index 5696f44f68fd7e..b1a7f672c82c64 100644 --- a/paddle/phi/kernels/funcs/selected_rows_functor.cc +++ b/paddle/phi/kernels/funcs/selected_rows_functor.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/mixed_vector.h" #ifdef PADDLE_WITH_XPU diff --git a/paddle/phi/kernels/funcs/slice.h b/paddle/phi/kernels/funcs/slice.h index 38b127541650be..bbe95e5e5fe7d4 100644 --- a/paddle/phi/kernels/funcs/slice.h +++ b/paddle/phi/kernels/funcs/slice.h @@ -14,9 +14,9 @@ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" +#include "paddle/common/enforce.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" diff --git a/paddle/phi/kernels/funcs/softmax.cu b/paddle/phi/kernels/funcs/softmax.cu index 2ca97cd4ac2055..7874b54f0cb551 100644 --- a/paddle/phi/kernels/funcs/softmax.cu +++ b/paddle/phi/kernels/funcs/softmax.cu @@ -51,30 +51,30 @@ void SoftmaxCUDNNFunctor::operator()( miopenTensorDescriptor_t cudnn_y_desc = xDesc.descriptor(layout, cudnn_tensor_dims); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenSoftmaxForward_V2(context.cudnn_handle(), - CudnnDataType::kOne(), - cudnn_x_desc, - X->data(), - CudnnDataType::kZero(), - cudnn_y_desc, - context.template Alloc(Y), - MIOPEN_SOFTMAX_ACCURATE, - MIOPEN_SOFTMAX_MODE_INSTANCE)); + common::dynload::miopenSoftmaxForward_V2(context.cudnn_handle(), + CudnnDataType::kOne(), + cudnn_x_desc, + X->data(), + CudnnDataType::kZero(), + cudnn_y_desc, + context.template Alloc(Y), + MIOPEN_SOFTMAX_ACCURATE, + MIOPEN_SOFTMAX_MODE_INSTANCE)); #else cudnnTensorDescriptor_t cudnn_x_desc = xDesc.descriptor(layout, cudnn_tensor_dims); cudnnTensorDescriptor_t cudnn_y_desc = xDesc.descriptor(layout, cudnn_tensor_dims); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSoftmaxForward(context.cudnn_handle(), - CUDNN_SOFTMAX_ACCURATE, - CUDNN_SOFTMAX_MODE_INSTANCE, - CudnnDataType::kOne(), - cudnn_x_desc, - X->data(), - CudnnDataType::kZero(), - cudnn_y_desc, - context.template Alloc(Y))); + common::dynload::cudnnSoftmaxForward(context.cudnn_handle(), + CUDNN_SOFTMAX_ACCURATE, + CUDNN_SOFTMAX_MODE_INSTANCE, + CudnnDataType::kOne(), + cudnn_x_desc, + X->data(), + CudnnDataType::kZero(), + cudnn_y_desc, + context.template Alloc(Y))); #endif } @@ -105,18 +105,18 @@ void SoftmaxGradCUDNNFunctor::operator()( dxDesc.descriptor(layout, cudnn_tensor_dims); 
miopenTensorDescriptor_t cudnn_ygrad_desc = dyDesc.descriptor(layout, cudnn_tensor_dims); - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenSoftmaxBackward_V2(context.cudnn_handle(), - CudnnDataType::kOne(), - cudnn_y_desc, - Y->data(), - cudnn_ygrad_desc, - YGrad->data(), - CudnnDataType::kZero(), - cudnn_xgrad_desc, - context.template Alloc(XGrad), - MIOPEN_SOFTMAX_ACCURATE, - MIOPEN_SOFTMAX_MODE_INSTANCE)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSoftmaxBackward_V2( + context.cudnn_handle(), + CudnnDataType::kOne(), + cudnn_y_desc, + Y->data(), + cudnn_ygrad_desc, + YGrad->data(), + CudnnDataType::kZero(), + cudnn_xgrad_desc, + context.template Alloc(XGrad), + MIOPEN_SOFTMAX_ACCURATE, + MIOPEN_SOFTMAX_MODE_INSTANCE)); #else cudnnTensorDescriptor_t cudnn_y_desc = yDesc.descriptor(layout, cudnn_tensor_dims); @@ -125,17 +125,17 @@ void SoftmaxGradCUDNNFunctor::operator()( cudnnTensorDescriptor_t cudnn_ygrad_desc = dyDesc.descriptor(layout, cudnn_tensor_dims); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSoftmaxBackward(context.cudnn_handle(), - CUDNN_SOFTMAX_ACCURATE, - CUDNN_SOFTMAX_MODE_INSTANCE, - CudnnDataType::kOne(), - cudnn_y_desc, - Y->data(), - cudnn_ygrad_desc, - YGrad->data(), - CudnnDataType::kZero(), - cudnn_xgrad_desc, - context.template Alloc(XGrad))); + common::dynload::cudnnSoftmaxBackward(context.cudnn_handle(), + CUDNN_SOFTMAX_ACCURATE, + CUDNN_SOFTMAX_MODE_INSTANCE, + CudnnDataType::kOne(), + cudnn_y_desc, + Y->data(), + cudnn_ygrad_desc, + YGrad->data(), + CudnnDataType::kZero(), + cudnn_xgrad_desc, + context.template Alloc(XGrad))); #endif } diff --git a/paddle/phi/kernels/funcs/sparse/common_shape.h b/paddle/phi/kernels/funcs/sparse/common_shape.h index e4c836d1162523..451fbabeee528e 100644 --- a/paddle/phi/kernels/funcs/sparse/common_shape.h +++ b/paddle/phi/kernels/funcs/sparse/common_shape.h @@ -16,7 +16,7 @@ limitations under the License. */ #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/sparse/convolution.h b/paddle/phi/kernels/funcs/sparse/convolution.h index e6f3a573088b28..7048ca1a127f5c 100644 --- a/paddle/phi/kernels/funcs/sparse/convolution.h +++ b/paddle/phi/kernels/funcs/sparse/convolution.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/phi/kernels/funcs/sparse/flatten_indices.h b/paddle/phi/kernels/funcs/sparse/flatten_indices.h index 9a031b8cc12ca4..4edcd839572dbb 100644 --- a/paddle/phi/kernels/funcs/sparse/flatten_indices.h +++ b/paddle/phi/kernels/funcs/sparse/flatten_indices.h @@ -16,7 +16,7 @@ limitations under the License. */ #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/sparse/softmax.h b/paddle/phi/kernels/funcs/sparse/softmax.h index fcb45def6c1fae..2a820461c4181b 100644 --- a/paddle/phi/kernels/funcs/sparse/softmax.h +++ b/paddle/phi/kernels/funcs/sparse/softmax.h @@ -14,7 +14,7 @@ limitations under the License. 
*/ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/tensor_utils.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h index 63449ecbda7a22..c7bc3e2f4bb806 100644 --- a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h +++ b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h @@ -16,13 +16,13 @@ #include "glog/logging.h" +#include "paddle/common/ddim.h" +#include "paddle/common/enforce.h" #include "paddle/common/float16.h" #include "paddle/phi/backends/dynload/cusparse.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" #include "paddle/phi/core/visit_type.h" @@ -90,22 +90,22 @@ inline void CreateCsrDescriptor(const phi::SparseCsrTensor& x, int64_t batch_nnz = x.nnz() / batch_size; cudaDataType_t gpu_type = GetGpuDataType(); dev_ctx.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseCreateCsr(descriptor, - M, - N, - batch_nnz, - const_cast(crows_data), - const_cast(cols_data), - const_cast(values_data), - CUSPARSE_INDEX_64I, - CUSPARSE_INDEX_64I, - CUSPARSE_INDEX_BASE_ZERO, - gpu_type); + common::dynload::cusparseCreateCsr(descriptor, + M, + N, + batch_nnz, + const_cast(crows_data), + const_cast(cols_data), + const_cast(values_data), + CUSPARSE_INDEX_64I, + CUSPARSE_INDEX_64I, + CUSPARSE_INDEX_BASE_ZERO, + gpu_type); }); if (batch_size > 1) { #if CUDA_VERSION >= 11080 dev_ctx.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseCsrSetStridedBatch( + common::dynload::cusparseCsrSetStridedBatch( *descriptor, batch_size, M + 1, batch_nnz); }); #else @@ -144,22 +144,22 @@ inline void CreateCooDescriptor(const phi::SparseCooTensor& x, int64_t batch_nnz = nnz / batch_size; cudaDataType_t gpu_type = GetGpuDataType(); dev_ctx.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseCreateCoo(descriptor, - M, - N, - batch_nnz, - const_cast(rows_data), - const_cast(cols_data), - const_cast(values_data), - CUSPARSE_INDEX_64I, - CUSPARSE_INDEX_BASE_ZERO, - gpu_type); + common::dynload::cusparseCreateCoo(descriptor, + M, + N, + batch_nnz, + const_cast(rows_data), + const_cast(cols_data), + const_cast(values_data), + CUSPARSE_INDEX_64I, + CUSPARSE_INDEX_BASE_ZERO, + gpu_type); }); if (batch_size > 1) { #if CUDA_VERSION >= 11080 dev_ctx.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseCooSetStridedBatch( + common::dynload::cusparseCooSetStridedBatch( *descriptor, batch_size, batch_nnz); }); #else @@ -195,7 +195,7 @@ class CuSparseSpMatDescriptor { ~CuSparseSpMatDescriptor() { dev_ctx_.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseDestroySpMat(descriptor_); + common::dynload::cusparseDestroySpMat(descriptor_); }); VLOG(6) << "Destroy cusparseSpMatDescr_t " << &descriptor_; } @@ -232,20 +232,20 @@ class CuSparseDnMatDescriptor { const T* x_data = x.data(); cudaDataType_t gpu_type = GetGpuDataType(); dev_ctx_.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseCreateDnMat(&descriptor_, - M, - N, - N, - const_cast(x_data), - gpu_type, - CUSPARSE_ORDER_ROW); + common::dynload::cusparseCreateDnMat(&descriptor_, + M, + N, + N, + const_cast(x_data), + gpu_type, + CUSPARSE_ORDER_ROW); }); PADDLE_ENFORCE_EQ(x.numel(), 
batch_size * M * N); if (batch_size > 1) { #if CUDA_VERSION >= 11080 dev_ctx_.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseDnMatSetStridedBatch( + common::dynload::cusparseDnMatSetStridedBatch( descriptor_, batch_size, M * N); }); #else @@ -259,7 +259,7 @@ class CuSparseDnMatDescriptor { ~CuSparseDnMatDescriptor() { dev_ctx_.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseDestroyDnMat(descriptor_); + common::dynload::cusparseDestroyDnMat(descriptor_); }); VLOG(6) << "Destroy cusparseDnMatDescr_t " << &descriptor_; } @@ -288,7 +288,7 @@ class CuSparseDnVecDescriptor { const T* x_data = x.data(); cudaDataType_t gpu_type = GetGpuDataType(); dev_ctx_.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseCreateDnVec( + common::dynload::cusparseCreateDnVec( &descriptor_, x.numel(), const_cast(x_data), gpu_type); }); @@ -297,7 +297,7 @@ class CuSparseDnVecDescriptor { ~CuSparseDnVecDescriptor() { dev_ctx_.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseDestroyDnVec(descriptor_); + common::dynload::cusparseDestroyDnVec(descriptor_); }); VLOG(6) << "Destroy cusparseDnVecDescr_t " << &descriptor_; } @@ -326,17 +326,17 @@ void SparseBlas::SPMM(bool transa, cudaDataType_t gpu_type = GetGpuDataType(); size_t buffer_size = 0; dev_ctx_.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseSpMM_bufferSize(handle, - GetTransposeOperation(transa), - GetTransposeOperation(transb), - &alpha, - a_descriptor.descriptor(), - b_descriptor.descriptor(), - &beta, - out_descriptor.descriptor(), - gpu_type, - GetSpMMAlgorithm(mat_a), - &buffer_size); + common::dynload::cusparseSpMM_bufferSize(handle, + GetTransposeOperation(transa), + GetTransposeOperation(transb), + &alpha, + a_descriptor.descriptor(), + b_descriptor.descriptor(), + &beta, + out_descriptor.descriptor(), + gpu_type, + GetSpMMAlgorithm(mat_a), + &buffer_size); }); phi::Allocator::AllocationPtr tmp_buffer = phi::memory_utils::Alloc( @@ -345,17 +345,17 @@ void SparseBlas::SPMM(bool transa, phi::Stream(reinterpret_cast(dev_ctx_.stream()))); void* tmp_buffer_ptr = tmp_buffer->ptr(); dev_ctx_.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseSpMM(handle, - GetTransposeOperation(transa), - GetTransposeOperation(transb), - &alpha, - a_descriptor.descriptor(), - b_descriptor.descriptor(), - &beta, - out_descriptor.descriptor(), - gpu_type, - GetSpMMAlgorithm(mat_a), - tmp_buffer_ptr); + common::dynload::cusparseSpMM(handle, + GetTransposeOperation(transa), + GetTransposeOperation(transb), + &alpha, + a_descriptor.descriptor(), + b_descriptor.descriptor(), + &beta, + out_descriptor.descriptor(), + gpu_type, + GetSpMMAlgorithm(mat_a), + tmp_buffer_ptr); }); } @@ -375,20 +375,20 @@ void SparseBlas::SPMV(bool transa, cudaDataType_t gpu_type = GetGpuDataType(); size_t buffer_size = 0; dev_ctx_.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseSpMV_bufferSize(handle, - GetTransposeOperation(transa), - &alpha, - a_descriptor.descriptor(), - x_descriptor.descriptor(), - &beta, - out_descriptor.descriptor(), - gpu_type, + common::dynload::cusparseSpMV_bufferSize(handle, + GetTransposeOperation(transa), + &alpha, + a_descriptor.descriptor(), + x_descriptor.descriptor(), + &beta, + out_descriptor.descriptor(), + gpu_type, #if CUDA_VERSION >= 11040 - CUSPARSE_SPMV_ALG_DEFAULT, + CUSPARSE_SPMV_ALG_DEFAULT, #else - CUSPARSE_MV_ALG_DEFAULT, + CUSPARSE_MV_ALG_DEFAULT, #endif - &buffer_size); + &buffer_size); }); phi::Allocator::AllocationPtr 
tmp_buffer = phi::memory_utils::Alloc( @@ -397,20 +397,20 @@ void SparseBlas::SPMV(bool transa, phi::Stream(reinterpret_cast(dev_ctx_.stream()))); void* tmp_buffer_ptr = tmp_buffer->ptr(); dev_ctx_.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseSpMV(handle, - GetTransposeOperation(transa), - &alpha, - a_descriptor.descriptor(), - x_descriptor.descriptor(), - &beta, - out_descriptor.descriptor(), - gpu_type, + common::dynload::cusparseSpMV(handle, + GetTransposeOperation(transa), + &alpha, + a_descriptor.descriptor(), + x_descriptor.descriptor(), + &beta, + out_descriptor.descriptor(), + gpu_type, #if CUDA_VERSION >= 11040 - CUSPARSE_SPMV_ALG_DEFAULT, + CUSPARSE_SPMV_ALG_DEFAULT, #else - CUSPARSE_MV_ALG_DEFAULT, + CUSPARSE_MV_ALG_DEFAULT, #endif - tmp_buffer_ptr); + tmp_buffer_ptr); }); } @@ -432,17 +432,17 @@ void SparseBlas::SDDMM(bool transa, cudaDataType_t gpu_type = GetGpuDataType(); size_t buffer_size = 0; dev_ctx_.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseSDDMM_bufferSize(handle, - GetTransposeOperation(transa), - GetTransposeOperation(transb), - &alpha, - a_descriptor.descriptor(), - b_descriptor.descriptor(), - &beta, - out_descriptor.descriptor(), - gpu_type, - CUSPARSE_SDDMM_ALG_DEFAULT, - &buffer_size); + common::dynload::cusparseSDDMM_bufferSize(handle, + GetTransposeOperation(transa), + GetTransposeOperation(transb), + &alpha, + a_descriptor.descriptor(), + b_descriptor.descriptor(), + &beta, + out_descriptor.descriptor(), + gpu_type, + CUSPARSE_SDDMM_ALG_DEFAULT, + &buffer_size); }); phi::Allocator::AllocationPtr tmp_buffer = phi::memory_utils::Alloc( @@ -452,31 +452,31 @@ void SparseBlas::SDDMM(bool transa, void* tmp_buffer_ptr = tmp_buffer->ptr(); dev_ctx_.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseSDDMM_preprocess(handle, - GetTransposeOperation(transa), - GetTransposeOperation(transb), - &alpha, - a_descriptor.descriptor(), - b_descriptor.descriptor(), - &beta, - out_descriptor.descriptor(), - gpu_type, - CUSPARSE_SDDMM_ALG_DEFAULT, - tmp_buffer_ptr); + common::dynload::cusparseSDDMM_preprocess(handle, + GetTransposeOperation(transa), + GetTransposeOperation(transb), + &alpha, + a_descriptor.descriptor(), + b_descriptor.descriptor(), + &beta, + out_descriptor.descriptor(), + gpu_type, + CUSPARSE_SDDMM_ALG_DEFAULT, + tmp_buffer_ptr); }); dev_ctx_.CusparseCall([&](cusparseHandle_t handle) { - phi::dynload::cusparseSDDMM(handle, - GetTransposeOperation(transa), - GetTransposeOperation(transb), - &alpha, - a_descriptor.descriptor(), - b_descriptor.descriptor(), - &beta, - out_descriptor.descriptor(), - gpu_type, - CUSPARSE_SDDMM_ALG_DEFAULT, - tmp_buffer_ptr); + common::dynload::cusparseSDDMM(handle, + GetTransposeOperation(transa), + GetTransposeOperation(transb), + &alpha, + a_descriptor.descriptor(), + b_descriptor.descriptor(), + &beta, + out_descriptor.descriptor(), + gpu_type, + CUSPARSE_SDDMM_ALG_DEFAULT, + tmp_buffer_ptr); }); } #endif diff --git a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.hip.h b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.hip.h index cbd42be3cb6d49..62175c284b25ab 100644 --- a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.hip.h +++ b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.hip.h @@ -14,13 +14,13 @@ #pragma once +#include "paddle/common/ddim.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/dynload/rocsparse.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/data_type.h" #include 
"paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" #include "paddle/phi/core/visit_type.h" @@ -91,17 +91,17 @@ inline void CreateCsrDescriptor(const phi::SparseCsrTensor& x, rocsparse_indextype jtype = GetGpuIndexType(); rocsparse_datatype ttype = GetGpuDataType(); dev_ctx.CusparseCall([&](rocsparse_handle handle) { - phi::dynload::rocsparse_create_csr_descr(descriptor, - M, - N, - batch_nnz, - const_cast(crows_data), - const_cast(cols_data), - const_cast(values_data), - itype, - jtype, - rocsparse_index_base_zero, - ttype); + common::dynload::rocsparse_create_csr_descr(descriptor, + M, + N, + batch_nnz, + const_cast(crows_data), + const_cast(cols_data), + const_cast(values_data), + itype, + jtype, + rocsparse_index_base_zero, + ttype); }); if (batch_size > 1) { // TODO(umiswing): Add batch sparse matmul support for ROCM after 5.2.0 @@ -140,16 +140,16 @@ inline void CreateCooDescriptor(const phi::SparseCooTensor& x, rocsparse_indextype itype = GetGpuIndexType(); rocsparse_datatype ttype = GetGpuDataType(); dev_ctx.CusparseCall([&](rocsparse_handle handle) { - phi::dynload::rocsparse_create_coo_descr(descriptor, - M, - N, - batch_nnz, - const_cast(rows_data), - const_cast(cols_data), - const_cast(values_data), - itype, - rocsparse_index_base_zero, - ttype); + common::dynload::rocsparse_create_coo_descr(descriptor, + M, + N, + batch_nnz, + const_cast(rows_data), + const_cast(cols_data), + const_cast(values_data), + itype, + rocsparse_index_base_zero, + ttype); }); if (batch_size > 1) { @@ -184,7 +184,7 @@ class RocSparseSpMatDescriptor { ~RocSparseSpMatDescriptor() { dev_ctx_.CusparseCall([&](rocsparse_handle handle) { - phi::dynload::rocsparse_destroy_spmat_descr(descriptor_); + common::dynload::rocsparse_destroy_spmat_descr(descriptor_); }); VLOG(6) << "Destroy roscparse_spmat_descr " << &descriptor_; } @@ -221,13 +221,13 @@ class RocSparseDnMatDescriptor { const T* x_data = x.data(); rocsparse_datatype ttype = GetGpuDataType(); dev_ctx.CusparseCall([&](rocsparse_handle handle) { - phi::dynload::rocsparse_create_dnmat_descr(&descriptor_, - M, - N, - N, - const_cast(x_data), - ttype, - rocsparse_order_row); + common::dynload::rocsparse_create_dnmat_descr(&descriptor_, + M, + N, + N, + const_cast(x_data), + ttype, + rocsparse_order_row); }); PADDLE_ENFORCE_EQ( @@ -246,7 +246,7 @@ class RocSparseDnMatDescriptor { ~RocSparseDnMatDescriptor() { dev_ctx_.CusparseCall([&](rocsparse_handle handle) { - phi::dynload::rocsparse_destroy_dnmat_descr(descriptor_); + common::dynload::rocsparse_destroy_dnmat_descr(descriptor_); }); VLOG(6) << "Destroy rocsparse_dnmat_descr " << &descriptor_; } @@ -277,19 +277,19 @@ void SparseBlas::SPMM(bool transa, // Query SpMM buffer dev_ctx_.CusparseCall([&](rocsparse_handle handle) { - phi::dynload::rocsparse_spmm(handle, - GetTransposeOperation(transa), - GetTransposeOperation(transb), - &alpha, - a_descriptor.descriptor(), - b_descriptor.descriptor(), - &beta, - out_descriptor.descriptor(), - ttype, - GetSpMMAlgorithm(mat_a), - rocsparse_spmm_stage_buffer_size, - &buffer_size, - nullptr); + common::dynload::rocsparse_spmm(handle, + GetTransposeOperation(transa), + GetTransposeOperation(transb), + &alpha, + a_descriptor.descriptor(), + b_descriptor.descriptor(), + &beta, + out_descriptor.descriptor(), + ttype, + GetSpMMAlgorithm(mat_a), + rocsparse_spmm_stage_buffer_size, + 
&buffer_size, + nullptr); }); // Allocate buffer @@ -301,36 +301,36 @@ void SparseBlas::SPMM(bool transa, // Preprocess data dev_ctx_.CusparseCall([&](rocsparse_handle handle) { - phi::dynload::rocsparse_spmm(handle, - GetTransposeOperation(transa), - GetTransposeOperation(transb), - &alpha, - a_descriptor.descriptor(), - b_descriptor.descriptor(), - &beta, - out_descriptor.descriptor(), - ttype, - GetSpMMAlgorithm(mat_a), - rocsparse_spmm_stage_preprocess, - &buffer_size, - tmp_buffer_ptr); + common::dynload::rocsparse_spmm(handle, + GetTransposeOperation(transa), + GetTransposeOperation(transb), + &alpha, + a_descriptor.descriptor(), + b_descriptor.descriptor(), + &beta, + out_descriptor.descriptor(), + ttype, + GetSpMMAlgorithm(mat_a), + rocsparse_spmm_stage_preprocess, + &buffer_size, + tmp_buffer_ptr); }); // Performs the actual SpMM computation dev_ctx_.CusparseCall([&](rocsparse_handle handle) { - phi::dynload::rocsparse_spmm(handle, - GetTransposeOperation(transa), - GetTransposeOperation(transb), - &alpha, - a_descriptor.descriptor(), - b_descriptor.descriptor(), - &beta, - out_descriptor.descriptor(), - ttype, - GetSpMMAlgorithm(mat_a), - rocsparse_spmm_stage_compute, - &buffer_size, - tmp_buffer_ptr); + common::dynload::rocsparse_spmm(handle, + GetTransposeOperation(transa), + GetTransposeOperation(transb), + &alpha, + a_descriptor.descriptor(), + b_descriptor.descriptor(), + &beta, + out_descriptor.descriptor(), + ttype, + GetSpMMAlgorithm(mat_a), + rocsparse_spmm_stage_compute, + &buffer_size, + tmp_buffer_ptr); }); } @@ -352,17 +352,17 @@ void SparseBlas::SDDMM(bool transa, rocsparse_datatype gpu_type = GetGpuDataType(); size_t buffer_size = 0; dev_ctx_.CusparseCall([&](rocsparse_handle handle) { - phi::dynload::rocsparse_sddmm_buffer_size(handle, - GetTransposeOperation(transa), - GetTransposeOperation(transb), - &alpha, - a_descriptor.descriptor(), - b_descriptor.descriptor(), - &beta, - out_descriptor.descriptor(), - gpu_type, - rocsparse_sddmm_alg_default, - &buffer_size); + common::dynload::rocsparse_sddmm_buffer_size(handle, + GetTransposeOperation(transa), + GetTransposeOperation(transb), + &alpha, + a_descriptor.descriptor(), + b_descriptor.descriptor(), + &beta, + out_descriptor.descriptor(), + gpu_type, + rocsparse_sddmm_alg_default, + &buffer_size); }); phi::Allocator::AllocationPtr tmp_buffer = phi::memory_utils::Alloc( @@ -372,31 +372,31 @@ void SparseBlas::SDDMM(bool transa, void* tmp_buffer_ptr = tmp_buffer->ptr(); dev_ctx_.CusparseCall([&](rocsparse_handle handle) { - phi::dynload::rocsparse_sddmm_preprocess(handle, - GetTransposeOperation(transa), - GetTransposeOperation(transb), - &alpha, - a_descriptor.descriptor(), - b_descriptor.descriptor(), - &beta, - out_descriptor.descriptor(), - gpu_type, - rocsparse_sddmm_alg_default, - tmp_buffer_ptr); + common::dynload::rocsparse_sddmm_preprocess(handle, + GetTransposeOperation(transa), + GetTransposeOperation(transb), + &alpha, + a_descriptor.descriptor(), + b_descriptor.descriptor(), + &beta, + out_descriptor.descriptor(), + gpu_type, + rocsparse_sddmm_alg_default, + tmp_buffer_ptr); }); dev_ctx_.CusparseCall([&](rocsparse_handle handle) { - phi::dynload::rocsparse_sddmm(handle, - GetTransposeOperation(transa), - GetTransposeOperation(transb), - &alpha, - a_descriptor.descriptor(), - b_descriptor.descriptor(), - &beta, - out_descriptor.descriptor(), - gpu_type, - rocsparse_sddmm_alg_default, - tmp_buffer_ptr); + common::dynload::rocsparse_sddmm(handle, + GetTransposeOperation(transa), + 
GetTransposeOperation(transb), + &alpha, + a_descriptor.descriptor(), + b_descriptor.descriptor(), + &beta, + out_descriptor.descriptor(), + gpu_type, + rocsparse_sddmm_alg_default, + tmp_buffer_ptr); }); } #endif diff --git a/paddle/phi/kernels/funcs/strided_slice.h b/paddle/phi/kernels/funcs/strided_slice.h index 4a88c1e0660b79..b71fbeab519f63 100644 --- a/paddle/phi/kernels/funcs/strided_slice.h +++ b/paddle/phi/kernels/funcs/strided_slice.h @@ -17,9 +17,9 @@ #include #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" +#include "paddle/common/enforce.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/tensor_array.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/eigen/common.h" diff --git a/paddle/phi/kernels/funcs/unique_functor.h b/paddle/phi/kernels/funcs/unique_functor.h index 806d7cca84851d..ce50227ea921a5 100644 --- a/paddle/phi/kernels/funcs/unique_functor.h +++ b/paddle/phi/kernels/funcs/unique_functor.h @@ -15,8 +15,8 @@ #pragma once #include +#include "paddle/common/data_type.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/concat_and_split_functor.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/funcs/unsqueeze.h b/paddle/phi/kernels/funcs/unsqueeze.h index b15e781b25117b..9abb989df23ae7 100644 --- a/paddle/phi/kernels/funcs/unsqueeze.h +++ b/paddle/phi/kernels/funcs/unsqueeze.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/dense_tensor.h" // TODO(paddle-dev): Remove this file when we can call related Kernel directly diff --git a/paddle/phi/kernels/funcs/values_vectors_functor.h b/paddle/phi/kernels/funcs/values_vectors_functor.h index e7dae6b82711b9..66287b69ba12e2 100644 --- a/paddle/phi/kernels/funcs/values_vectors_functor.h +++ b/paddle/phi/kernels/funcs/values_vectors_functor.h @@ -15,7 +15,7 @@ #pragma once #ifdef PADDLE_WITH_CUDA #include "paddle/common/backends/dynload/cusolver.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #endif // PADDLE_WITH_CUDA #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h" diff --git a/paddle/phi/kernels/funcs/vol2col.h b/paddle/phi/kernels/funcs/vol2col.h index 283ab3ea065635..bd909927952d04 100644 --- a/paddle/phi/kernels/funcs/vol2col.h +++ b/paddle/phi/kernels/funcs/vol2col.h @@ -16,8 +16,8 @@ limitations under the License. */ #include +#include "paddle/common/errors.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/errors.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/fusion/cpu/distributed_fused_lamb_init_kernel.cc b/paddle/phi/kernels/fusion/cpu/distributed_fused_lamb_init_kernel.cc index 3cb37ccf2ed89d..bbcb61bd454765 100644 --- a/paddle/phi/kernels/fusion/cpu/distributed_fused_lamb_init_kernel.cc +++ b/paddle/phi/kernels/fusion/cpu/distributed_fused_lamb_init_kernel.cc @@ -13,7 +13,7 @@ // limitations under the License. 
#include "paddle/phi/kernels/distributed_fused_lamb_init_kernel.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/fusion/cpu/fused_softmax_mask_upper_triangle_kernel.cc b/paddle/phi/kernels/fusion/cpu/fused_softmax_mask_upper_triangle_kernel.cc index b9ded16d1b0958..6257e9c451aaa7 100644 --- a/paddle/phi/kernels/fusion/cpu/fused_softmax_mask_upper_triangle_kernel.cc +++ b/paddle/phi/kernels/fusion/cpu/fused_softmax_mask_upper_triangle_kernel.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/fused_softmax_mask_upper_triangle_kernel.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/fusion/cpu/fusion_gru_kernel.cc b/paddle/phi/kernels/fusion/cpu/fusion_gru_kernel.cc index b88a93b419beb7..5683c7a09c82ed 100644 --- a/paddle/phi/kernels/fusion/cpu/fusion_gru_kernel.cc +++ b/paddle/phi/kernels/fusion/cpu/fusion_gru_kernel.cc @@ -16,9 +16,9 @@ #include #include +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" #include "paddle/common/float16.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/phi/kernels/fusion/cpu/fusion_seqconv_eltadd_relu_kernel.cc b/paddle/phi/kernels/fusion/cpu/fusion_seqconv_eltadd_relu_kernel.cc index e16c081a37ea08..c5ee7983c493e0 100644 --- a/paddle/phi/kernels/fusion/cpu/fusion_seqconv_eltadd_relu_kernel.cc +++ b/paddle/phi/kernels/fusion/cpu/fusion_seqconv_eltadd_relu_kernel.cc @@ -15,9 +15,9 @@ #include // for min, max #include +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" #include "paddle/common/float16.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/phi/kernels/fusion/cpu/fusion_seqexpand_concat_fc_kernel.cc b/paddle/phi/kernels/fusion/cpu/fusion_seqexpand_concat_fc_kernel.cc index 09026060965aaa..1b93ab7a139609 100644 --- a/paddle/phi/kernels/fusion/cpu/fusion_seqexpand_concat_fc_kernel.cc +++ b/paddle/phi/kernels/fusion/cpu/fusion_seqexpand_concat_fc_kernel.cc @@ -14,10 +14,10 @@ #include +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" #include "paddle/common/float16.h" #include "paddle/phi/backends/cpu/cpu_info.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_util.h b/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_util.h index eaceb46d69d741..0d7c29457be3c9 100644 --- a/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_util.h +++ b/paddle/phi/kernels/fusion/cutlass/conv2d/conv2d_util.h @@ -24,9 +24,9 @@ #include "cutlass/conv/device/implicit_gemm_convolution.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/enforce.h" namespace phi { namespace fusion { diff --git a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention.cu 
b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention.cu index cc4fd467dfc20b..f9c3cb0e7c7610 100644 --- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention.cu +++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention.cu @@ -14,8 +14,8 @@ #include "glog/logging.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/autogen/memory_efficient_attention.h" #include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/gemm_kernel_utils.h" diff --git a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/gemm_kernel_utils.h b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/gemm_kernel_utils.h index 3442818c817172..5e043341d2b757 100644 --- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/gemm_kernel_utils.h +++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/gemm_kernel_utils.h @@ -20,8 +20,8 @@ #pragma once #include "cutlass/arch/mma.h" +#include "paddle/common/enforce.h" #include "paddle/fluid/platform/errors.h" -#include "paddle/phi/core/enforce.h" //////////////////////////////////////////////////////////////////////////////// // Some helper functions diff --git a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/kernel_backward.h b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/kernel_backward.h index 56ed034ff5ad5a..15fe7a2ea573fb 100644 --- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/kernel_backward.h +++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/kernel_backward.h @@ -66,8 +66,8 @@ #include "iterators/epilogue_predicated_tile_iterator.h" #include "transform/tile_smem_loader.h" +#include "paddle/common/enforce.h" #include "paddle/fluid/platform/errors.h" -#include "paddle/phi/core/enforce.h" namespace phi { diff --git a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/kernel_forward.h b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/kernel_forward.h index 232ded25a7390e..f992955ecfdc89 100644 --- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/kernel_forward.h +++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/kernel_forward.h @@ -57,8 +57,8 @@ #include "gemm/mma_from_smem.h" #include "transform/tile_smem_loader.h" +#include "paddle/common/enforce.h" #include "paddle/fluid/platform/errors.h" -#include "paddle/phi/core/enforce.h" // namespace phi { using namespace gemm_kernel_utils; // NOLINT diff --git a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h index 65dfb1bc8eced4..43afbdb55707dd 100644 --- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h +++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" namespace phi { namespace fusion { diff --git a/paddle/phi/kernels/fusion/gpu/cast_with_ptr.h b/paddle/phi/kernels/fusion/gpu/cast_with_ptr.h index 5ae8aed256ccdd..0b569dec8d588e 100644 --- a/paddle/phi/kernels/fusion/gpu/cast_with_ptr.h +++ b/paddle/phi/kernels/fusion/gpu/cast_with_ptr.h @@ -14,10 +14,10 @@ #pragma once +#include "paddle/common/ddim.h" +#include "paddle/common/enforce.h" #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" 
-#include "paddle/phi/core/ddim.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" namespace phi { diff --git a/paddle/phi/kernels/fusion/gpu/conv_fusion_kernel.cu b/paddle/phi/kernels/fusion/gpu/conv_fusion_kernel.cu index 1a23b39b3a2ff9..614c934338a503 100644 --- a/paddle/phi/kernels/fusion/gpu/conv_fusion_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/conv_fusion_kernel.cu @@ -25,11 +25,11 @@ #include "glog/logging.h" #include "paddle/common/backends/dynload/cudnn.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/backends/gpu/cuda/cudnn_desc.h" #include "paddle/phi/common/backend.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/impl/conv_cudnn_impl.h" #include "paddle/utils/optional.h" @@ -63,14 +63,14 @@ class CudnnConvDescManager { bool is_sys_pad; // TODO(wilber): The destruction of cudnn descriptor depends on the - // phi::dynload::cudnn singleton, but when the process exits, the singleton - // destruction order cannot be determined. - // After testing, it is found that the phi::dynload::cudnn related singleton - // on Windows is destructed first, causing the descriptor to be destructed - // and failed, while the descriptor on Linux is destructed first, and the - // phi::dynload::cudnn singleton is destructed later, so that it is correct. - // To circumvent this problem, we rely entirely on freeing resources when - // the process exits. + // common::dynload::cudnn singleton, but when the process exits, the + // singleton destruction order cannot be determined. After testing, it is + // found that the common::dynload::cudnn related singleton on Windows is + // destructed first, causing the descriptor to be destructed and failed, + // while the descriptor on Linux is destructed first, and the + // common::dynload::cudnn singleton is destructed later, so that it is + // correct. To circumvent this problem, we rely entirely on freeing + // resources when the process exits. 
// ~CudnnCacheInfo() { // if (x_desc) delete x_desc; @@ -462,7 +462,7 @@ void ConvFusionKernel(const Context& ctx, std::unique_ptr perf_results( new cudnnConvolutionFwdAlgoPerf_t[phi::kNUM_CUDNN_FWD_ALGS]); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionForwardAlgorithm_v7( + common::dynload::cudnnGetConvolutionForwardAlgorithm_v7( handle, x_desc, w_desc, @@ -474,7 +474,7 @@ void ConvFusionKernel(const Context& ctx, *cudnn_algo = (perf_results.get())[best_algo_idx].algo; #else PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionForwardAlgorithm( + common::dynload::cudnnGetConvolutionForwardAlgorithm( handle, x_desc, w_desc, @@ -485,20 +485,21 @@ void ConvFusionKernel(const Context& ctx, cudnn_algo)); #endif PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionForwardWorkspaceSize(handle, - x_desc, - w_desc, - cudnn_conv_desc, - o_desc, - *cudnn_algo, - wks_bytes)); + common::dynload::cudnnGetConvolutionForwardWorkspaceSize( + handle, + x_desc, + w_desc, + cudnn_conv_desc, + o_desc, + *cudnn_algo, + wks_bytes)); } else { std::array fwd_perf_stat; int returned_algo_count; auto cudnn_find_func = [&](void* cudnn_workspace) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnFindConvolutionForwardAlgorithmEx( + common::dynload::cudnnFindConvolutionForwardAlgorithmEx( handle, x_desc, transformed_input.data(), @@ -517,7 +518,7 @@ void ConvFusionKernel(const Context& ctx, *cudnn_algo = fwd_perf_stat[0].algo; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionForwardWorkspaceSize( + common::dynload::cudnnGetConvolutionForwardWorkspaceSize( handle, x_desc, w_desc, @@ -561,22 +562,22 @@ void ConvFusionKernel(const Context& ctx, ScalingParamType alpha = 1.0f, beta = 0.0f; auto cudnn_func = [&](void* cudnn_workspace) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnConvolutionForward(handle, - &alpha, - x_desc, - transformed_input.data(), - w_desc, - filter.data(), - cudnn_conv_desc, - algo, - cudnn_workspace, - workspace_size, - &beta, - o_desc, - output->data())); + common::dynload::cudnnConvolutionForward(handle, + &alpha, + x_desc, + transformed_input.data(), + w_desc, + filter.data(), + cudnn_conv_desc, + algo, + cudnn_workspace, + workspace_size, + &beta, + o_desc, + output->data())); }; workspace_handle.RunFunc(cudnn_func, workspace_size); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnAddTensor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnAddTensor( handle, &alpha, b_desc, bias.data(), &alpha, o_desc, output->data())); } else { // Only the CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_​PRECOMP_GEMM algo is @@ -589,7 +590,7 @@ void ConvFusionKernel(const Context& ctx, ScalingParamType beta = residual.get_ptr() ? 1.0f : 0.0f; auto cudnn_func = [&](void* cudnn_workspace) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnConvolutionBiasActivationForward( + common::dynload::cudnnConvolutionBiasActivationForward( handle, &alpha, x_desc, diff --git a/paddle/phi/kernels/fusion/gpu/distributed_fused_lamb_init_kernel.cu b/paddle/phi/kernels/fusion/gpu/distributed_fused_lamb_init_kernel.cu index 3ae7f0682bc75b..cdca87eb14f8fc 100644 --- a/paddle/phi/kernels/fusion/gpu/distributed_fused_lamb_init_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/distributed_fused_lamb_init_kernel.cu @@ -13,8 +13,8 @@ // limitations under the License. 
#include "paddle/phi/kernels/distributed_fused_lamb_init_kernel.h" +#include "paddle/common/enforce.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/algorithm.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/fusion/gpu/fused_bn_activation_grad_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_bn_activation_grad_kernel.cu index e795d37ea490e1..7437645f4f28c2 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_bn_activation_grad_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_bn_activation_grad_kernel.cu @@ -123,9 +123,9 @@ void FusedBatchNormActGradKernel(const Context &dev_ctx, cudnnBatchNormMode_t mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateTensorDescriptor(&data_desc_)); + common::dynload::cudnnCreateTensorDescriptor(&data_desc_)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_)); + common::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_)); if (epsilon1 <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) { LOG(ERROR) << "Provided epsilon is smaller than " << "CUDNN_BN_MIN_EPSILON. Setting it to " @@ -133,13 +133,13 @@ void FusedBatchNormActGradKernel(const Context &dev_ctx, } epsilon1 = std::max(epsilon1, CUDNN_BN_MIN_EPSILON); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetTensorNdDescriptor( data_desc_, CudnnDataType::type, x_dims.size() > 3 ? x_dims.size() : 4, dims.data(), strides.data())); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDeriveBNTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnDeriveBNTensorDescriptor( bn_param_desc_, data_desc_, mode_)); const auto *saved_mean_data = saved_mean.template data(); @@ -156,7 +156,7 @@ void FusedBatchNormActGradKernel(const Context &dev_ctx, scope_act_desc.descriptor(act_type); // --------------- cudnn batchnorm workspace --------------- PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetBatchNormalizationBackwardExWorkspaceSize( + common::dynload::cudnnGetBatchNormalizationBackwardExWorkspaceSize( /*handle=*/dev_ctx.cudnn_handle(), /*mode=*/mode_, /*bnOps=*/bnOps_, @@ -173,7 +173,7 @@ void FusedBatchNormActGradKernel(const Context &dev_ctx, (workspace_size + phi::SizeOf(x.dtype()) - 1) / phi::SizeOf(x.dtype()))}); workspace_ptr = dev_ctx.template Alloc(&workspace_tensor); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnBatchNormalizationBackwardEx( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnBatchNormalizationBackwardEx( /*handle=*/dev_ctx.cudnn_handle(), /*mode=*/mode_, /*bnOps=*/bnOps_, @@ -210,9 +210,9 @@ void FusedBatchNormActGradKernel(const Context &dev_ctx, // clean when exit. 
PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyTensorDescriptor(data_desc_)); + common::dynload::cudnnDestroyTensorDescriptor(data_desc_)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_)); + common::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_)); #else PADDLE_THROW(phi::errors::Unimplemented( "The fused_batch_norm_act operator is not supported on GPU " diff --git a/paddle/phi/kernels/fusion/gpu/fused_bn_activation_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_bn_activation_kernel.cu index 700141f1e03318..904660c7f158b7 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_bn_activation_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_bn_activation_kernel.cu @@ -113,22 +113,22 @@ void FusedBatchNormActKernel(const Context &dev_ctx, cudnnBatchNormMode_t mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateTensorDescriptor(&data_desc_)); + common::dynload::cudnnCreateTensorDescriptor(&data_desc_)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_)); + common::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_)); VLOG(3) << "Setting descriptors."; std::vector dims = {N, C, H, W, D}; std::vector strides = {H * W * D * C, 1, W * D * C, D * C, C}; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetTensorNdDescriptor( data_desc_, CudnnDataType::type, x_dims.size() > 3 ? x_dims.size() : 4, dims.data(), strides.data())); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDeriveBNTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnDeriveBNTensorDescriptor( bn_param_desc_, data_desc_, mode_)); double this_factor = 1. - momentum; @@ -149,7 +149,7 @@ void FusedBatchNormActKernel(const Context &dev_ctx, // --------------- cudnn batchnorm workspace --------------- PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( + common::dynload::cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( /*handle=*/handle, /*mode=*/mode_, /*bnOps=*/bnOps_, @@ -162,7 +162,7 @@ void FusedBatchNormActKernel(const Context &dev_ctx, // -------------- cudnn batchnorm reserve space -------------- PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetBatchNormalizationTrainingExReserveSpaceSize( + common::dynload::cudnnGetBatchNormalizationTrainingExReserveSpaceSize( /*handle=*/handle, /*mode=*/mode_, /*bnOps=*/bnOps_, @@ -179,7 +179,7 @@ void FusedBatchNormActKernel(const Context &dev_ctx, workspace_ptr = dev_ctx.template Alloc(&workspace_tensor); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnBatchNormalizationForwardTrainingEx( + common::dynload::cudnnBatchNormalizationForwardTrainingEx( handle, mode_, bnOps_, @@ -208,9 +208,9 @@ void FusedBatchNormActKernel(const Context &dev_ctx, // clean when exit. 
PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyTensorDescriptor(data_desc_)); + common::dynload::cudnnDestroyTensorDescriptor(data_desc_)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_)); + common::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_)); #else PADDLE_THROW(phi::errors::Unimplemented( "The fused_batch_norm_act operator is not supported on GPU " diff --git a/paddle/phi/kernels/fusion/gpu/fused_bn_add_activation_grad_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_bn_add_activation_grad_kernel.cu index 894903fb0fab83..850af21da4578a 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_bn_add_activation_grad_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_bn_add_activation_grad_kernel.cu @@ -115,9 +115,9 @@ void FusedBatchNormAddActGradKernel(const Context &dev_ctx, cudnnBatchNormMode_t mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateTensorDescriptor(&data_desc_)); + common::dynload::cudnnCreateTensorDescriptor(&data_desc_)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_)); + common::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_)); if (epsilon1 <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) { LOG(ERROR) << "Provided epsilon is smaller than " << "CUDNN_BN_MIN_EPSILON. Setting it to " @@ -125,13 +125,13 @@ void FusedBatchNormAddActGradKernel(const Context &dev_ctx, } epsilon1 = std::max(epsilon1, CUDNN_BN_MIN_EPSILON); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetTensorNdDescriptor( data_desc_, CudnnDataType::type, in_dims.size() > 3 ? in_dims.size() : 4, dims.data(), strides.data())); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDeriveBNTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnDeriveBNTensorDescriptor( bn_param_desc_, data_desc_, mode_)); const auto *saved_mean_ptr = &saved_mean; @@ -151,7 +151,7 @@ void FusedBatchNormAddActGradKernel(const Context &dev_ctx, scope_act_desc.descriptor(act_type); // --------------- cudnn batchnorm workspace --------------- PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetBatchNormalizationBackwardExWorkspaceSize( + common::dynload::cudnnGetBatchNormalizationBackwardExWorkspaceSize( /*handle=*/dev_ctx.cudnn_handle(), /*mode=*/mode_, /*bnOps=*/bnOps_, @@ -167,7 +167,7 @@ void FusedBatchNormAddActGradKernel(const Context &dev_ctx, workspace_tensor.Resize({static_cast(workspace_size)}); workspace_ptr = dev_ctx.template Alloc(&workspace_tensor); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnBatchNormalizationBackwardEx( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnBatchNormalizationBackwardEx( /*handle=*/dev_ctx.cudnn_handle(), /*mode=*/mode_, /*bnOps=*/bnOps_, @@ -201,9 +201,9 @@ void FusedBatchNormAddActGradKernel(const Context &dev_ctx, // clean when exit. 
PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyTensorDescriptor(data_desc_)); + common::dynload::cudnnDestroyTensorDescriptor(data_desc_)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_)); + common::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_)); #else PADDLE_THROW(phi::errors::Unimplemented( "The fused_bn_add_activation operator is not supported on GPU " diff --git a/paddle/phi/kernels/fusion/gpu/fused_bn_add_activation_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_bn_add_activation_kernel.cu index 52152476e4aca1..96761aa3baac1f 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_bn_add_activation_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_bn_add_activation_kernel.cu @@ -100,20 +100,20 @@ void FusedBatchNormAddActKernel(const Context &dev_ctx, cudnnBatchNormMode_t mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateTensorDescriptor(&data_desc_)); + common::dynload::cudnnCreateTensorDescriptor(&data_desc_)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_)); + common::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_)); std::vector dims = {N, C, H, W, D}; std::vector strides = {H * W * D * C, 1, W * D * C, D * C, C}; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetTensorNdDescriptor( data_desc_, CudnnDataType::type, in_dims.size() > 3 ? in_dims.size() : 4, dims.data(), strides.data())); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDeriveBNTensorDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnDeriveBNTensorDescriptor( bn_param_desc_, data_desc_, mode_)); double this_factor = 1. - momentum; @@ -136,7 +136,7 @@ void FusedBatchNormAddActKernel(const Context &dev_ctx, // --------------- cudnn batchnorm workspace --------------- PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( + common::dynload::cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( /*handle=*/handle, /*mode=*/mode_, /*bnOps=*/bnOps_, @@ -149,7 +149,7 @@ void FusedBatchNormAddActKernel(const Context &dev_ctx, // -------------- cudnn batchnorm reserve space -------------- PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetBatchNormalizationTrainingExReserveSpaceSize( + common::dynload::cudnnGetBatchNormalizationTrainingExReserveSpaceSize( /*handle=*/handle, /*mode=*/mode_, /*bnOps=*/bnOps_, @@ -168,7 +168,7 @@ void FusedBatchNormAddActKernel(const Context &dev_ctx, &workspace_tensor, workspace_tensor.numel() * sizeof(T)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnBatchNormalizationForwardTrainingEx( + common::dynload::cudnnBatchNormalizationForwardTrainingEx( handle, mode_, bnOps_, @@ -203,9 +203,9 @@ void FusedBatchNormAddActKernel(const Context &dev_ctx, // clean when exit. 
    PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::cudnnDestroyTensorDescriptor(data_desc_));
+        common::dynload::cudnnDestroyTensorDescriptor(data_desc_));
    PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
+        common::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
 #else
   PADDLE_THROW(phi::errors::Unimplemented(
       "The fused_bn_add_activation operator is not supported on GPU "
diff --git a/paddle/phi/kernels/fusion/gpu/fused_embedding_eltwise_layernorm_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_embedding_eltwise_layernorm_kernel.cu
index 2670c80c1aa07c..bb5008146e0d1c 100644
--- a/paddle/phi/kernels/fusion/gpu/fused_embedding_eltwise_layernorm_kernel.cu
+++ b/paddle/phi/kernels/fusion/gpu/fused_embedding_eltwise_layernorm_kernel.cu
@@ -15,11 +15,11 @@
 #include
 #include
+#include "paddle/common/enforce.h"
+#include "paddle/common/errors.h"
 #include "paddle/common/float16.h"
 #include "paddle/phi/common/memory_utils.h"
 #include "paddle/phi/common/place.h"
-#include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/emb_eltwise_layer_norm_functor.h"
diff --git a/paddle/phi/kernels/fusion/gpu/fused_fc_elementwise_layernorm_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_fc_elementwise_layernorm_kernel.cu
index 6fd829aa757346..cbb2e1b8fb0321 100644
--- a/paddle/phi/kernels/fusion/gpu/fused_fc_elementwise_layernorm_kernel.cu
+++ b/paddle/phi/kernels/fusion/gpu/fused_fc_elementwise_layernorm_kernel.cu
@@ -27,11 +27,11 @@ namespace cub = hipcub;
 #include
 #endif
+#include "paddle/common/enforce.h"
+#include "paddle/common/errors.h"
 #include "paddle/common/float16.h"
 #include "paddle/phi/backends/gpu/gpu_device_function.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
-#include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
diff --git a/paddle/phi/kernels/fusion/gpu/fused_rope_grad_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_rope_grad_kernel.cu
index 70ea70912f6397..b1664e4b15d48e 100644
--- a/paddle/phi/kernels/fusion/gpu/fused_rope_grad_kernel.cu
+++ b/paddle/phi/kernels/fusion/gpu/fused_rope_grad_kernel.cu
@@ -12,10 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "paddle/common/enforce.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/common/amp_type_traits.h"
-#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/aligned_vector.h"
 #include "paddle/phi/kernels/fusion/gpu/fused_rope_utils.h"
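Note on the fused batch-norm hunks above: the kernels describe a channel-last tensor to cuDNN by passing dims in NCHW[D] order together with explicit strides ({H * W * D * C, 1, W * D * C, D * C, C}). A minimal sketch of how such strides can be derived, not taken from the patch itself (the helper name and parameters are hypothetical):

    #include <vector>

    // Strides for dims given in {N, C, H, W, D} order when the data is
    // physically laid out channel-last (N, H, W, D, C): the stride of an
    // axis is the number of elements between consecutive indices on it.
    std::vector<int> ChannelLastStrides(int c, int h, int w, int d) {
      return {h * w * d * c,  // N: one full sample
              1,              // C: channels are innermost
              w * d * c,      // H
              d * c,          // W
              c};             // D
    }

This is why the descriptor calls above take both dims.data() and strides.data(): the layout is carried entirely by the stride vector.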
+#include "paddle/common/enforce.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/common/amp_type_traits.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" #include "paddle/phi/kernels/fusion/gpu/fused_rope_utils.h" diff --git a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_grad_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_grad_kernel.cu index 6c7fe36d364576..a08af5a5b89581 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_grad_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_grad_kernel.cu @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/common/errors.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/generator.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_kernel.cu index 30e5599aac2363..a0b7cf5b2689ce 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_kernel.cu @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/common/errors.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/generator.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/fusion/gpu/fusion_group_kernel.cu b/paddle/phi/kernels/fusion/gpu/fusion_group_kernel.cu index eee5a4b84b54a6..91f5347276f9d2 100644 --- a/paddle/phi/kernels/fusion/gpu/fusion_group_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fusion_group_kernel.cu @@ -14,10 +14,10 @@ #include "glog/logging.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/device_code.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { namespace fusion { diff --git a/paddle/phi/kernels/fusion/gpu/fusion_transpose_flatten_concat_kernel.cu b/paddle/phi/kernels/fusion/gpu/fusion_transpose_flatten_concat_kernel.cu index 9921a2db5ad173..67856582b1dbd3 100644 --- a/paddle/phi/kernels/fusion/gpu/fusion_transpose_flatten_concat_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fusion_transpose_flatten_concat_kernel.cu @@ -15,10 +15,10 @@ #include #include +#include "paddle/common/enforce.h" +#include "paddle/common/errors.h" #include "paddle/common/float16.h" #include "paddle/phi/backends/gpu/gpu_dnn.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/common_shape.h" @@ -51,9 +51,9 @@ void TransposeFlattenConcatFusionKernel( cudnnTensorDescriptor_t in_desc; cudnnTensorDescriptor_t out_desc; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateTensorDescriptor(&in_desc)); + common::dynload::cudnnCreateTensorDescriptor(&in_desc)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnCreateTensorDescriptor(&out_desc)); + common::dynload::cudnnCreateTensorDescriptor(&out_desc)); 
   cudnnDataType_t cudnn_dtype = CudnnDataType<T>::type;
   auto handle = dev_ctx.cudnn_handle();
@@ -91,12 +91,12 @@ void TransposeFlattenConcatFusionKernel(
       dims_y[i] = 1;
     }
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetTensorNdDescriptor(
         in_desc, cudnn_dtype, max_dim, dims_y.data(), stride_x.data()));
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetTensorNdDescriptor(
         out_desc, cudnn_dtype, max_dim, dims_y.data(), stride_y.data()));
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnTransformTensor(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnTransformTensor(
         handle,
         CudnnDataType<T>::kOne(),
         in_desc,
@@ -112,9 +112,9 @@ void TransposeFlattenConcatFusionKernel(
     }
   }
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnDestroyTensorDescriptor(in_desc));
+      common::dynload::cudnnDestroyTensorDescriptor(in_desc));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnDestroyTensorDescriptor(out_desc));
+      common::dynload::cudnnDestroyTensorDescriptor(out_desc));
 #else
   PADDLE_THROW(phi::errors::Unimplemented(
       "The fusion_transpose_flatten_concat operator is not supported on HIP."));
diff --git a/paddle/phi/kernels/fusion/gpu/multihead_matmul_kernel.cu b/paddle/phi/kernels/fusion/gpu/multihead_matmul_kernel.cu
index 87bb36f1162e1a..0efb0718a36fe0 100644
--- a/paddle/phi/kernels/fusion/gpu/multihead_matmul_kernel.cu
+++ b/paddle/phi/kernels/fusion/gpu/multihead_matmul_kernel.cu
@@ -15,9 +15,9 @@
 #include
 #include
+#include "paddle/common/enforce.h"
+#include "paddle/common/errors.h"
 #include "paddle/common/float16.h"
-#include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
diff --git a/paddle/phi/kernels/fusion/onednn/fusion_gru_kernel.cc b/paddle/phi/kernels/fusion/onednn/fusion_gru_kernel.cc
index e3fa939aad7537..51b1dac9f07618 100644
--- a/paddle/phi/kernels/fusion/onednn/fusion_gru_kernel.cc
+++ b/paddle/phi/kernels/fusion/onednn/fusion_gru_kernel.cc
@@ -12,12 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "paddle/common/data_type.h"
+#include "paddle/common/enforce.h"
+#include "paddle/common/errors.h"
 #include "paddle/phi/backends/onednn/onednn_reuse.h"
 #include "paddle/phi/core/compat/convert_utils.h"
-#include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/phi/core/expect.h"
-#include "paddle/phi/core/utils/data_type.h"
 #include "paddle/phi/core/kernel_registry.h"
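The fusion_transpose_flatten_concat hunk above shows the pattern these kernels repeat: every cudnnCreateTensorDescriptor is paired with a matching cudnnDestroyTensorDescriptor at the end of the function. A minimal sketch, not part of the patch, of how an RAII guard could make that pairing automatic (the class name is hypothetical; production code would also check the returned status, as the kernels do via PADDLE_ENFORCE_GPU_SUCCESS):

    #include <cudnn.h>

    // Pairs create/destroy automatically so early returns or exceptions
    // cannot leak the descriptor.
    class TensorDescriptorGuard {
     public:
      TensorDescriptorGuard() { cudnnCreateTensorDescriptor(&desc_); }
      ~TensorDescriptorGuard() { cudnnDestroyTensorDescriptor(desc_); }
      TensorDescriptorGuard(const TensorDescriptorGuard&) = delete;
      TensorDescriptorGuard& operator=(const TensorDescriptorGuard&) = delete;
      cudnnTensorDescriptor_t get() const { return desc_; }

     private:
      cudnnTensorDescriptor_t desc_ = nullptr;
    };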
diff --git a/paddle/phi/kernels/fusion/xpu/fused_feedforward_grad_kernel.cc b/paddle/phi/kernels/fusion/xpu/fused_feedforward_grad_kernel.cc
index 3448efca7c3ab1..29f74e8e1fe237 100644
--- a/paddle/phi/kernels/fusion/xpu/fused_feedforward_grad_kernel.cc
+++ b/paddle/phi/kernels/fusion/xpu/fused_feedforward_grad_kernel.cc
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include "paddle/common/ddim.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
-#include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
diff --git a/paddle/phi/kernels/fusion/xpu/fused_feedforward_kernel.cc b/paddle/phi/kernels/fusion/xpu/fused_feedforward_kernel.cc
index 221305014190bd..dab55c1bbc10ae 100644
--- a/paddle/phi/kernels/fusion/xpu/fused_feedforward_kernel.cc
+++ b/paddle/phi/kernels/fusion/xpu/fused_feedforward_kernel.cc
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include "paddle/common/ddim.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
-#include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
diff --git a/paddle/phi/kernels/gpu/allclose_kernel.cu b/paddle/phi/kernels/gpu/allclose_kernel.cu
index 99ccfcd8667e6d..c021667bbec3fb 100644
--- a/paddle/phi/kernels/gpu/allclose_kernel.cu
+++ b/paddle/phi/kernels/gpu/allclose_kernel.cu
@@ -16,9 +16,9 @@
 #include "glog/logging.h"
+#include "paddle/common/enforce.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/common/data_type.h"
-#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/kernel_registry.h"
 namespace phi {
diff --git a/paddle/phi/kernels/gpu/arange_kernel.cu b/paddle/phi/kernels/gpu/arange_kernel.cu
index 8b59c79f0a4e08..c4b1f46f5ac525 100644
--- a/paddle/phi/kernels/gpu/arange_kernel.cu
+++ b/paddle/phi/kernels/gpu/arange_kernel.cu
@@ -15,11 +15,11 @@
 #include "paddle/phi/kernels/arange_kernel.h"
 #include "paddle/common/bfloat16.h"
+#include "paddle/common/enforce.h"
+#include "paddle/common/errors.h"
 #include "paddle/common/float16.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/common/amp_type_traits.h"
-#include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/range_function.h"
diff --git a/paddle/phi/kernels/gpu/arg_min_max_kernel.cu b/paddle/phi/kernels/gpu/arg_min_max_kernel.cu
index caa635255b9878..191a62698c817f 100644
--- a/paddle/phi/kernels/gpu/arg_min_max_kernel.cu
+++ b/paddle/phi/kernels/gpu/arg_min_max_kernel.cu
@@ -28,8 +28,8 @@ namespace cub = hipcub;
 #endif
 #include
-#include "paddle/phi/core/ddim.h"
-#include "paddle/phi/core/utils/data_type.h"
+#include "paddle/common/data_type.h"
+#include "paddle/common/ddim.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 namespace phi {
diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
index c3c353859728b7..53e151ed27faa5 100644
--- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -14,10 +14,10 @@
 #include "glog/logging.h"
+#include "paddle/common/enforce.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"
-#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/flags.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/batch_norm_kernel.h"
@@ -675,9 +675,9 @@ void BatchNormGradFunctor(const Context &ctx,
   cudnnBatchNormMode_t mode_;
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));
+      common::dynload::cudnnCreateTensorDescriptor(&data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
+      common::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
 #endif
   if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) {
     LOG(ERROR) << "Provided epsilon is smaller than "
@@ -714,13 +714,13 @@ void BatchNormGradFunctor(const Context &ctx,
   //     platform::dynload::miopenDeriveBNTensorDescriptor(bn_param_desc_,
   //     data_desc_, mode_));
 #else
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetTensorNdDescriptor(
       data_desc_,
       CudnnDataType<T>::type,
       x_dims.size() > 3 ? x_dims.size() : 4,
       dims.data(),
       strides.data()));
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDeriveBNTensorDescriptor(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnDeriveBNTensorDescriptor(
       bn_param_desc_, data_desc_, mode_));
 #endif
@@ -951,7 +951,7 @@ void BatchNormGradFunctor(const Context &ctx,
   auto reserve_space_size = reserve_space->memory_size();
   // --------------- cudnn batchnorm workspace ---------------
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnGetBatchNormalizationBackwardExWorkspaceSize(
+      common::dynload::cudnnGetBatchNormalizationBackwardExWorkspaceSize(
          /*handle=*/ctx.cudnn_handle(),
          /*mode=*/mode_,
          /*bnIps=*/CUDNN_BATCHNORM_OPS_BN,
@@ -969,7 +969,7 @@ void BatchNormGradFunctor(const Context &ctx,
       static_cast<void *>(ctx.template Alloc<uint8_t>(&workspace_tensor));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnBatchNormalizationBackwardEx(
+      common::dynload::cudnnBatchNormalizationBackwardEx(
          /*handle=*/ctx.cudnn_handle(),
          /*mode=*/mode_,
          /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,
@@ -1006,7 +1006,7 @@ void BatchNormGradFunctor(const Context &ctx,
          /*reserveSpaceSizeInBytes=*/reserve_space_size));
 #else
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnBatchNormalizationBackward(
+      common::dynload::cudnnBatchNormalizationBackward(
          ctx.cudnn_handle(),
          mode_,
          CudnnDataType<T>::kOne(),
@@ -1136,9 +1136,9 @@ void BatchNormGradFunctor(const Context &ctx,
 #else
     // clean when exit.
    PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::cudnnDestroyTensorDescriptor(data_desc_));
+        common::dynload::cudnnDestroyTensorDescriptor(data_desc_));
    PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
+        common::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
 #endif
   } else {
diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
index 20aa02a5f24856..6609d555871473 100644
--- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
@@ -22,10 +22,10 @@ namespace cub = hipcub;
 #include "glog/logging.h"
+#include "paddle/common/enforce.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"
-#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/flags.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/batch_norm_kernel.h"
@@ -618,9 +618,9 @@ void BatchNormKernel(const Context &ctx,
   cudnnBatchNormMode_t mode_;
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));
+      common::dynload::cudnnCreateTensorDescriptor(&data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
+      common::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
 #endif
   if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) {
@@ -671,14 +671,14 @@ void BatchNormKernel(const Context &ctx,
   //     platform::dynload::miopenDeriveBNTensorDescriptor(
   //     bn_param_desc_, data_desc_, test_mode ? miopenBNSpatial : mode_));
 #else
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetTensorNdDescriptor(
       data_desc_,
       CudnnDataType<T>::type,
       x_dims.size() > 3 ? x_dims.size() : 4,
       dims.data(),
       strides.data()));
   // Note: PERSISTENT not implemented for inference
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDeriveBNTensorDescriptor(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnDeriveBNTensorDescriptor(
       bn_param_desc_, data_desc_, test_mode ? CUDNN_BATCHNORM_SPATIAL : mode_));
 #endif
@@ -846,7 +846,7 @@ void BatchNormKernel(const Context &ctx,
     }
   } else {
     PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::cudnnBatchNormalizationForwardInference(
+        common::dynload::cudnnBatchNormalizationForwardInference(
            handle,
            // Note: PERSISTENT not implemented for inference
            CUDNN_BATCHNORM_SPATIAL,
@@ -1132,7 +1132,7 @@ void BatchNormKernel(const Context &ctx,
             "The argument ReserveSpace of batch_norm op is not found."));
     // --------------- cudnn batchnorm workspace ---------------
     PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::
+        common::dynload::
             cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(
                /*handle=*/handle,
                /*mode=*/mode_,
@@ -1146,13 +1146,14 @@ void BatchNormKernel(const Context &ctx,
     // -------------- cudnn batchnorm reserve space --------------
     PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
-            /*handle=*/handle,
-            /*mode=*/mode_,
-            /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,
-            /*activationDesc=*/nullptr,
-            /*xDesc=*/data_desc_,
-            /*sizeInBytes=*/&reserve_space_size));
+        common::dynload::
+            cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
+                /*handle=*/handle,
+                /*mode=*/mode_,
+                /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,
+                /*activationDesc=*/nullptr,
+                /*xDesc=*/data_desc_,
+                /*sizeInBytes=*/&reserve_space_size));
     reserve_space->Resize({static_cast<int64_t>(reserve_space_size)});
     reserve_space_ptr =
@@ -1161,7 +1162,7 @@ void BatchNormKernel(const Context &ctx,
     workspace_ptr =
         static_cast<void *>(ctx.template Alloc<uint8_t>(&workspace_tensor));
     PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::cudnnBatchNormalizationForwardTrainingEx(
+        common::dynload::cudnnBatchNormalizationForwardTrainingEx(
            handle,
            mode_,
            CUDNN_BATCHNORM_OPS_BN,
@@ -1189,7 +1190,7 @@ void BatchNormKernel(const Context &ctx,
             reserve_space_size));
 #else
     PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::cudnnBatchNormalizationForwardTraining(
+        common::dynload::cudnnBatchNormalizationForwardTraining(
            handle,
            mode_,
            CudnnDataType<T>::kOne(),
@@ -1228,9 +1229,9 @@ void BatchNormKernel(const Context &ctx,
 #else
     // clean when exit.
    PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::cudnnDestroyTensorDescriptor(data_desc_));
+        common::dynload::cudnnDestroyTensorDescriptor(data_desc_));
    PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
+        common::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
 #endif
   }
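The training path in batch_norm_kernel.cu above follows cuDNN's two-phase convention: query the workspace and reserve-space sizes for the chosen configuration, allocate both buffers, then invoke the Ex entry point. A minimal sketch of that shape, under the assumption of a plain cuDNN program (the helper name and the descriptor parameters are hypothetical; allocation and error checking are elided):

    #include <cstddef>
    #include <cudnn.h>

    void QueryBatchNormTrainingSizes(cudnnHandle_t handle,
                                     cudnnBatchNormMode_t mode,
                                     cudnnTensorDescriptor_t x_desc,
                                     cudnnTensorDescriptor_t bn_desc,
                                     size_t* workspace_bytes,
                                     size_t* reserve_bytes) {
      // 1) Scratch memory needed by this configuration.
      cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(
          handle, mode, CUDNN_BATCHNORM_OPS_BN,
          /*xDesc=*/x_desc, /*zDesc=*/nullptr, /*yDesc=*/x_desc,
          /*bnScaleBiasMeanVarDesc=*/bn_desc, /*activationDesc=*/nullptr,
          workspace_bytes);
      // 2) Reserve space: written by the forward pass and consumed by the
      //    backward pass, so it must outlive both.
      cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
          handle, mode, CUDNN_BATCHNORM_OPS_BN,
          /*activationDesc=*/nullptr, /*xDesc=*/x_desc, reserve_bytes);
      // 3) Allocate, then call cudnnBatchNormalizationForwardTrainingEx.
    }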
diff --git a/paddle/phi/kernels/gpu/broadcast_tensors_grad_kernel.cu b/paddle/phi/kernels/gpu/broadcast_tensors_grad_kernel.cu
index 40ea1f195069e9..c77e9dbcbb1eb7 100644
--- a/paddle/phi/kernels/gpu/broadcast_tensors_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/broadcast_tensors_grad_kernel.cu
@@ -16,9 +16,9 @@
 #include
+#include "paddle/common/enforce.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/core/dense_tensor.h"
-#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/reduce_function.h"
 #include "paddle/phi/kernels/primitive/functor_primitives.h"
diff --git a/paddle/phi/kernels/gpu/cholesky_solve_kernel.cu b/paddle/phi/kernels/gpu/cholesky_solve_kernel.cu
index add5b8b7448c9a..b8fe833c2cb431 100644
--- a/paddle/phi/kernels/gpu/cholesky_solve_kernel.cu
+++ b/paddle/phi/kernels/gpu/cholesky_solve_kernel.cu
@@ -17,8 +17,8 @@
 #include "paddle/common/backends/dynload/cusolver.h"
 #include "paddle/common/complex.h"
+#include "paddle/common/enforce.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
-#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/lapack/lapack_function.h"
 #include "paddle/phi/kernels/impl/cholesky_solve_kernel_impl.h"
diff --git a/paddle/phi/kernels/gpu/conv_transpose_grad_kernel.cu b/paddle/phi/kernels/gpu/conv_transpose_grad_kernel.cu
index c64facc1e6879b..88cce731ef2495 100644
--- a/paddle/phi/kernels/gpu/conv_transpose_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/conv_transpose_grad_kernel.cu
@@ -14,8 +14,8 @@
 #include "paddle/phi/kernels/conv_transpose_grad_kernel.h"
+#include "paddle/common/ddim.h"
 #include "paddle/phi/common/layout.h"
-#include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/cpu/conv_util.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
diff --git a/paddle/phi/kernels/gpu/conv_transpose_kernel.cu b/paddle/phi/kernels/gpu/conv_transpose_kernel.cu
index bee31450cbf70f..647f9316dc6caa 100644
--- a/paddle/phi/kernels/gpu/conv_transpose_kernel.cu
+++ b/paddle/phi/kernels/gpu/conv_transpose_kernel.cu
@@ -14,8 +14,8 @@
 #include "paddle/phi/kernels/conv_transpose_kernel.h"
+#include "paddle/common/ddim.h"
 #include "paddle/phi/common/layout.h"
-#include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/cpu/conv_util.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
diff --git a/paddle/phi/kernels/gpu/cross_entropy_kernel.cu b/paddle/phi/kernels/gpu/cross_entropy_kernel.cu
index 63e52527cb9cdd..53502c096e7286 100644
--- a/paddle/phi/kernels/gpu/cross_entropy_kernel.cu
+++ b/paddle/phi/kernels/gpu/cross_entropy_kernel.cu
@@ -764,7 +764,7 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
 #ifdef PADDLE_WITH_HIP
   auto mode = axis == rank - 1 ? MIOPEN_SOFTMAX_MODE_INSTANCE
                                : MIOPEN_SOFTMAX_MODE_CHANNEL;
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSoftmaxForward_V2(
       handle,
       phi::backends::gpu::CudnnDataType<T>::kOne(),
       descp,
@@ -777,7 +777,7 @@ static void SoftmaxWithCrossEntropySoftLabel(const GPUContext& dev_ctx,
 #else
   auto mode = axis == rank - 1 ? CUDNN_SOFTMAX_MODE_INSTANCE
                                : CUDNN_SOFTMAX_MODE_CHANNEL;
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSoftmaxForward(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSoftmaxForward(
       handle,
       CUDNN_SOFTMAX_LOG,
       mode,
@@ -1196,7 +1196,7 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx,
 #ifdef PADDLE_WITH_HIP
   auto mode = axis == rank - 1 ? MIOPEN_SOFTMAX_MODE_INSTANCE
                                : MIOPEN_SOFTMAX_MODE_CHANNEL;
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSoftmaxForward_V2(
       handle,
       phi::backends::gpu::CudnnDataType<T>::kOne(),
       descp,
@@ -1209,7 +1209,7 @@ static void SoftmaxWithCrossEntropyHardLabel(const GPUContext& dev_ctx,
 #else
   auto mode = axis == rank - 1 ? CUDNN_SOFTMAX_MODE_INSTANCE
                                : CUDNN_SOFTMAX_MODE_CHANNEL;
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSoftmaxForward(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSoftmaxForward(
       handle,
       CUDNN_SOFTMAX_LOG,
       mode,
diff --git a/paddle/phi/kernels/gpu/cudnn_lstm_cache.h b/paddle/phi/kernels/gpu/cudnn_lstm_cache.h
index 88b20c4dd80977..599878372b6cc8 100644
--- a/paddle/phi/kernels/gpu/cudnn_lstm_cache.h
+++ b/paddle/phi/kernels/gpu/cudnn_lstm_cache.h
@@ -92,7 +92,7 @@ class ScopedRNNBase {
     size_t state_size;
     if (!initialized_) {
       PADDLE_ENFORCE_GPU_SUCCESS(
-          phi::dynload::cudnnDropoutGetStatesSize(handle, &state_size));
+          common::dynload::cudnnDropoutGetStatesSize(handle, &state_size));
       phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
       auto* dev_ctx = reinterpret_cast(pool.Get(place));
       dropout_state->Resize({static_cast<int64_t>(state_size)});
@@ -107,7 +107,7 @@ class ScopedRNNBase {
                                  state_size);
     // ------------------- cudnn rnn descriptors ---------------------
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetRNNDescriptor_v6(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetRNNDescriptor_v6(
         handle,
         rnn_desc_.desc(),
        hidden_size_,
@@ -121,14 +121,14 @@ class ScopedRNNBase {
 #if CUDNN_VERSION >= 7201
     if (!sequence_length.empty()) {
-      PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetRNNPaddingMode(
+      PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetRNNPaddingMode(
           rnn_desc_.desc(), CUDNN_RNN_PADDED_IO_ENABLED));
     }
 #endif
     // ------------------- cudnn weights_size ---------------------
     size_t weights_size_;
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnGetRNNParamsSize(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnGetRNNParamsSize(
         handle, rnn_desc_.desc(), x_descs_[0], &weights_size_, cudnn_type));
     PADDLE_ENFORCE_EQ(
         weights_size_,
@@ -143,12 +143,12 @@ class ScopedRNNBase {
     weight_desc_.descriptor(layout, dim_w);
     // ------------------- cudnn workspace, reserve size ---------------------
     PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::cudnnGetRNNWorkspaceSize(handle,
-                                               rnn_desc_.desc(),
-                                               seq_length_,
-                                               x_descs_.data(),
-                                               workspace_size));
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnGetRNNTrainingReserveSize(
+        common::dynload::cudnnGetRNNWorkspaceSize(handle,
+                                                  rnn_desc_.desc(),
+                                                  seq_length_,
+                                                  x_descs_.data(),
+                                                  workspace_size));
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnGetRNNTrainingReserveSize(
         handle, rnn_desc_.desc(), seq_length_, x_descs_.data(), reserve_size));
   }
   cudnnTensorDescriptor_t* x_descs() { return x_descs_.data(); }
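ScopedRNNBase above also shows a caching detail worth noting: the dropout state buffer is sized and filled only on the first call (guarded by initialized_), and later invocations reuse it. A minimal host-side sketch of that lazy one-time initialization, with hypothetical names and a plain std::vector standing in for the device tensor:

    #include <cstddef>
    #include <vector>

    class DropoutStateCache {
     public:
      // Sizes the state buffer on first use only; later calls reuse it,
      // which keeps the RNG state stable across kernel invocations.
      std::vector<unsigned char>& EnsureState(std::size_t required_bytes) {
        if (!initialized_) {
          state_.resize(required_bytes);
          initialized_ = true;
        }
        return state_;
      }

     private:
      bool initialized_ = false;
      std::vector<unsigned char> state_;
    };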
diff --git a/paddle/phi/kernels/gpu/cudnn_lstm_grad_kernel.cu b/paddle/phi/kernels/gpu/cudnn_lstm_grad_kernel.cu
index 661a1dd90e7e9b..ccfadb99b4fefc 100644
--- a/paddle/phi/kernels/gpu/cudnn_lstm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/cudnn_lstm_grad_kernel.cu
@@ -148,36 +148,36 @@ void CudnnLSTMGradKernel(
   if (!has_seq_length) {
     // This interface is used when the input/output is unpadded.
 #ifdef PADDLE_WITH_HIP
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::miopenRNNBackwardData(handle,
-            rnn.rnn_desc(),
-            seq_length,
-            rnn.y_descs(),
-            out_data,
-            rnn.y_descs(),
-            out_grad_data,
-            rnn.last_h_desc(),
-            last_h_grad_data,
-            rnn.last_c_desc(),
-            last_c_grad_data,
-            rnn.weight_desc(),
-            weight_data,
-            rnn.init_h_desc(),
-            init_h_data,
-            rnn.init_c_desc(),
-            init_c_data,
-            rnn.x_descs(),
-            in_grad_data,
-            rnn.init_h_desc(),
-            init_h_grad_data,
-            rnn.init_c_desc(),
-            init_c_grad_data,
-            workspace_data_.data(),
-            workspace_size,
-            const_cast<uint8_t *>(reserve_data),
-            reserve_size));
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenRNNBackwardData(
+        handle,
+        rnn.rnn_desc(),
+        seq_length,
+        rnn.y_descs(),
+        out_data,
+        rnn.y_descs(),
+        out_grad_data,
+        rnn.last_h_desc(),
+        last_h_grad_data,
+        rnn.last_c_desc(),
+        last_c_grad_data,
+        rnn.weight_desc(),
+        weight_data,
+        rnn.init_h_desc(),
+        init_h_data,
+        rnn.init_c_desc(),
+        init_c_data,
+        rnn.x_descs(),
+        in_grad_data,
+        rnn.init_h_desc(),
+        init_h_grad_data,
+        rnn.init_c_desc(),
+        init_c_grad_data,
+        workspace_data_.data(),
+        workspace_size,
+        const_cast<uint8_t *>(reserve_data),
+        reserve_size));
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenRNNBackwardWeights(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenRNNBackwardWeights(
        handle,
        rnn.rnn_desc(),
        seq_length,
@@ -194,36 +194,36 @@ void CudnnLSTMGradKernel(
        const_cast<uint8_t *>(reserve_data),
        reserve_size));
 #else
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::cudnnRNNBackwardData(handle,
-            rnn.rnn_desc(),
-            seq_length,
-            rnn.y_descs(),
-            out_data,
-            rnn.y_descs(),
-            out_grad_data,
-            rnn.last_h_desc(),
-            last_h_grad_data,
-            rnn.last_c_desc(),
-            last_c_grad_data,
-            rnn.weight_desc(),
-            weight_data,
-            rnn.init_h_desc(),
-            init_h_data,
-            rnn.init_c_desc(),
-            init_c_data,
-            rnn.x_descs(),
-            in_grad_data,
-            rnn.init_h_desc(),
-            init_h_grad_data,
-            rnn.init_c_desc(),
-            init_c_grad_data,
-            workspace_data_.data(),
-            workspace_size,
-            const_cast<uint8_t *>(reserve_data),
-            reserve_size));
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNBackwardData(
+        handle,
+        rnn.rnn_desc(),
+        seq_length,
+        rnn.y_descs(),
+        out_data,
+        rnn.y_descs(),
+        out_grad_data,
+        rnn.last_h_desc(),
+        last_h_grad_data,
+        rnn.last_c_desc(),
+        last_c_grad_data,
+        rnn.weight_desc(),
+        weight_data,
+        rnn.init_h_desc(),
+        init_h_data,
+        rnn.init_c_desc(),
+        init_c_data,
+        rnn.x_descs(),
+        in_grad_data,
+        rnn.init_h_desc(),
+        init_h_grad_data,
+        rnn.init_c_desc(),
+        init_c_grad_data,
+        workspace_data_.data(),
+        workspace_size,
+        const_cast<uint8_t *>(reserve_data),
+        reserve_size));
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRNNBackwardWeights(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNBackwardWeights(
        handle,
        rnn.rnn_desc(),
        seq_length,
@@ -244,7 +244,7 @@ void CudnnLSTMGradKernel(
 #if !defined(PADDLE_WITH_HIP) && CUDNN_VERSION >= 7201
     // for train
     // This interface is used when the input/output is padded.
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRNNBackwardDataEx(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNBackwardDataEx(
        handle,
        rnn.rnn_desc(),
        rnn.y_seq_desc(),
@@ -276,7 +276,7 @@ void CudnnLSTMGradKernel(
        const_cast(reserve_data),
        reserve_size));
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRNNBackwardWeightsEx(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNBackwardWeightsEx(
        handle,
        rnn.rnn_desc(),
        rnn.x_seq_desc(),
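The LSTM kernels above pick between two families of cuDNN entry points: the classic calls when every sample in the batch has the full sequence length, and the *Ex variants (available since cuDNN 7.2) when variable-length, padded IO is in play. A minimal sketch of that dispatch decision, with hypothetical names:

    #include <vector>

    enum class RnnIoPath { kUnpadded, kPaddedEx };

    // An empty length vector means all samples use the full sequence length,
    // so the plain (unpadded) interface suffices; otherwise the padded *Ex
    // interface is required and CUDNN_RNN_PADDED_IO_ENABLED must be set on
    // the RNN descriptor, as the cache class above does.
    RnnIoPath ChooseRnnPath(const std::vector<int>& sequence_lengths) {
      return sequence_lengths.empty() ? RnnIoPath::kUnpadded
                                      : RnnIoPath::kPaddedEx;
    }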
diff --git a/paddle/phi/kernels/gpu/cudnn_lstm_kernel.cu b/paddle/phi/kernels/gpu/cudnn_lstm_kernel.cu
index f3a03727e0bc45..121f15ec87b7e2 100644
--- a/paddle/phi/kernels/gpu/cudnn_lstm_kernel.cu
+++ b/paddle/phi/kernels/gpu/cudnn_lstm_kernel.cu
@@ -44,53 +44,53 @@ void LSTMInferece(const bool &has_seq_length,
     // for inference
     // This interface is used when the input/output is unpadded.
 #ifdef PADDLE_WITH_HIP
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::miopenRNNForwardInference(handle,
-            rnn->rnn_desc(),
-            seq_length,
-            rnn->x_descs(),
-            x_data,
-            rnn->init_h_desc(),
-            init_h_data,
-            rnn->init_c_desc(),
-            init_c_data,
-            rnn->weight_desc(),
-            w_data,
-            rnn->y_descs(),
-            out_data,
-            rnn->last_h_desc(),
-            last_h_data,
-            rnn->last_c_desc(),
-            last_c_data,
-            workspace_data->data(),
-            workspace_size));
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenRNNForwardInference(
+        handle,
+        rnn->rnn_desc(),
+        seq_length,
+        rnn->x_descs(),
+        x_data,
+        rnn->init_h_desc(),
+        init_h_data,
+        rnn->init_c_desc(),
+        init_c_data,
+        rnn->weight_desc(),
+        w_data,
+        rnn->y_descs(),
+        out_data,
+        rnn->last_h_desc(),
+        last_h_data,
+        rnn->last_c_desc(),
+        last_c_data,
+        workspace_data->data(),
+        workspace_size));
 #else
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::cudnnRNNForwardInference(handle,
-            rnn->rnn_desc(),
-            seq_length,
-            rnn->x_descs(),
-            x_data,
-            rnn->init_h_desc(),
-            init_h_data,
-            rnn->init_c_desc(),
-            init_c_data,
-            rnn->weight_desc(),
-            w_data,
-            rnn->y_descs(),
-            out_data,
-            rnn->last_h_desc(),
-            last_h_data,
-            rnn->last_c_desc(),
-            last_c_data,
-            workspace_data->data(),
-            workspace_size));
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNForwardInference(
+        handle,
+        rnn->rnn_desc(),
+        seq_length,
+        rnn->x_descs(),
+        x_data,
+        rnn->init_h_desc(),
+        init_h_data,
+        rnn->init_c_desc(),
+        init_c_data,
+        rnn->weight_desc(),
+        w_data,
+        rnn->y_descs(),
+        out_data,
+        rnn->last_h_desc(),
+        last_h_data,
+        rnn->last_c_desc(),
+        last_c_data,
+        workspace_data->data(),
+        workspace_size));
 #endif
   } else {
 #if !defined(PADDLE_WITH_HIP) && CUDNN_VERSION >= 7201
     // for inference
     // This interface is used when the input/output is padded.
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRNNForwardInferenceEx(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNForwardInferenceEx(
        handle,
        rnn->rnn_desc(),
        rnn->x_seq_desc(),
@@ -269,7 +269,7 @@ void CudnnLSTMKernel(
   // for train
   // This interface is used when the input/output is unpadded.
 #ifdef PADDLE_WITH_HIP
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenRNNForwardTraining(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenRNNForwardTraining(
        handle,
        rnn.rnn_desc(),
        seq_length,
@@ -292,34 +292,34 @@ void CudnnLSTMKernel(
        reserve_data,
        reserve_size));
 #else
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::cudnnRNNForwardTraining(handle,
-            rnn.rnn_desc(),
-            seq_length,
-            rnn.x_descs(),
-            x_data,
-            rnn.init_h_desc(),
-            init_h_data,
-            rnn.init_c_desc(),
-            init_c_data,
-            rnn.weight_desc(),
-            w_data,
-            rnn.y_descs(),
-            out_data,
-            rnn.last_h_desc(),
-            last_h_data,
-            rnn.last_c_desc(),
-            last_c_data,
-            workspace_data_.data(),
-            workspace_size,
-            reserve_data,
-            reserve_size));
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNForwardTraining(
+        handle,
+        rnn.rnn_desc(),
+        seq_length,
+        rnn.x_descs(),
+        x_data,
+        rnn.init_h_desc(),
+        init_h_data,
+        rnn.init_c_desc(),
+        init_c_data,
+        rnn.weight_desc(),
+        w_data,
+        rnn.y_descs(),
+        out_data,
+        rnn.last_h_desc(),
+        last_h_data,
+        rnn.last_c_desc(),
+        last_c_data,
+        workspace_data_.data(),
+        workspace_size,
+        reserve_data,
+        reserve_size));
 #endif
   } else {
 #if !defined(PADDLE_WITH_HIP) && CUDNN_VERSION >= 7201
     // for train
     // This interface is used when the input/output is padded.
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRNNForwardTrainingEx(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNForwardTrainingEx(
        handle,
        rnn.rnn_desc(),
        rnn.x_seq_desc(),
diff --git a/paddle/phi/kernels/gpu/cumprod_grad_kernel.cu b/paddle/phi/kernels/gpu/cumprod_grad_kernel.cu
index fdd9b4ba499146..ad1df809564d52 100644
--- a/paddle/phi/kernels/gpu/cumprod_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/cumprod_grad_kernel.cu
@@ -24,7 +24,7 @@
 #include "paddle/phi/kernels/funcs/for_range.h"
 #include "paddle/phi/kernels/funcs/inclusive_scan.h"
 // NOTE(@xiongkun): use of IsComplex<>
-#include "paddle/phi/core/utils/data_type.h"
+#include "paddle/common/data_type.h"
 namespace phi {
diff --git a/paddle/phi/kernels/gpu/decode_jpeg_kernel.cu b/paddle/phi/kernels/gpu/decode_jpeg_kernel.cu
index 0b5a10b93d85a1..77fb6b78a25737 100644
--- a/paddle/phi/kernels/gpu/decode_jpeg_kernel.cu
+++ b/paddle/phi/kernels/gpu/decode_jpeg_kernel.cu
@@ -41,7 +41,7 @@ void DecodeJpegKernel(const Context& dev_ctx,
   // Create nvJPEG handle
   if (nvjpeg_handle == nullptr) {
     nvjpegStatus_t create_status =
-        phi::dynload::nvjpegCreateSimple(&nvjpeg_handle);
+        common::dynload::nvjpegCreateSimple(&nvjpeg_handle);
     PADDLE_ENFORCE_EQ(
         create_status,
@@ -51,7 +51,7 @@ void DecodeJpegKernel(const Context& dev_ctx,
   nvjpegJpegState_t nvjpeg_state;
   nvjpegStatus_t state_status =
-      phi::dynload::nvjpegJpegStateCreate(nvjpeg_handle, &nvjpeg_state);
+      common::dynload::nvjpegJpegStateCreate(nvjpeg_handle, &nvjpeg_state);
   PADDLE_ENFORCE_EQ(
       state_status,
@@ -66,13 +66,13 @@ void DecodeJpegKernel(const Context& dev_ctx,
   auto* x_data = x.data();
   nvjpegStatus_t info_status =
-      phi::dynload::nvjpegGetImageInfo(nvjpeg_handle,
-                                       x_data,
-                                       (std::size_t)x.numel(),
-                                       &components,
-                                       &subsampling,
-                                       widths,
-                                       heights);
+      common::dynload::nvjpegGetImageInfo(nvjpeg_handle,
+                                          x_data,
+                                          (std::size_t)x.numel(),
+                                          &components,
+                                          &subsampling,
+                                          widths,
+                                          heights);
   PADDLE_ENFORCE_EQ(info_status,
                     NVJPEG_STATUS_SUCCESS,
                     errors::Fatal("nvjpegGetImageInfo failed: ", info_status));
@@ -91,7 +91,7 @@ void DecodeJpegKernel(const Context& dev_ctx,
       output_format = NVJPEG_OUTPUT_RGB;
       output_components = 3;
     } else {
-      phi::dynload::nvjpegJpegStateDestroy(nvjpeg_state);
+      common::dynload::nvjpegJpegStateDestroy(nvjpeg_state);
       PADDLE_THROW(errors::Fatal(
           "The provided mode is not supported for JPEG files on GPU"));
     }
@@ -102,7 +102,7 @@ void DecodeJpegKernel(const Context& dev_ctx,
       output_format = NVJPEG_OUTPUT_RGB;
       output_components = 3;
     } else {
-      phi::dynload::nvjpegJpegStateDestroy(nvjpeg_state);
+      common::dynload::nvjpegJpegStateDestroy(nvjpeg_state);
       PADDLE_THROW(errors::Fatal(
           "The provided mode is not supported for JPEG files on GPU"));
     }
@@ -127,13 +127,13 @@ void DecodeJpegKernel(const Context& dev_ctx,
     out_image.pitch[c] = width;
   }
-  nvjpegStatus_t decode_status = phi::dynload::nvjpegDecode(nvjpeg_handle,
-                                                            nvjpeg_state,
-                                                            x_data,
-                                                            x.numel(),
-                                                            output_format,
-                                                            &out_image,
-                                                            nvjpeg_stream);
+  nvjpegStatus_t decode_status = common::dynload::nvjpegDecode(nvjpeg_handle,
+                                                               nvjpeg_state,
+                                                               x_data,
+                                                               x.numel(),
+                                                               output_format,
+                                                               &out_image,
+                                                               nvjpeg_stream);
 }
 }  // namespace phi
diff --git a/paddle/phi/kernels/gpu/flash_attn_grad_kernel.cu b/paddle/phi/kernels/gpu/flash_attn_grad_kernel.cu
index bfe924bf3c56c2..5b961210281c8b 100644
--- a/paddle/phi/kernels/gpu/flash_attn_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/flash_attn_grad_kernel.cu
@@ -98,7 +98,7 @@ void FlashAttnUnpaddedGradKernel(const Context& ctx,
   VLOG(10) << "FlashAttn bwd seed: " << params.seed
            << ", offset: " << params.offset;
-  bool succ = phi::dynload::flash_attn_varlen_bwd(
+  bool succ = common::dynload::flash_attn_varlen_bwd(
       dout.data(),
       q.data(),
      k.data(),
@@ -207,7 +207,7 @@ void FlashAttnGradKernel(const Context& ctx,
   int num_splits = get_num_split();
-  bool succ = phi::dynload::flash_attn_bwd(
+  bool succ = common::dynload::flash_attn_bwd(
       dout.data(),
       q.data(),
      k.data(),
diff --git a/paddle/phi/kernels/gpu/flash_attn_kernel.cu b/paddle/phi/kernels/gpu/flash_attn_kernel.cu
index aadae0f29c3427..5b2f9f711c4bbf 100644
--- a/paddle/phi/kernels/gpu/flash_attn_kernel.cu
+++ b/paddle/phi/kernels/gpu/flash_attn_kernel.cu
@@ -96,7 +96,7 @@ void FlashAttnUnpaddedKernel(
   VLOG(10) << "FlashAttn fwd seed: " << params.seed
            << ", offset: " << params.offset;
-  bool succ = phi::dynload::flash_attn_varlen_fwd(
+  bool succ = common::dynload::flash_attn_varlen_fwd(
       q.data(),
       k.data(),
      v.data(),
@@ -200,7 +200,7 @@ void FlashAttnKernel(const Context& ctx,
   cudaStream_t stream = ctx.stream();
-  bool succ = phi::dynload::flash_attn_fwd(
+  bool succ = common::dynload::flash_attn_fwd(
       q.data(),
       k.data(),
      v.data(),
diff --git a/paddle/phi/kernels/gpu/flash_attn_utils.h b/paddle/phi/kernels/gpu/flash_attn_utils.h
index ea438014f43125..cad32b299f037a 100644
--- a/paddle/phi/kernels/gpu/flash_attn_utils.h
+++ b/paddle/phi/kernels/gpu/flash_attn_utils.h
@@ -14,9 +14,9 @@
 #pragma once
+#include "paddle/common/enforce.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
-#include "paddle/phi/core/enforce.h"
 #ifdef PADDLE_WITH_FLASHATTN
 #include "paddle/phi/backends/dynload/flashattn.h"
@@ -264,7 +264,7 @@ static void CheckFlashAttnStatus(const bool status) {
       true,
       phi::errors::External(
           "Error in Flash-Attention, detail information is: %s",
-          phi::dynload::flash_attn_error()));
+          common::dynload::flash_attn_error()));
 }
 template
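CheckFlashAttnStatus above illustrates a common FFI pattern: the library reports failure through a bool return and keeps the explanation in a separate error-string accessor. A minimal standalone sketch of the same shape, with a stubbed-in accessor (FlashAttnError here is a hypothetical stand-in, not the real symbol):

    #include <stdexcept>
    #include <string>

    // Stand-in for the library's last-error accessor; the real one returns
    // a thread-local message describing the most recent failure.
    inline const char* FlashAttnError() { return "unknown error"; }

    void CheckStatus(bool ok) {
      if (!ok) {
        // Fetch the detail string at the failure site, since later calls
        // into the library may overwrite it.
        throw std::runtime_error(std::string("Error in Flash-Attention: ") +
                                 FlashAttnError());
      }
    }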
"paddle/common/enforce.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/index_add_grad_kernel.cu b/paddle/phi/kernels/gpu/index_add_grad_kernel.cu index c0d5b737c5fbbf..401ac7cac670c6 100644 --- a/paddle/phi/kernels/gpu/index_add_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/index_add_grad_kernel.cu @@ -14,11 +14,11 @@ #include "paddle/phi/kernels/index_add_grad_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/gpu/index_select_impl.h" diff --git a/paddle/phi/kernels/gpu/index_add_kernel.cu b/paddle/phi/kernels/gpu/index_add_kernel.cu index 8fd15d5435f98b..3f72349a727f2e 100644 --- a/paddle/phi/kernels/gpu/index_add_kernel.cu +++ b/paddle/phi/kernels/gpu/index_add_kernel.cu @@ -15,11 +15,11 @@ #include "paddle/phi/kernels/index_add_kernel.h" #include "glog/logging.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/utils/flags.h" PD_DECLARE_bool(cudnn_deterministic); diff --git a/paddle/phi/kernels/gpu/index_sample_grad_kernel.cu b/paddle/phi/kernels/gpu/index_sample_grad_kernel.cu index ff23dc0c394b94..c0ca88b59d10e0 100644 --- a/paddle/phi/kernels/gpu/index_sample_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/index_sample_grad_kernel.cu @@ -17,11 +17,11 @@ #include #include +#include "paddle/common/data_type.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/index_sample_kernel.cu b/paddle/phi/kernels/gpu/index_sample_kernel.cu index c60abdb3372cb1..669a107e2208f3 100644 --- a/paddle/phi/kernels/gpu/index_sample_kernel.cu +++ b/paddle/phi/kernels/gpu/index_sample_kernel.cu @@ -17,10 +17,10 @@ #include #include +#include "paddle/common/data_type.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/index_select_grad_kernel.cu b/paddle/phi/kernels/gpu/index_select_grad_kernel.cu index 6d0ba9e5bd4ef9..1519119a2b8807 100644 --- a/paddle/phi/kernels/gpu/index_select_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/index_select_grad_kernel.cu @@ -15,11 +15,11 @@ #include "paddle/phi/kernels/index_select_grad_kernel.h" #include "glog/logging.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/utils/flags.h" diff --git 
diff --git a/paddle/phi/kernels/gpu/index_select_impl.h b/paddle/phi/kernels/gpu/index_select_impl.h
index deeb6e5eb20f27..f010de7a807102 100644
--- a/paddle/phi/kernels/gpu/index_select_impl.h
+++ b/paddle/phi/kernels/gpu/index_select_impl.h
@@ -14,11 +14,11 @@
 #pragma once
+#include "paddle/common/data_type.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/core/utils/data_type.h"
 namespace phi {
diff --git a/paddle/phi/kernels/gpu/index_select_kernel.cu b/paddle/phi/kernels/gpu/index_select_kernel.cu
index 910015e00f0103..6678c14022d802 100644
--- a/paddle/phi/kernels/gpu/index_select_kernel.cu
+++ b/paddle/phi/kernels/gpu/index_select_kernel.cu
@@ -14,11 +14,11 @@
 #include "paddle/phi/kernels/index_select_kernel.h"
+#include "paddle/common/data_type.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/core/utils/data_type.h"
 #include "paddle/phi/kernels/gpu/index_select_impl.h"
 namespace phi {
diff --git a/paddle/phi/kernels/gpu/instance_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/instance_norm_grad_kernel.cu
index 0f17a1bcc318a7..57b5e1cdee2820 100644
--- a/paddle/phi/kernels/gpu/instance_norm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/instance_norm_grad_kernel.cu
@@ -398,17 +398,17 @@ void InstanceNormGradKernel(const Context &dev_ctx,
   miopenTensorDescriptor_t in_param_desc_;
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::miopenCreateTensorDescriptor(&data_desc_));
+      common::dynload::miopenCreateTensorDescriptor(&data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::miopenCreateTensorDescriptor(&in_param_desc_));
+      common::dynload::miopenCreateTensorDescriptor(&in_param_desc_));
 #else
   cudnnTensorDescriptor_t data_desc_;
   cudnnTensorDescriptor_t in_param_desc_;
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));
+      common::dynload::cudnnCreateTensorDescriptor(&data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnCreateTensorDescriptor(&in_param_desc_));
+      common::dynload::cudnnCreateTensorDescriptor(&in_param_desc_));
 #endif
   if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) {
@@ -419,22 +419,22 @@ void InstanceNormGradKernel(const Context &dev_ctx,
   epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON);
 #ifdef PADDLE_WITH_HIP
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetTensorDescriptor(
       data_desc_,
       CudnnDataType<T>::type,
       x_dims.size() > 3 ? x_dims.size() : 4,
       const_cast<int *>(dims.data()),
       const_cast<int *>(strides.data())));
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenDeriveBNTensorDescriptor(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenDeriveBNTensorDescriptor(
       in_param_desc_, data_desc_, miopenBNSpatial));
 #else
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetTensorNdDescriptor(
       data_desc_,
       CudnnDataType<T>::type,
       x_dims.size() > 3 ? x_dims.size() : 4,
       dims.data(),
       strides.data()));
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDeriveBNTensorDescriptor(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnDeriveBNTensorDescriptor(
       in_param_desc_, data_desc_, CUDNN_BATCHNORM_SPATIAL));
 #endif
   const auto *saved_mean_data =
@@ -444,28 +444,29 @@ void InstanceNormGradKernel(const Context &dev_ctx,
   if (d_scale && d_bias) {
 #ifdef PADDLE_WITH_HIP
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenBatchNormalizationBackward(
-        dev_ctx.cudnn_handle(),
-        miopenBNSpatial,
-        CudnnDataType<T>::kOne(),
-        CudnnDataType<T>::kZero(),
-        CudnnDataType<T>::kOne(),
-        CudnnDataType<T>::kZero(),
-        data_desc_,
-        x_tmp.template data<T>(),
-        data_desc_,
-        d_y_tmp.template data<T>(),
-        data_desc_,
-        d_x->template data<T>(),
-        in_param_desc_,
-        scale_tmp.template data<BatchNormParamType<T>>(),
-        d_scale_tmp.template data<BatchNormParamType<T>>(),
-        d_bias_tmp.template data<BatchNormParamType<T>>(),
-        epsilon,
-        saved_mean_data,
-        saved_var_data));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        common::dynload::miopenBatchNormalizationBackward(
+            dev_ctx.cudnn_handle(),
+            miopenBNSpatial,
+            CudnnDataType<T>::kOne(),
+            CudnnDataType<T>::kZero(),
+            CudnnDataType<T>::kOne(),
+            CudnnDataType<T>::kZero(),
+            data_desc_,
+            x_tmp.template data<T>(),
+            data_desc_,
+            d_y_tmp.template data<T>(),
+            data_desc_,
+            d_x->template data<T>(),
+            in_param_desc_,
+            scale_tmp.template data<BatchNormParamType<T>>(),
+            d_scale_tmp.template data<BatchNormParamType<T>>(),
+            d_bias_tmp.template data<BatchNormParamType<T>>(),
+            epsilon,
+            saved_mean_data,
+            saved_var_data));
 #else
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnBatchNormalizationBackward(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnBatchNormalizationBackward(
        dev_ctx.cudnn_handle(),
        CUDNN_BATCHNORM_SPATIAL,
        CudnnDataType<T>::kOne(),
@@ -508,14 +509,14 @@ void InstanceNormGradKernel(const Context &dev_ctx,
 #ifdef PADDLE_WITH_HIP
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::miopenDestroyTensorDescriptor(data_desc_));
+      common::dynload::miopenDestroyTensorDescriptor(data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::miopenDestroyTensorDescriptor(in_param_desc_));
+      common::dynload::miopenDestroyTensorDescriptor(in_param_desc_));
 #else
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnDestroyTensorDescriptor(data_desc_));
+      common::dynload::cudnnDestroyTensorDescriptor(data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnDestroyTensorDescriptor(in_param_desc_));
+      common::dynload::cudnnDestroyTensorDescriptor(in_param_desc_));
 #endif
 }
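Both instance-norm paths above clamp the user-supplied epsilon to the library floor before building descriptors, logging rather than failing the launch. A minimal standalone sketch of that clamp (the helper name is hypothetical; CUDNN_BN_MIN_EPSILON and FLT_EPSILON come from cudnn.h and cfloat):

    #include <algorithm>
    #include <cfloat>
    #include <cudnn.h>

    double ClampBatchNormEpsilon(double epsilon) {
      if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) {
        // The kernels above emit LOG(ERROR) here before adjusting, since
        // cuDNN/MIOpen reject epsilons below their documented floor.
      }
      return std::max(epsilon, static_cast<double>(CUDNN_BN_MIN_EPSILON));
    }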
diff --git a/paddle/phi/kernels/gpu/instance_norm_kernel.cu b/paddle/phi/kernels/gpu/instance_norm_kernel.cu
index 7f10eac67c67c8..4d4a0865c88a02 100644
--- a/paddle/phi/kernels/gpu/instance_norm_kernel.cu
+++ b/paddle/phi/kernels/gpu/instance_norm_kernel.cu
@@ -66,17 +66,17 @@ void InstanceNormKernel(const Context &dev_ctx,
   miopenTensorDescriptor_t in_param_desc_;
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::miopenCreateTensorDescriptor(&data_desc_));
+      common::dynload::miopenCreateTensorDescriptor(&data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::miopenCreateTensorDescriptor(&in_param_desc_));
+      common::dynload::miopenCreateTensorDescriptor(&in_param_desc_));
 #else
   cudnnTensorDescriptor_t data_desc_;
   cudnnTensorDescriptor_t in_param_desc_;
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));
+      common::dynload::cudnnCreateTensorDescriptor(&data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnCreateTensorDescriptor(&in_param_desc_));
+      common::dynload::cudnnCreateTensorDescriptor(&in_param_desc_));
 #endif
   if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) {
     LOG(ERROR) << "Provided epsilon is smaller than "
@@ -92,22 +92,22 @@ void InstanceNormKernel(const Context &dev_ctx,
   strides = {NxC * H * W * D, H * W * D, W * D, D, 1};
 #ifdef PADDLE_WITH_HIP
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetTensorDescriptor(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetTensorDescriptor(
       data_desc_,
       CudnnDataType<T>::type,
       x_dims.size() > 3 ? x_dims.size() : 4,
       const_cast<int *>(dims.data()),
       const_cast<int *>(strides.data())));
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenDeriveBNTensorDescriptor(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenDeriveBNTensorDescriptor(
       in_param_desc_, data_desc_, miopenBNSpatial));
 #else
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetTensorNdDescriptor(
       data_desc_,
       CudnnDataType<T>::type,
       x_dims.size() > 3 ? x_dims.size() : 4,
       dims.data(),
       strides.data()));
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDeriveBNTensorDescriptor(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnDeriveBNTensorDescriptor(
       in_param_desc_, data_desc_, CUDNN_BATCHNORM_SPATIAL));
 #endif
@@ -169,7 +169,7 @@ void InstanceNormKernel(const Context &dev_ctx,
 #ifdef PADDLE_WITH_HIP
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::miopenBatchNormalizationForwardTraining(
+      common::dynload::miopenBatchNormalizationForwardTraining(
          handle,
          miopenBNSpatial,
          const_cast(
@@ -193,12 +193,12 @@ void InstanceNormKernel(const Context &dev_ctx,
          static_cast(saved_variance_data)));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::miopenDestroyTensorDescriptor(data_desc_));
+      common::dynload::miopenDestroyTensorDescriptor(data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::miopenDestroyTensorDescriptor(in_param_desc_));
+      common::dynload::miopenDestroyTensorDescriptor(in_param_desc_));
 #else
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnBatchNormalizationForwardTraining(
+      common::dynload::cudnnBatchNormalizationForwardTraining(
          handle,
          CUDNN_BATCHNORM_SPATIAL,
          CudnnDataType<T>::kOne(),
@@ -218,9 +218,9 @@ void InstanceNormKernel(const Context &dev_ctx,
          saved_variance_data));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnDestroyTensorDescriptor(data_desc_));
+      common::dynload::cudnnDestroyTensorDescriptor(data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      phi::dynload::cudnnDestroyTensorDescriptor(in_param_desc_));
+      common::dynload::cudnnDestroyTensorDescriptor(in_param_desc_));
 #endif
 }
diff --git a/paddle/phi/kernels/gpu/miopen_lstm_cache.h b/paddle/phi/kernels/gpu/miopen_lstm_cache.h
index 63604b4bd01e7e..a74604a0281087 100644
--- a/paddle/phi/kernels/gpu/miopen_lstm_cache.h
+++ b/paddle/phi/kernels/gpu/miopen_lstm_cache.h
@@ -80,7 +80,7 @@ class ScopedRNNBase {
     size_t state_size;
     if (!initialized_) {
       PADDLE_ENFORCE_GPU_SUCCESS(
-          phi::dynload::miopenDropoutGetStatesSize(handle, &state_size));
+          common::dynload::miopenDropoutGetStatesSize(handle, &state_size));
       phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
       auto* dev_ctx = reinterpret_cast(pool.Get(place));
       dropout_state->Resize({static_cast<int64_t>(state_size)});
@@ -95,7 +95,7 @@ class ScopedRNNBase {
                                  state_size);
     // ------------------- miopen rnn descriptors ---------------------
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetRNNDescriptor_V2(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetRNNDescriptor_V2(
         rnn_desc_.desc(),
         hidden_size_,
        num_layers_,
@@ -109,7 +109,7 @@ class ScopedRNNBase {
     // ------------------- miopen weights_size ---------------------
     size_t weights_size_;
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenGetRNNParamsSize(
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenGetRNNParamsSize(
         handle, rnn_desc_.desc(), x_descs_[0], &weights_size_, miopen_type));
     PADDLE_ENFORCE_EQ(
         weights_size_,
@@ -124,12 +124,12 @@ class ScopedRNNBase {
     weight_desc_.descriptor(layout, dim_w);
     // ------------------- miopen workspace, reserve size ---------------------
     PADDLE_ENFORCE_GPU_SUCCESS(
-        phi::dynload::miopenGetRNNWorkspaceSize(handle,
-                                                rnn_desc_.desc(),
-                                                seq_length_,
-                                                x_descs_.data(),
-                                                workspace_size));
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenGetRNNTrainingReserveSize(
+        common::dynload::miopenGetRNNWorkspaceSize(handle,
+                                                   rnn_desc_.desc(),
+                                                   seq_length_,
+                                                   x_descs_.data(),
+                                                   workspace_size));
+    PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenGetRNNTrainingReserveSize(
         handle, rnn_desc_.desc(), seq_length_, x_descs_.data(), reserve_size));
   }
   miopenTensorDescriptor_t* x_descs() { return x_descs_.data(); }
diff --git a/paddle/phi/kernels/gpu/multinomial_kernel.cu b/paddle/phi/kernels/gpu/multinomial_kernel.cu
index d23965811f9eb0..45c80a892e6c46 100644
--- a/paddle/phi/kernels/gpu/multinomial_kernel.cu
+++ b/paddle/phi/kernels/gpu/multinomial_kernel.cu
@@ -22,10 +22,10 @@ limitations under the License. */
 namespace cub = hipcub;
 #endif
+#include "paddle/common/ddim.h"
 #include "paddle/common/scalar.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/common/data_type.h"
-#include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/arg_min_max_kernel.h"
 #include "paddle/phi/kernels/empty_kernel.h"
diff --git a/paddle/phi/kernels/gpu/nonzero_kernel.cu b/paddle/phi/kernels/gpu/nonzero_kernel.cu
index bc44f4f033c458..65cdcd3d6a058d 100644
--- a/paddle/phi/kernels/gpu/nonzero_kernel.cu
+++ b/paddle/phi/kernels/gpu/nonzero_kernel.cu
@@ -20,7 +20,7 @@
 namespace cub = hipcub;
 #endif
-#include "paddle/phi/core/ddim.h"
+#include "paddle/common/ddim.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
diff --git a/paddle/phi/kernels/gpu/p_recv_kernel.cu b/paddle/phi/kernels/gpu/p_recv_kernel.cu
index 1e413797b6b893..25d6a677a2e7f6 100644
--- a/paddle/phi/kernels/gpu/p_recv_kernel.cu
+++ b/paddle/phi/kernels/gpu/p_recv_kernel.cu
@@ -16,9 +16,9 @@
 #include "glog/logging.h"
+#include "paddle/common/ddim.h"
 #include "paddle/phi/backends/all_context.h"
 #include "paddle/phi/common/memory_utils.h"
-#include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/kernel_registry.h"
 #if defined(PADDLE_WITH_NCCL) || \
diff --git a/paddle/phi/kernels/gpu/p_send_kernel.cu b/paddle/phi/kernels/gpu/p_send_kernel.cu
index 520adcf730a1d6..3c2af9103114b4 100644
--- a/paddle/phi/kernels/gpu/p_send_kernel.cu
+++ b/paddle/phi/kernels/gpu/p_send_kernel.cu
@@ -16,10 +16,10 @@
 #include "glog/logging.h"
+#include "paddle/common/data_type.h"
 #include "paddle/phi/backends/all_context.h"
 #include "paddle/phi/common/memory_utils.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/core/utils/data_type.h"
 #if defined(PADDLE_WITH_NCCL) || \
     defined(PADDLE_WITH_RCCL) && NCCL_VERSION_CODE >= 2703
diff --git a/paddle/phi/kernels/gpu/put_along_axis_grad_kernel.cu b/paddle/phi/kernels/gpu/put_along_axis_grad_kernel.cu
index 8321bcd1aa7acf..6705a98d976d48 100644
--- a/paddle/phi/kernels/gpu/put_along_axis_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/put_along_axis_grad_kernel.cu
@@ -14,11 +14,11 @@
 #include "paddle/phi/kernels/put_along_axis_grad_kernel.h"
+#include "paddle/common/data_type.h"
"paddle/common/data_type.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/gather_scatter_functor.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/put_along_axis_kernel.cu b/paddle/phi/kernels/gpu/put_along_axis_kernel.cu index b63047973e9b82..b2ce3eb747c2fd 100644 --- a/paddle/phi/kernels/gpu/put_along_axis_kernel.cu +++ b/paddle/phi/kernels/gpu/put_along_axis_kernel.cu @@ -14,11 +14,11 @@ #include "paddle/phi/kernels/put_along_axis_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/gather_scatter_functor.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/qr_kernel.cu b/paddle/phi/kernels/gpu/qr_kernel.cu index 6c036b83ee9d1f..dfaf23c74823a0 100644 --- a/paddle/phi/kernels/gpu/qr_kernel.cu +++ b/paddle/phi/kernels/gpu/qr_kernel.cu @@ -19,9 +19,9 @@ #include #include "paddle/common/backends/dynload/cusolver.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/infermeta/unary.h" #include "paddle/phi/kernels/funcs/complex_functors.h" @@ -191,8 +191,8 @@ void BatchedGeqrf(const GPUContext& dev_ctx, int lwork = 0; auto handle = dev_ctx.cusolver_dn_handle(); - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cusolverDnSgeqrf_bufferSize(handle, m, n, a, lda, &lwork)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cusolverDnSgeqrf_bufferSize( + handle, m, n, a, lda, &lwork)); DenseTensor workspace = DenseTensor(); workspace.Resize(make_ddim({lwork})); @@ -206,15 +206,16 @@ void BatchedGeqrf(const GPUContext& dev_ctx, float* a_working_ptr = &a[i * a_stride]; float* tau_working_ptr = &tau[i * tau_stride]; // compute geqrf - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnSgeqrf(handle, - m, - n, - a_working_ptr, - lda, - tau_working_ptr, - workspace_ptr, - lwork, - info_d)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cusolverDnSgeqrf(handle, + m, + n, + a_working_ptr, + lda, + tau_working_ptr, + workspace_ptr, + lwork, + info_d)); // Do we need synchronized here? // check the error info int info_h; @@ -245,8 +246,8 @@ void BatchedGeqrf(const GPUContext& dev_ctx, int lwork = 0; auto handle = dev_ctx.cusolver_dn_handle(); - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cusolverDnDgeqrf_bufferSize(handle, m, n, a, lda, &lwork)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cusolverDnDgeqrf_bufferSize( + handle, m, n, a, lda, &lwork)); DenseTensor workspace = DenseTensor(); workspace.Resize(make_ddim({lwork})); @@ -260,15 +261,16 @@ void BatchedGeqrf(const GPUContext& dev_ctx, double* a_working_ptr = &a[i * a_stride]; double* tau_working_ptr = &tau[i * tau_stride]; // compute geqrf - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnDgeqrf(handle, - m, - n, - a_working_ptr, - lda, - tau_working_ptr, - workspace_ptr, - lwork, - info_d)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cusolverDnDgeqrf(handle, + m, + n, + a_working_ptr, + lda, + tau_working_ptr, + workspace_ptr, + lwork, + info_d)); // Do we need synchronized here? 
 // check the error info
 int info_h;
@@ -300,7 +302,7 @@ void BatchedOrgqr(const GPUContext& dev_ctx,
   int lwork = 0;
   auto handle = dev_ctx.cusolver_dn_handle();
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnSorgqr_bufferSize(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cusolverDnSorgqr_bufferSize(
       handle, m, n, k, a, lda, tau, &lwork));
   DenseTensor workspace = DenseTensor();
@@ -315,16 +317,17 @@ void BatchedOrgqr(const GPUContext& dev_ctx,
     float* a_working_ptr = &a[i * a_stride];
     float* tau_working_ptr = &tau[i * tau_stride];
     // compute orggr
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnSorgqr(handle,
-                                                              m,
-                                                              n,
-                                                              k,
-                                                              a_working_ptr,
-                                                              lda,
-                                                              tau_working_ptr,
-                                                              workspace_ptr,
-                                                              lwork,
-                                                              info_d));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        common::dynload::cusolverDnSorgqr(handle,
+                                          m,
+                                          n,
+                                          k,
+                                          a_working_ptr,
+                                          lda,
+                                          tau_working_ptr,
+                                          workspace_ptr,
+                                          lwork,
+                                          info_d));
     // Do we need synchronized here?
     // check the error info
     int info_h;
@@ -356,7 +359,7 @@ void BatchedOrgqr(const GPUContext& dev_ctx,
   int lwork = 0;
   auto handle = dev_ctx.cusolver_dn_handle();
-  PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnDorgqr_bufferSize(
+  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cusolverDnDorgqr_bufferSize(
       handle, m, n, k, a, lda, tau, &lwork));
   DenseTensor workspace = DenseTensor();
@@ -371,16 +374,17 @@ void BatchedOrgqr(const GPUContext& dev_ctx,
     double* a_working_ptr = &a[i * a_stride];
     double* tau_working_ptr = &tau[i * tau_stride];
     // compute orggr
-    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnDorgqr(handle,
-                                                              m,
-                                                              n,
-                                                              k,
-                                                              a_working_ptr,
-                                                              lda,
-                                                              tau_working_ptr,
-                                                              workspace_ptr,
-                                                              lwork,
-                                                              info_d));
+    PADDLE_ENFORCE_GPU_SUCCESS(
+        common::dynload::cusolverDnDorgqr(handle,
+                                          m,
+                                          n,
+                                          k,
+                                          a_working_ptr,
+                                          lda,
+                                          tau_working_ptr,
+                                          workspace_ptr,
+                                          lwork,
+                                          info_d));
     // Do we need synchronized here?
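// The BatchedOrgqr specializations above mirror cuSOLVER's two-phase
// convention already seen in BatchedGeqrf: query lwork once, allocate one
// shared workspace, then run each batch entry in place. The whole pattern,
// condensed (float path; names as in the surrounding diff):
int lwork = 0;
PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cusolverDnSorgqr_bufferSize(
    handle, m, n, k, a, lda, tau, &lwork));
DenseTensor workspace;
workspace.Resize(make_ddim({lwork}));
float* workspace_ptr = dev_ctx.template Alloc<float>(&workspace);
for (int i = 0; i < batch_size; ++i) {
  // a and tau advance by their per-matrix strides; info_d is reused
  PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cusolverDnSorgqr(
      handle, m, n, k, &a[i * a_stride], lda, &tau[i * tau_stride],
      workspace_ptr, lwork, info_d));
}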
// check the error info int info_h; diff --git a/paddle/phi/kernels/gpu/rnn_functor.h b/paddle/phi/kernels/gpu/rnn_functor.h index bdbcd05e65772a..861a17a8efc094 100644 --- a/paddle/phi/kernels/gpu/rnn_functor.h +++ b/paddle/phi/kernels/gpu/rnn_functor.h @@ -102,10 +102,10 @@ class RNNDescriptors { if (!is_test_ && !is_initialized) { #ifdef PADDLE_WITH_HIP PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenDropoutGetStatesSize(handle, &state_size)); + common::dynload::miopenDropoutGetStatesSize(handle, &state_size)); #else PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnDropoutGetStatesSize(handle, &state_size)); + common::dynload::cudnnDropoutGetStatesSize(handle, &state_size)); #endif dropout_state->Resize({static_cast(state_size)}); dev_ctx.template Alloc(dropout_state); @@ -120,7 +120,7 @@ class RNNDescriptors { // ------------------- cudnn rnn descriptors --------------------- #ifdef PADDLE_WITH_HIP - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSetRNNDescriptor_V2( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSetRNNDescriptor_V2( rnn_desc_.desc(), hidden_size_, num_layers_, @@ -132,7 +132,7 @@ class RNNDescriptors { miopenRNNdefault, cudnn_type)); #elif CUDNN_VERSION >= 6000 - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetRNNDescriptor_v6( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetRNNDescriptor_v6( handle, rnn_desc_.desc(), hidden_size_, @@ -144,7 +144,7 @@ class RNNDescriptors { CUDNN_RNN_ALGO_STANDARD, cudnn_type)); #else - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetRNNDescriptor( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetRNNDescriptor( rnn_desc_.desc(), hidden_size_, num_layers_, @@ -157,7 +157,7 @@ class RNNDescriptors { #if defined(PADDLE_WITH_CUDA) && CUDNN_VERSION >= 7201 if (!sequence_length.empty()) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetRNNPaddingMode( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetRNNPaddingMode( rnn_desc_.desc(), CUDNN_RNN_PADDED_IO_ENABLED)); } #endif @@ -165,10 +165,10 @@ class RNNDescriptors { // ------------------- cudnn weights_size --------------------- size_t weights_size_; #ifdef PADDLE_WITH_HIP - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenGetRNNParamsSize( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenGetRNNParamsSize( handle, rnn_desc_.desc(), x_descs_[0], &weights_size_, cudnn_type)); #else - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnGetRNNParamsSize( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnGetRNNParamsSize( handle, rnn_desc_.desc(), x_descs_[0], &weights_size_, cudnn_type)); #endif PADDLE_ENFORCE_EQ( @@ -184,21 +184,21 @@ class RNNDescriptors { // ------------------- cudnn workspace, reserve size --------------------- #ifdef PADDLE_WITH_HIP PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenGetRNNWorkspaceSize(handle, - rnn_desc_.desc(), - seq_length_, - x_descs_.data(), - workspace_size)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenGetRNNTrainingReserveSize( + common::dynload::miopenGetRNNWorkspaceSize(handle, + rnn_desc_.desc(), + seq_length_, + x_descs_.data(), + workspace_size)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenGetRNNTrainingReserveSize( handle, rnn_desc_.desc(), seq_length_, x_descs_.data(), reserve_size)); #else PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetRNNWorkspaceSize(handle, - rnn_desc_.desc(), - seq_length_, - x_descs_.data(), - workspace_size)); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnGetRNNTrainingReserveSize( + common::dynload::cudnnGetRNNWorkspaceSize(handle, + rnn_desc_.desc(), + seq_length_, + 
x_descs_.data(), + workspace_size)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnGetRNNTrainingReserveSize( handle, rnn_desc_.desc(), seq_length_, x_descs_.data(), reserve_size)); #endif } diff --git a/paddle/phi/kernels/gpu/rnn_grad_kernel.cu.cc b/paddle/phi/kernels/gpu/rnn_grad_kernel.cu.cc index 3e8dfe813cad70..b8d9f9db25cf81 100644 --- a/paddle/phi/kernels/gpu/rnn_grad_kernel.cu.cc +++ b/paddle/phi/kernels/gpu/rnn_grad_kernel.cu.cc @@ -259,7 +259,7 @@ void RnnGradKernel(const Context &dev_ctx, if (!has_seq_length) { if (x_grad) { #ifdef PADDLE_WITH_HIP - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenRNNBackwardData( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenRNNBackwardData( handle, rnn.rnn_desc(), seq_length, @@ -289,7 +289,7 @@ void RnnGradKernel(const Context &dev_ctx, reserve_size)); #else // This interface is used when the input/output is unpadded. - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRNNBackwardData( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNBackwardData( handle, rnn.rnn_desc(), seq_length, @@ -321,7 +321,7 @@ void RnnGradKernel(const Context &dev_ctx, } if (!weight_grad_list.empty()) { #ifdef PADDLE_WITH_HIP - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenRNNBackwardWeights( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenRNNBackwardWeights( handle, rnn.rnn_desc(), seq_length, @@ -341,7 +341,7 @@ void RnnGradKernel(const Context &dev_ctx, TensorToPermutedWeight( place, stream, weight_grad, &weight_grad_list, rnn_mode, is_bidirec); #else - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRNNBackwardWeights( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNBackwardWeights( handle, rnn.rnn_desc(), seq_length, @@ -364,7 +364,7 @@ void RnnGradKernel(const Context &dev_ctx, // for train // This interface is used when the input/output is padded. if (x_grad) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRNNBackwardDataEx( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNBackwardDataEx( handle, rnn.rnn_desc(), rnn.y_seq_desc(), @@ -398,7 +398,7 @@ void RnnGradKernel(const Context &dev_ctx, } if (!weight_grad_list.empty()) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRNNBackwardWeightsEx( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNBackwardWeightsEx( handle, rnn.rnn_desc(), rnn.x_seq_desc(), diff --git a/paddle/phi/kernels/gpu/rnn_kernel.cu.cc b/paddle/phi/kernels/gpu/rnn_kernel.cu.cc index 82800607bae9de..53c9231f3b8798 100644 --- a/paddle/phi/kernels/gpu/rnn_kernel.cu.cc +++ b/paddle/phi/kernels/gpu/rnn_kernel.cu.cc @@ -43,53 +43,53 @@ void RNNInferece(bool has_seq_length, // for inference // This interface is used when the input/output is unpadded. 
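// The backward hunks just above, and the forward paths that follow, keep
// cuDNN's unpadded/padded split: plain cudnnRNNBackwardData /
// cudnnRNNBackwardWeights serve packed input, while the ..Ex variants read
// per-sequence lengths from x_seq_desc/y_seq_desc and are only compiled in
// on CUDA with cuDNN >= 7.2.1. Reduced to its skeleton, the dispatch in
// rnn_grad_kernel.cu.cc looks like this (argument lists elided):
if (!has_seq_length) {
  // packed layout: one tensor descriptor per time step
  PADDLE_ENFORCE_GPU_SUCCESS(
      common::dynload::cudnnRNNBackwardData(/* ...unpadded args... */));
} else {
#if defined(PADDLE_WITH_CUDA) && CUDNN_VERSION >= 7201
  // padded layout: the sequence descriptors carry the lengths
  PADDLE_ENFORCE_GPU_SUCCESS(
      common::dynload::cudnnRNNBackwardDataEx(/* ...padded args... */));
#endif
}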
#ifdef PADDLE_WITH_HIP - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenRNNForwardInference(handle, - rnn->rnn_desc(), - seq_length, - rnn->x_descs(), - x_data, - rnn->init_h_desc(), - init_h_data, - rnn->init_c_desc(), - init_c_data, - rnn->weight_desc(), - w_data, - rnn->y_descs(), - out_data, - rnn->last_h_desc(), - last_h_data, - rnn->last_c_desc(), - last_c_data, - workspace_data->data(), - workspace_size)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenRNNForwardInference( + handle, + rnn->rnn_desc(), + seq_length, + rnn->x_descs(), + x_data, + rnn->init_h_desc(), + init_h_data, + rnn->init_c_desc(), + init_c_data, + rnn->weight_desc(), + w_data, + rnn->y_descs(), + out_data, + rnn->last_h_desc(), + last_h_data, + rnn->last_c_desc(), + last_c_data, + workspace_data->data(), + workspace_size)); #else - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnRNNForwardInference(handle, - rnn->rnn_desc(), - seq_length, - rnn->x_descs(), - x_data, - rnn->init_h_desc(), - init_h_data, - rnn->init_c_desc(), - init_c_data, - rnn->weight_desc(), - w_data, - rnn->y_descs(), - out_data, - rnn->last_h_desc(), - last_h_data, - rnn->last_c_desc(), - last_c_data, - workspace_data->data(), - workspace_size)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNForwardInference( + handle, + rnn->rnn_desc(), + seq_length, + rnn->x_descs(), + x_data, + rnn->init_h_desc(), + init_h_data, + rnn->init_c_desc(), + init_c_data, + rnn->weight_desc(), + w_data, + rnn->y_descs(), + out_data, + rnn->last_h_desc(), + last_h_data, + rnn->last_c_desc(), + last_c_data, + workspace_data->data(), + workspace_size)); #endif } else { #if defined(PADDLE_WITH_CUDA) && CUDNN_VERSION >= 7201 // for inference // This interface is used when the input/output is padded. - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRNNForwardInferenceEx( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNForwardInferenceEx( handle, rnn->rnn_desc(), rnn->x_seq_desc(), @@ -309,7 +309,7 @@ void RnnKernel(const Context &dev_ctx, // for train // This interface is used when the input/output is unpadded. #ifdef PADDLE_WITH_HIP - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenRNNForwardTraining( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenRNNForwardTraining( handle, rnn.rnn_desc(), seq_length, @@ -332,34 +332,34 @@ void RnnKernel(const Context &dev_ctx, reserve_data, reserve_size)); #else - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnRNNForwardTraining(handle, - rnn.rnn_desc(), - seq_length, - rnn.x_descs(), - x_data, - rnn.init_h_desc(), - init_h_data, - rnn.init_c_desc(), - init_c_data, - rnn.weight_desc(), - w_data, - rnn.y_descs(), - out_data, - rnn.last_h_desc(), - last_h_data, - rnn.last_c_desc(), - last_c_data, - workspace_data_.data(), - workspace_size, - reserve_data, - reserve_size)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNForwardTraining( + handle, + rnn.rnn_desc(), + seq_length, + rnn.x_descs(), + x_data, + rnn.init_h_desc(), + init_h_data, + rnn.init_c_desc(), + init_c_data, + rnn.weight_desc(), + w_data, + rnn.y_descs(), + out_data, + rnn.last_h_desc(), + last_h_data, + rnn.last_c_desc(), + last_c_data, + workspace_data_.data(), + workspace_size, + reserve_data, + reserve_size)); #endif } else { #if defined(PADDLE_WITH_CUDA) && CUDNN_VERSION >= 7201 // for train // This interface is used when the input/output is padded. 
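// A note on the bulky +/- blocks above and below: the argument lists are
// byte-for-byte identical. Replacing phi:: with common:: lengthens the
// qualifier by three characters, which pushes many calls past the column
// limit, so clang-format re-wraps them, e.g.:
//
//   before:  PADDLE_ENFORCE_GPU_SUCCESS(
//                phi::dynload::cudnnRNNForwardTraining(handle, ...));
//   after:   PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNForwardTraining(
//                handle, ...));
//
// The reflow, not the rename, accounts for most of the churn in these files.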
- PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRNNForwardTrainingEx( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnRNNForwardTrainingEx( handle, rnn.rnn_desc(), rnn.x_seq_desc(), diff --git a/paddle/phi/kernels/gpu/shuffle_batch_grad_kernel.cu b/paddle/phi/kernels/gpu/shuffle_batch_grad_kernel.cu index 33b39666edf071..9472861a64c8e3 100644 --- a/paddle/phi/kernels/gpu/shuffle_batch_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/shuffle_batch_grad_kernel.cu @@ -21,9 +21,9 @@ #include #endif +#include "paddle/common/errors.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/for_range.h" diff --git a/paddle/phi/kernels/gpu/shuffle_batch_kernel.cu b/paddle/phi/kernels/gpu/shuffle_batch_kernel.cu index e145e7e1c8a206..058c3c6e686b0d 100644 --- a/paddle/phi/kernels/gpu/shuffle_batch_kernel.cu +++ b/paddle/phi/kernels/gpu/shuffle_batch_kernel.cu @@ -21,9 +21,9 @@ #include #endif +#include "paddle/common/errors.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/for_range.h" diff --git a/paddle/phi/kernels/gpu/svd_kernel.cu b/paddle/phi/kernels/gpu/svd_kernel.cu index 28857334e77b61..0d9dcc302bcb74 100644 --- a/paddle/phi/kernels/gpu/svd_kernel.cu +++ b/paddle/phi/kernels/gpu/svd_kernel.cu @@ -61,22 +61,22 @@ void GesvdjBatched(const phi::GPUContext& dev_ctx, int lwork = 0; auto handle = dev_ctx.cusolver_dn_handle(); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cusolverDnCreateGesvdjInfo(&gesvdj_params)); + common::dynload::cusolverDnCreateGesvdjInfo(&gesvdj_params)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cusolverDnSgesvdj_bufferSize(handle, - jobz, - thin_UV, - m, - n, - A, - lda, - S, - U, - ldu, - V, - ldt, - &lwork, - gesvdj_params)); + common::dynload::cusolverDnSgesvdj_bufferSize(handle, + jobz, + thin_UV, + m, + n, + A, + lda, + S, + U, + ldu, + V, + ldt, + &lwork, + gesvdj_params)); auto workspace = phi::memory_utils::Alloc( dev_ctx.GetPlace(), lwork * sizeof(float), @@ -86,22 +86,23 @@ void GesvdjBatched(const phi::GPUContext& dev_ctx, int stride_U = ldu * (thin_UV ? k : m); int stride_V = ldt * (thin_UV ? k : n); for (int i = 0; i < batchSize; ++i) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnSgesvdj(handle, - jobz, - thin_UV, - m, - n, - A + stride_A * i, - lda, - S + k * i, - U + stride_U * i, - ldu, - V + stride_V * i, - ldt, - workspace_ptr, - lwork, - info, - gesvdj_params)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cusolverDnSgesvdj(handle, + jobz, + thin_UV, + m, + n, + A + stride_A * i, + lda, + S + k * i, + U + stride_U * i, + ldu, + V + stride_V * i, + ldt, + workspace_ptr, + lwork, + info, + gesvdj_params)); // check the error info int error_info; memory_utils::Copy(phi::CPUPlace(), @@ -117,7 +118,7 @@ void GesvdjBatched(const phi::GPUContext& dev_ctx, "For batch [%d]: CUSolver SVD is not zero. 
[%d]", i, error_info)); } PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cusolverDnDestroyGesvdjInfo(gesvdj_params)); + common::dynload::cusolverDnDestroyGesvdjInfo(gesvdj_params)); } template <> @@ -142,22 +143,22 @@ void GesvdjBatched(const phi::GPUContext& dev_ctx, int lwork = 0; auto handle = dev_ctx.cusolver_dn_handle(); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cusolverDnCreateGesvdjInfo(&gesvdj_params)); + common::dynload::cusolverDnCreateGesvdjInfo(&gesvdj_params)); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cusolverDnDgesvdj_bufferSize(handle, - jobz, - thin_UV, - m, - n, - A, - lda, - S, - U, - ldu, - V, - ldt, - &lwork, - gesvdj_params)); + common::dynload::cusolverDnDgesvdj_bufferSize(handle, + jobz, + thin_UV, + m, + n, + A, + lda, + S, + U, + ldu, + V, + ldt, + &lwork, + gesvdj_params)); auto workspace = phi::memory_utils::Alloc( dev_ctx.GetPlace(), lwork * sizeof(double), @@ -167,22 +168,23 @@ void GesvdjBatched(const phi::GPUContext& dev_ctx, int stride_U = ldu * (thin_UV ? k : m); int stride_V = ldt * (thin_UV ? k : n); for (int i = 0; i < batchSize; ++i) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnDgesvdj(handle, - jobz, - thin_UV, - m, - n, - A + stride_A * i, - lda, - S + k * i, - U + stride_U * i, - ldu, - V + stride_V * i, - ldt, - workspace_ptr, - lwork, - info, - gesvdj_params)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cusolverDnDgesvdj(handle, + jobz, + thin_UV, + m, + n, + A + stride_A * i, + lda, + S + k * i, + U + stride_U * i, + ldu, + V + stride_V * i, + ldt, + workspace_ptr, + lwork, + info, + gesvdj_params)); // check the error info int error_info; memory_utils::Copy(phi::CPUPlace(), @@ -198,7 +200,7 @@ void GesvdjBatched(const phi::GPUContext& dev_ctx, "For batch [%d]: CUSolver SVD is not zero. [%d]", i, error_info)); } PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cusolverDnDestroyGesvdjInfo(gesvdj_params)); + common::dynload::cusolverDnDestroyGesvdjInfo(gesvdj_params)); } template diff --git a/paddle/phi/kernels/gpu/take_along_axis_grad_kernel.cu b/paddle/phi/kernels/gpu/take_along_axis_grad_kernel.cu index 6cea7592836730..6191b97fba398f 100644 --- a/paddle/phi/kernels/gpu/take_along_axis_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/take_along_axis_grad_kernel.cu @@ -14,10 +14,10 @@ #include "paddle/phi/kernels/take_along_axis_grad_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/gather_scatter_functor.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/gpu/take_along_axis_kernel.cu b/paddle/phi/kernels/gpu/take_along_axis_kernel.cu index ba4c6ba27e6824..bbdd7b914ceea4 100644 --- a/paddle/phi/kernels/gpu/take_along_axis_kernel.cu +++ b/paddle/phi/kernels/gpu/take_along_axis_kernel.cu @@ -14,10 +14,10 @@ #include "paddle/phi/kernels/take_along_axis_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/gather_scatter_functor.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/triangular_solve_kernel.cu b/paddle/phi/kernels/gpu/triangular_solve_kernel.cu index 889c421eb0bb96..4db6dc842f2a5e 100644 --- a/paddle/phi/kernels/gpu/triangular_solve_kernel.cu +++ 
b/paddle/phi/kernels/gpu/triangular_solve_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/triangular_solve_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/expand_kernel.h" diff --git a/paddle/phi/kernels/gpu/unique_consecutive_kernel.cu b/paddle/phi/kernels/gpu/unique_consecutive_kernel.cu index 448e6ca38b3f50..83cae05b1350fa 100644 --- a/paddle/phi/kernels/gpu/unique_consecutive_kernel.cu +++ b/paddle/phi/kernels/gpu/unique_consecutive_kernel.cu @@ -17,8 +17,8 @@ #include "paddle/phi/kernels/unique_consecutive_kernel.h" #include "paddle/phi/kernels/gpu/unique_consecutive_functor.h" +#include "paddle/common/errors.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/gpudnn/affine_grid_grad_kernel.cu b/paddle/phi/kernels/gpudnn/affine_grid_grad_kernel.cu index 2a3c9515ac2ea7..79bdcb1d474967 100644 --- a/paddle/phi/kernels/gpudnn/affine_grid_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/affine_grid_grad_kernel.cu @@ -58,8 +58,9 @@ void AffineGridGradCudnnKernel(const Context& dev_ctx, const T* output_grad_data = output_grad.data(); T* theta_grad_data = dev_ctx.template Alloc(theta_grad); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSpatialTfGridGeneratorBackward( - handle, cudnn_st_desc, output_grad_data, theta_grad_data)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cudnnSpatialTfGridGeneratorBackward( + handle, cudnn_st_desc, output_grad_data, theta_grad_data)); } } // namespace phi diff --git a/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu b/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu index 060f8c86710b58..1ee7a6fcb164ef 100644 --- a/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu +++ b/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu @@ -55,7 +55,7 @@ void AffineGridCudnnKernel(const Context& dev_ctx, cudnnSpatialTransformerDescriptor_t cudnn_st_desc = st_desc.descriptor(4, h_size_data); - PADDLE_ENFORCE_EQ(phi::dynload::cudnnSpatialTfGridGeneratorForward( + PADDLE_ENFORCE_EQ(common::dynload::cudnnSpatialTfGridGeneratorForward( handle, cudnn_st_desc, theta_data, output_data), 0, phi::errors::Fatal("Some errors has occurred " diff --git a/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h b/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h index d0bdcc10beaa83..726dff49812797 100644 --- a/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h +++ b/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h @@ -19,11 +19,11 @@ limitations under the License. 
*/ #include "glog/logging.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/dynload/cudnn_frontend.h" #include "paddle/phi/backends/gpu/cuda/cudnn_desc.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/autotune/cache.h" #include "paddle/phi/kernels/autotune/switch_autotune.h" #include "paddle/phi/kernels/gpudnn/conv_gpudnn_base.h" @@ -286,7 +286,7 @@ class CudnnFrontendConvHelper { .setDataPointers(data_ptrs->size(), data_ptrs->data()) .setUids(uids->size(), uids->data()) .build(); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnBackendExecute( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnBackendExecute( handle_, plan_desc, variant_pack.get_raw_desc())); }, workspace_size); diff --git a/paddle/phi/kernels/gpudnn/conv_cudnn_v7.h b/paddle/phi/kernels/gpudnn/conv_cudnn_v7.h index dfea9013ab0b87..05007c3c195f19 100644 --- a/paddle/phi/kernels/gpudnn/conv_cudnn_v7.h +++ b/paddle/phi/kernels/gpudnn/conv_cudnn_v7.h @@ -112,13 +112,14 @@ struct SearchAlgorithmBase { cudnnConvolutionFwdAlgo_t algo) { size_t workspace_size = 0; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionForwardWorkspaceSize(args.handle, - args.idesc.desc(), - args.wdesc.desc(), - args.cdesc.desc(), - args.odesc.desc(), - algo, - &workspace_size)); + common::dynload::cudnnGetConvolutionForwardWorkspaceSize( + args.handle, + args.idesc.desc(), + args.wdesc.desc(), + args.cdesc.desc(), + args.odesc.desc(), + algo, + &workspace_size)); return workspace_size; } @@ -140,7 +141,7 @@ struct SearchAlgorithmBase { int best_algo_idx = 0; std::vector perf_results(kNUM_CUDNN_FWD_ALGS); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionForwardAlgorithm_v7( + common::dynload::cudnnGetConvolutionForwardAlgorithm_v7( args.handle, args.idesc.desc(), args.wdesc.desc(), @@ -167,7 +168,7 @@ struct SearchAlgorithmBase { << result.workspace_size << ") exceeds the limit(" << workspace_size_limit << ")"; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionForwardAlgorithm( + common::dynload::cudnnGetConvolutionForwardAlgorithm( args.handle, args.idesc.desc(), args.wdesc.desc(), @@ -180,7 +181,7 @@ struct SearchAlgorithmBase { } #else PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionForwardAlgorithm( + common::dynload::cudnnGetConvolutionForwardAlgorithm( args.handle, args.idesc.desc(), args.wdesc.desc(), @@ -208,7 +209,7 @@ struct SearchAlgorithmBase { std::vector perf_results(kNUM_CUDNN_FWD_ALGS); auto cudnn_find_func = [&](void* workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnFindConvolutionForwardAlgorithmEx( + common::dynload::cudnnFindConvolutionForwardAlgorithmEx( args.handle, args.idesc.desc(), args.x->data(), @@ -246,7 +247,7 @@ struct SearchAlgorithmBase { size_t max_workspace_size = 0; for (size_t algo = 0; algo < kNUM_CUDNN_FWD_ALGS; ++algo) { size_t workspace_size = 0; - auto status = phi::dynload::cudnnGetConvolutionForwardWorkspaceSize( + auto status = common::dynload::cudnnGetConvolutionForwardWorkspaceSize( args.handle, args.idesc.desc(), args.wdesc.desc(), @@ -286,7 +287,7 @@ struct SearchAlgorithmBase { cudnnConvolutionBwdDataAlgo_t algo) { size_t workspace_size = 0; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize( + common::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize( args.handle, args.wdesc.desc(), args.odesc.desc(), @@ -316,7 +317,7 @@ struct SearchAlgorithmBase { int 
best_algo_idx = 0; std::vector perf_results(kNUM_CUDNN_BWD_DATA_ALGS); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionBackwardDataAlgorithm_v7( + common::dynload::cudnnGetConvolutionBackwardDataAlgorithm_v7( args.handle, args.wdesc.desc(), args.odesc.desc(), @@ -351,7 +352,7 @@ struct SearchAlgorithmBase { << result.workspace_size << ") exceeds the limit(" << workspace_size_limit << ")"; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionBackwardDataAlgorithm( + common::dynload::cudnnGetConvolutionBackwardDataAlgorithm( args.handle, args.wdesc.desc(), args.odesc.desc(), @@ -364,7 +365,7 @@ struct SearchAlgorithmBase { } #else PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionBackwardDataAlgorithm( + common::dynload::cudnnGetConvolutionBackwardDataAlgorithm( args.handle, args.wdesc.desc(), args.odesc.desc(), @@ -392,7 +393,7 @@ struct SearchAlgorithmBase { std::vector perf_results(kNUM_CUDNN_BWD_DATA_ALGS); auto cudnn_find_func = [&](void* workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnFindConvolutionBackwardDataAlgorithmEx( + common::dynload::cudnnFindConvolutionBackwardDataAlgorithmEx( args.handle, args.wdesc.desc(), args.w->data(), @@ -431,7 +432,7 @@ struct SearchAlgorithmBase { for (size_t algo = 0; algo < kNUM_CUDNN_BWD_DATA_ALGS; ++algo) { size_t workspace_size = 0; auto status = - phi::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize( + common::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize( args.handle, args.wdesc.desc(), args.odesc.desc(), @@ -470,7 +471,7 @@ struct SearchAlgorithmBase { phi::backends::gpu::CUDAGraphCaptureModeGuard guard; size_t workspace_size = 0; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize( + common::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize( args.handle, args.idesc.desc(), args.odesc.desc(), @@ -500,7 +501,7 @@ struct SearchAlgorithmBase { int best_algo_idx = 0; std::vector perf_results(kNUM_CUDNN_BWD_FILTER_ALGS); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionBackwardFilterAlgorithm_v7( + common::dynload::cudnnGetConvolutionBackwardFilterAlgorithm_v7( args.handle, args.idesc.desc(), args.odesc.desc(), @@ -523,7 +524,7 @@ struct SearchAlgorithmBase { << result.workspace_size << ") exceeds the limit(" << workspace_size_limit << ")"; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionBackwardFilterAlgorithm( + common::dynload::cudnnGetConvolutionBackwardFilterAlgorithm( args.handle, args.idesc.desc(), args.odesc.desc(), @@ -536,7 +537,7 @@ struct SearchAlgorithmBase { } #else PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnGetConvolutionBackwardFilterAlgorithm( + common::dynload::cudnnGetConvolutionBackwardFilterAlgorithm( args.handle, args.idesc.desc(), args.odesc.desc(), @@ -568,7 +569,7 @@ struct SearchAlgorithmBase { auto cudnn_find_func = [&](void* workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnFindConvolutionBackwardFilterAlgorithmEx( + common::dynload::cudnnFindConvolutionBackwardFilterAlgorithmEx( args.handle, args.idesc.desc(), args.x->data(), @@ -597,7 +598,7 @@ struct SearchAlgorithmBase { int max_algos = GetAlgorithmMaxCount(args.handle); std::vector perf_results(max_algos); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnFindConvolutionBackwardFilterAlgorithm( + common::dynload::cudnnFindConvolutionBackwardFilterAlgorithm( args.handle, args.idesc.desc(), args.odesc.desc(), @@ -625,7 +626,7 @@ struct SearchAlgorithmBase { #if CUDNN_VERSION_MIN(7, 0, 1) int max_algos = 0; 
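// The two search strategies interleaved in conv_cudnn_v7.h differ in cost,
// not just in API: cudnnGetConvolution*Algorithm_v7 ranks algorithms
// heuristically without running them, while cudnnFindConvolution*AlgorithmEx
// benchmarks real kernels inside a workspace-bounded callback. Sketch of the
// exhaustive forward path as wired up here (assuming the v7 perf-struct
// type; argument order per cuDNN's documented signature):
int returned_algo_count = 0;
std::vector<cudnnConvolutionFwdAlgoPerf_t> perf_results(kNUM_CUDNN_FWD_ALGS);
auto cudnn_find_func = [&](void* workspace_ptr) {
  PADDLE_ENFORCE_GPU_SUCCESS(
      common::dynload::cudnnFindConvolutionForwardAlgorithmEx(
          args.handle,
          args.idesc.desc(), args.x->data<T>(),
          args.wdesc.desc(), args.w->data<T>(),
          args.cdesc.desc(),
          args.odesc.desc(), const_cast<T*>(args.o->data<T>()),
          kNUM_CUDNN_FWD_ALGS, &returned_algo_count,
          perf_results.data(), workspace_ptr, workspace_size_limit));
};
workspace_handle.RunFunc(cudnn_find_func, workspace_size_limit);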
auto status = - phi::dynload::cudnnGetConvolutionBackwardFilterAlgorithmMaxCount( + common::dynload::cudnnGetConvolutionBackwardFilterAlgorithmMaxCount( handle, &max_algos); if (status == gpuSuccess) { VLOG(5) << "[BackwardFilter] max_algos: predefined=" @@ -643,7 +644,7 @@ struct SearchAlgorithmBase { for (size_t algo = 0; algo < kNUM_CUDNN_BWD_FILTER_ALGS; ++algo) { size_t workspace_size = 0; auto status = - phi::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize( + common::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize( args.handle, args.idesc.desc(), args.odesc.desc(), @@ -738,7 +739,7 @@ struct SearchAlgorithm : public SearchAlgorithmBase { const phi::backends::gpu::ConvolutionDescriptor& cdesc) { #if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1) if (ctx.GetComputeCapability() >= 70 && dtype == CUDNN_DATA_HALF) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetConvolutionMathType( cdesc.desc(), CUDNN_TENSOR_OP_MATH)); VLOG(5) << "Enable Tensor Core for FLOAT16"; #if CUDA_VERSION >= 11000 @@ -746,16 +747,16 @@ struct SearchAlgorithm : public SearchAlgorithmBase { } else if (ctx.GetComputeCapability() >= 80 && dtype == CUDNN_DATA_BFLOAT16) { VLOG(5) << "Enable Tensor Core for BFLOAT16"; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetConvolutionMathType( cdesc.desc(), CUDNN_TENSOR_OP_MATH)); #endif // CUDNN_VERSION_MIN(8, 1, 0) } else if (dtype == CUDNN_DATA_FLOAT && !cdesc.allow_tf32_) { VLOG(5) << "Disable TensorFloat (Tensor Core) for FLOAT"; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetConvolutionMathType( cdesc.desc(), CUDNN_FMA_MATH)); #endif // CUDA_VERSION >= 11000 } else { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetConvolutionMathType( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetConvolutionMathType( cdesc.desc(), CUDNN_DEFAULT_MATH)); } #endif @@ -788,7 +789,7 @@ struct ConvRunner { for (int i = 0; i < groups; i++) { workspace_handle->RunFunc( [&](void* workspace_ptr) { - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnConvolutionForward( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnConvolutionForward( cudnn_handle, &alpha, args.idesc.desc(), @@ -832,7 +833,7 @@ struct ConvRunner { workspace_handle->RunFunc( [&](void* workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnConvolutionBackwardData( + common::dynload::cudnnConvolutionBackwardData( cudnn_handle, &alpha, args.wdesc.desc(), @@ -876,7 +877,7 @@ struct ConvRunner { workspace_handle->RunFunc( [&](void* workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnConvolutionBackwardFilter( + common::dynload::cudnnConvolutionBackwardFilter( cudnn_handle, &alpha, args.idesc.desc(), diff --git a/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu b/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu index a2c4db700c4ba6..deeb384b4510b7 100644 --- a/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu @@ -230,38 +230,39 @@ void ConvCudnnGradKernelImplV7( workspace_handle.RunFunc( [&](void* cudnn_workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenConvolutionBackwardData(handle, - &alpha, - args1.odesc.desc(), - output_grad_data, - args1.wdesc.desc(), - filter_data, - args1.cdesc.desc(), - bwd_result.algo, - &beta, - args1.idesc.desc(), - temp_tensor_data, - cudnn_workspace_ptr, - 
workspace_size)); + common::dynload::miopenConvolutionBackwardData( + handle, + &alpha, + args1.odesc.desc(), + output_grad_data, + args1.wdesc.desc(), + filter_data, + args1.cdesc.desc(), + bwd_result.algo, + &beta, + args1.idesc.desc(), + temp_tensor_data, + cudnn_workspace_ptr, + workspace_size)); }, workspace_size); PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenOpTensor(handle, - miopenTensorOpAdd, - &alpha, - args1.idesc.desc(), - transformed_input_grad_data, - &alpha, - args1.idesc.desc(), - temp_tensor_data, - &beta, - args1.idesc.desc(), - transformed_input_grad_data)); + common::dynload::miopenOpTensor(handle, + miopenTensorOpAdd, + &alpha, + args1.idesc.desc(), + transformed_input_grad_data, + &alpha, + args1.idesc.desc(), + temp_tensor_data, + &beta, + args1.idesc.desc(), + transformed_input_grad_data)); } else { workspace_handle.RunFunc( [&](void* cudnn_workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenConvolutionBackwardData( + common::dynload::miopenConvolutionBackwardData( handle, &alpha, args1.odesc.desc(), @@ -302,7 +303,7 @@ void ConvCudnnGradKernelImplV7( workspace_handle.RunFunc( [&](void* cudnn_workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenConvolutionBackwardWeights( + common::dynload::miopenConvolutionBackwardWeights( handle, &alpha, args2.odesc.desc(), @@ -1211,19 +1212,20 @@ void ConvCudnnGradGradKernel( workspace_handle.RunFunc( [&](void* workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenConvolutionForward(handle, - &alpha, - args1.idesc.desc(), - ddx, - args1.wdesc.desc(), - w, - args1.cdesc.desc(), - fwd_result1.algo, - &beta, - args1.odesc.desc(), - transformed_ddy_channel, - workspace_ptr, - workspace_size)); + common::dynload::miopenConvolutionForward( + handle, + &alpha, + args1.idesc.desc(), + ddx, + args1.wdesc.desc(), + w, + args1.cdesc.desc(), + fwd_result1.algo, + &beta, + args1.odesc.desc(), + transformed_ddy_channel, + workspace_ptr, + workspace_size)); }, workspace_size); #else @@ -1248,19 +1250,20 @@ void ConvCudnnGradGradKernel( workspace_handle.RunFunc( [&](void* workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenConvolutionForward(handle, - &alpha, - args2.idesc.desc(), - x, - args2.wdesc.desc(), - ddw, - args2.cdesc.desc(), - fwd_result2.algo, - &beta, - args2.odesc.desc(), - transformed_ddy_channel, - workspace_ptr, - workspace_size)); + common::dynload::miopenConvolutionForward( + handle, + &alpha, + args2.idesc.desc(), + x, + args2.wdesc.desc(), + ddw, + args2.cdesc.desc(), + fwd_result2.algo, + &beta, + args2.odesc.desc(), + transformed_ddy_channel, + workspace_ptr, + workspace_size)); }, workspace_size); #else @@ -1290,7 +1293,7 @@ void ConvCudnnGradGradKernel( workspace_handle.RunFunc( [&](void* workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenConvolutionBackwardWeights( + common::dynload::miopenConvolutionBackwardWeights( handle, &alpha, args3.odesc.desc(), @@ -1329,7 +1332,7 @@ void ConvCudnnGradGradKernel( workspace_handle.RunFunc( [&](void* workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenConvolutionBackwardData( + common::dynload::miopenConvolutionBackwardData( handle, &alpha, args4.odesc.desc(), diff --git a/paddle/phi/kernels/gpudnn/conv_kernel.cu b/paddle/phi/kernels/gpudnn/conv_kernel.cu index 093f6cf80e449b..036ef19f49f8da 100644 --- a/paddle/phi/kernels/gpudnn/conv_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_kernel.cu @@ -93,8 +93,8 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input, // cudnn 7 can 
support groups, no need to do it manually // FIXME(typhoonzero): find a better way to disable groups // rather than setting it to 1. - PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::cudnnSetConvolutionGroupCount(args.cdesc.desc(), groups)); + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSetConvolutionGroupCount( + args.cdesc.desc(), groups)); groups = 1; #endif #ifdef PADDLE_WITH_HIP @@ -180,19 +180,19 @@ void ConvCudnnKernelImplV7(const DenseTensor* transformed_input, workspace_handle.RunFunc( [&](void* workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenConvolutionForward(handle, - &alpha, - args.idesc.desc(), - input_data, - args.wdesc.desc(), - filter_data, - args.cdesc.desc(), - fwd_result.algo, - &beta, - args.odesc.desc(), - output_data, - workspace_ptr, - workspace_size)); + common::dynload::miopenConvolutionForward(handle, + &alpha, + args.idesc.desc(), + input_data, + args.wdesc.desc(), + filter_data, + args.cdesc.desc(), + fwd_result.algo, + &beta, + args.odesc.desc(), + output_data, + workspace_ptr, + workspace_size)); }, workspace_size); #else diff --git a/paddle/phi/kernels/gpudnn/conv_miopen_helper.h b/paddle/phi/kernels/gpudnn/conv_miopen_helper.h index be2c09bf8d18a8..324f89b86056a5 100644 --- a/paddle/phi/kernels/gpudnn/conv_miopen_helper.h +++ b/paddle/phi/kernels/gpudnn/conv_miopen_helper.h @@ -44,7 +44,7 @@ struct SearchAlgorithm { miopenConvAlgoPerf_t find_result; auto cudnn_find_func = [&](void* cudnn_workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenFindConvolutionForwardAlgorithm( + common::dynload::miopenFindConvolutionForwardAlgorithm( args.handle, args.idesc.desc(), args.x->data(), @@ -70,7 +70,7 @@ struct SearchAlgorithm { static size_t GetWorkspaceSize(const ConvArgs& args) { size_t workspace_size = 0; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenConvolutionForwardGetWorkSpaceSize( + common::dynload::miopenConvolutionForwardGetWorkSpaceSize( args.handle, args.wdesc.desc(), args.idesc.desc(), @@ -100,7 +100,7 @@ struct SearchAlgorithm { miopenConvAlgoPerf_t find_result; auto cudnn_find_func = [&](void* cudnn_workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenFindConvolutionBackwardDataAlgorithm( + common::dynload::miopenFindConvolutionBackwardDataAlgorithm( args.handle, args.odesc.desc(), args.o->data(), @@ -126,7 +126,7 @@ struct SearchAlgorithm { static size_t GetWorkspaceSize(const ConvArgs& args) { size_t workspace_size = 0; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenConvolutionBackwardDataGetWorkSpaceSize( + common::dynload::miopenConvolutionBackwardDataGetWorkSpaceSize( args.handle, args.odesc.desc(), args.wdesc.desc(), @@ -156,7 +156,7 @@ struct SearchAlgorithm { miopenConvAlgoPerf_t find_result; auto cudnn_find_func = [&](void* cudnn_workspace_ptr) { PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenFindConvolutionBackwardWeightsAlgorithm( + common::dynload::miopenFindConvolutionBackwardWeightsAlgorithm( args.handle, args.odesc.desc(), args.o->data(), @@ -182,7 +182,7 @@ struct SearchAlgorithm { static size_t GetWorkspaceSize(const ConvArgs& args) { size_t workspace_size = 0; PADDLE_ENFORCE_GPU_SUCCESS( - phi::dynload::miopenConvolutionBackwardWeightsGetWorkSpaceSize( + common::dynload::miopenConvolutionBackwardWeightsGetWorkSpaceSize( args.handle, args.odesc.desc(), args.idesc.desc(), diff --git a/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu b/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu index f4b0ac0f926729..f1f5ef97bb0d4e 100644 --- 
a/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu @@ -18,9 +18,9 @@ limitations under the License. */ #include "paddle/common/backends/dynload/cudnn.h" #include "paddle/common/bfloat16.h" +#include "paddle/common/ddim.h" #include "paddle/common/float16.h" #include "paddle/phi/backends/context_pool.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/cpu/conv_util.h" #include "paddle/phi/kernels/funcs/batch_norm_utils.h" diff --git a/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu b/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu index 0468572a741181..eb273f3fd09999 100644 --- a/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu @@ -18,9 +18,9 @@ limitations under the License. */ #include "paddle/common/backends/dynload/cudnn.h" #include "paddle/common/bfloat16.h" +#include "paddle/common/ddim.h" #include "paddle/common/float16.h" #include "paddle/phi/backends/context_pool.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/cpu/conv_util.h" #include "paddle/phi/kernels/funcs/padding.h" diff --git a/paddle/phi/kernels/gpudnn/softmax_gpudnn.h b/paddle/phi/kernels/gpudnn/softmax_gpudnn.h index d73a1eaeb000cc..bdf8df3bbc1b08 100644 --- a/paddle/phi/kernels/gpudnn/softmax_gpudnn.h +++ b/paddle/phi/kernels/gpudnn/softmax_gpudnn.h @@ -1041,7 +1041,7 @@ void SoftmaxForwardCudnnKernel(const GPUContext& dev_ctx, auto mode = axis == rank - 1 ? MIOPEN_SOFTMAX_MODE_INSTANCE : MIOPEN_SOFTMAX_MODE_CHANNEL; auto algo = log_mode ? MIOPEN_SOFTMAX_LOG : MIOPEN_SOFTMAX_ACCURATE; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxForward_V2( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSoftmaxForward_V2( handle, phi::backends::gpu::CudnnDataType::kOne(), desc, @@ -1056,7 +1056,7 @@ void SoftmaxForwardCudnnKernel(const GPUContext& dev_ctx, auto mode = axis == rank - 1 ? CUDNN_SOFTMAX_MODE_INSTANCE : CUDNN_SOFTMAX_MODE_CHANNEL; auto algo = log_mode ? CUDNN_SOFTMAX_LOG : CUDNN_SOFTMAX_ACCURATE; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSoftmaxForward( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSoftmaxForward( handle, algo, mode, @@ -1113,7 +1113,7 @@ void SoftmaxBackwardCudnnKernel(const GPUContext& dev_ctx, auto mode = axis == rank - 1 ? MIOPEN_SOFTMAX_MODE_INSTANCE : MIOPEN_SOFTMAX_MODE_CHANNEL; auto algo = log_mode ? MIOPEN_SOFTMAX_LOG : MIOPEN_SOFTMAX_ACCURATE; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::miopenSoftmaxBackward_V2( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::miopenSoftmaxBackward_V2( handle, phi::backends::gpu::CudnnDataType::kOne(), desc, @@ -1130,7 +1130,7 @@ void SoftmaxBackwardCudnnKernel(const GPUContext& dev_ctx, auto mode = axis == rank - 1 ? CUDNN_SOFTMAX_MODE_INSTANCE : CUDNN_SOFTMAX_MODE_CHANNEL; auto algo = log_mode ? 
CUDNN_SOFTMAX_LOG : CUDNN_SOFTMAX_ACCURATE; - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSoftmaxBackward( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cudnnSoftmaxBackward( handle, algo, mode, diff --git a/paddle/phi/kernels/impl/box_coder.h b/paddle/phi/kernels/impl/box_coder.h index 739293ef54e6bf..95900153da165a 100644 --- a/paddle/phi/kernels/impl/box_coder.h +++ b/paddle/phi/kernels/impl/box_coder.h @@ -16,7 +16,7 @@ #include -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/impl/broadcast_tensors_kernel_impl.h b/paddle/phi/kernels/impl/broadcast_tensors_kernel_impl.h index c61b10d5a21995..3d907f06bc3a9b 100644 --- a/paddle/phi/kernels/impl/broadcast_tensors_kernel_impl.h +++ b/paddle/phi/kernels/impl/broadcast_tensors_kernel_impl.h @@ -16,8 +16,8 @@ #include +#include "paddle/common/enforce.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/broadcast_tensors_kernel.h" #include "paddle/phi/kernels/funcs/eigen/common.h" diff --git a/paddle/phi/kernels/impl/conv_transpose_grad_kernel_impl.h b/paddle/phi/kernels/impl/conv_transpose_grad_kernel_impl.h index 2d92f8156b607d..bd997a12ee3a02 100644 --- a/paddle/phi/kernels/impl/conv_transpose_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/conv_transpose_grad_kernel_impl.h @@ -14,8 +14,8 @@ #pragma once +#include "paddle/common/ddim.h" #include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/conv_transpose_grad_kernel.h" #include "paddle/phi/kernels/cpu/conv_util.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/phi/kernels/impl/conv_transpose_kernel_impl.h b/paddle/phi/kernels/impl/conv_transpose_kernel_impl.h index 9fab3e6735b40d..9e71eaf8506533 100644 --- a/paddle/phi/kernels/impl/conv_transpose_kernel_impl.h +++ b/paddle/phi/kernels/impl/conv_transpose_kernel_impl.h @@ -14,8 +14,8 @@ #pragma once +#include "paddle/common/ddim.h" #include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/conv_transpose_kernel.h" #include "paddle/phi/kernels/cpu/conv_util.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/phi/kernels/impl/determinant_kernel_impl.h b/paddle/phi/kernels/impl/determinant_kernel_impl.h index 01c54d780b4b0e..7fa53a3d86c3c4 100644 --- a/paddle/phi/kernels/impl/determinant_kernel_impl.h +++ b/paddle/phi/kernels/impl/determinant_kernel_impl.h @@ -23,7 +23,7 @@ #include "glog/logging.h" #include "paddle/phi/common/amp_type_traits.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/determinant_kernel.h" diff --git a/paddle/phi/kernels/impl/fft_grad_kernel_impl.h b/paddle/phi/kernels/impl/fft_grad_kernel_impl.h index de4bb8d4bd1734..83a37abb5d89a1 100644 --- a/paddle/phi/kernels/impl/fft_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/fft_grad_kernel_impl.h @@ -18,8 +18,8 @@ #include #include +#include "paddle/common/ddim.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_meta.h" #include "paddle/phi/kernels/complex_kernel.h" diff --git a/paddle/phi/kernels/impl/fft_kernel_impl.h b/paddle/phi/kernels/impl/fft_kernel_impl.h index 13c54182d1d316..e542d758c4ce64 100644 --- a/paddle/phi/kernels/impl/fft_kernel_impl.h 
+++ b/paddle/phi/kernels/impl/fft_kernel_impl.h @@ -18,7 +18,7 @@ #include #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/fft.h" diff --git a/paddle/phi/kernels/impl/fold_kernel_impl.h b/paddle/phi/kernels/impl/fold_kernel_impl.h index 694d754ecfb8e4..b585a7267a14f8 100644 --- a/paddle/phi/kernels/impl/fold_kernel_impl.h +++ b/paddle/phi/kernels/impl/fold_kernel_impl.h @@ -16,8 +16,8 @@ #include +#include "paddle/common/enforce.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/im2col.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/unfold_functor.h" diff --git a/paddle/phi/kernels/impl/lstsq_kernel_impl.h b/paddle/phi/kernels/impl/lstsq_kernel_impl.h index a12e0650824140..6c04554c4a5f99 100644 --- a/paddle/phi/kernels/impl/lstsq_kernel_impl.h +++ b/paddle/phi/kernels/impl/lstsq_kernel_impl.h @@ -14,8 +14,8 @@ #pragma once +#include "paddle/common/enforce.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/enforce.h" #include "paddle/utils/optional.h" #include "paddle/phi/core/dense_tensor.h" @@ -119,7 +119,7 @@ inline void BatchedOrmqr(const GPUContext& dev_ctx, int ldc = std::max(1, m); auto handle = dev_ctx.cusolver_dn_handle(); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnSormqr_bufferSize( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cusolverDnSormqr_bufferSize( handle, side, trans, m, n, k, a, lda, tau, other, ldc, &lwork)); DenseTensor* info = new DenseTensor(); info->Resize(make_ddim({1})); @@ -136,20 +136,21 @@ inline void BatchedOrmqr(const GPUContext& dev_ctx, float* workspace_ptr = dev_ctx.template Alloc(workspace); // compute ormgr - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnSormqr(handle, - side, - trans, - m, - n, - k, - a_working_ptr, - lda, - tau_working_ptr, - other_working_ptr, - ldc, - workspace_ptr, - lwork, - info_d)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cusolverDnSormqr(handle, + side, + trans, + m, + n, + k, + a_working_ptr, + lda, + tau_working_ptr, + other_working_ptr, + ldc, + workspace_ptr, + lwork, + info_d)); // check the error info int info_h; @@ -188,7 +189,7 @@ inline void BatchedOrmqr(const GPUContext& dev_ctx, int ldc = std::max(1, m); auto handle = dev_ctx.cusolver_dn_handle(); - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnDormqr_bufferSize( + PADDLE_ENFORCE_GPU_SUCCESS(common::dynload::cusolverDnDormqr_bufferSize( handle, side, trans, m, n, k, a, lda, tau, other, ldc, &lwork)); DenseTensor* info = new DenseTensor(); info->Resize(make_ddim({1})); @@ -205,20 +206,21 @@ inline void BatchedOrmqr(const GPUContext& dev_ctx, double* workspace_ptr = dev_ctx.template Alloc(workspace); // compute ormgr - PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnDormqr(handle, - side, - trans, - m, - n, - k, - a_working_ptr, - lda, - tau_working_ptr, - other_working_ptr, - ldc, - workspace_ptr, - lwork, - info_d)); + PADDLE_ENFORCE_GPU_SUCCESS( + common::dynload::cusolverDnDormqr(handle, + side, + trans, + m, + n, + k, + a_working_ptr, + lda, + tau_working_ptr, + other_working_ptr, + ldc, + workspace_ptr, + lwork, + info_d)); // check the error info int info_h; diff --git a/paddle/phi/kernels/impl/lu_kernel_impl.h b/paddle/phi/kernels/impl/lu_kernel_impl.h index d2838551ff20a7..e6f7e88a1ab218 100644 --- a/paddle/phi/kernels/impl/lu_kernel_impl.h +++ 
b/paddle/phi/kernels/impl/lu_kernel_impl.h @@ -14,8 +14,8 @@ #pragma once +#include "paddle/common/enforce.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/elementwise_add_kernel.h" #include "paddle/phi/kernels/elementwise_subtract_kernel.h" #include "paddle/phi/kernels/funcs/complex_functors.h" diff --git a/paddle/phi/kernels/impl/pool_grad_kernel_impl.h b/paddle/phi/kernels/impl/pool_grad_kernel_impl.h index e3e19370c86bf1..cf00a9b82b8dd8 100644 --- a/paddle/phi/kernels/impl/pool_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/pool_grad_kernel_impl.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/pooling.h" #include "paddle/phi/kernels/pool_grad_kernel.h" diff --git a/paddle/phi/kernels/impl/pool_kernel_impl.h b/paddle/phi/kernels/impl/pool_kernel_impl.h index a2a6705a68302b..dc0b7ad2108ac5 100644 --- a/paddle/phi/kernels/impl/pool_kernel_impl.h +++ b/paddle/phi/kernels/impl/pool_kernel_impl.h @@ -16,7 +16,7 @@ limitations under the License. */ #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/kernels/funcs/pooling.h" #include "paddle/phi/kernels/pool_kernel.h" diff --git a/paddle/phi/kernels/impl/qr_grad_kernel_impl.h b/paddle/phi/kernels/impl/qr_grad_kernel_impl.h index d22eca3c73393e..b67512f4f895ab 100644 --- a/paddle/phi/kernels/impl/qr_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/qr_grad_kernel_impl.h @@ -13,8 +13,8 @@ // limitations under the License. #pragma once +#include "paddle/common/enforce.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/infermeta/binary.h" #include "paddle/phi/infermeta/unary.h" diff --git a/paddle/phi/kernels/impl/qr_kernel_impl.h b/paddle/phi/kernels/impl/qr_kernel_impl.h index cb086590271eb1..79e8a39650b8c7 100644 --- a/paddle/phi/kernels/impl/qr_kernel_impl.h +++ b/paddle/phi/kernels/impl/qr_kernel_impl.h @@ -14,9 +14,9 @@ #pragma once +#include "paddle/common/enforce.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/enforce.h" #include "paddle/utils/optional.h" #if defined(PADDLE_WITH_CUDA) diff --git a/paddle/phi/kernels/impl/searchsorted_kernel_impl.h b/paddle/phi/kernels/impl/searchsorted_kernel_impl.h index b3be4b9d556645..f933b718a28fe8 100644 --- a/paddle/phi/kernels/impl/searchsorted_kernel_impl.h +++ b/paddle/phi/kernels/impl/searchsorted_kernel_impl.h @@ -16,7 +16,7 @@ #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/kernels/funcs/algorithm.h" #include "paddle/phi/kernels/funcs/for_range.h" diff --git a/paddle/phi/kernels/impl/slogdeterminant_kernel_impl.h b/paddle/phi/kernels/impl/slogdeterminant_kernel_impl.h index a5798d66ee5c7e..02004e7442da6d 100644 --- a/paddle/phi/kernels/impl/slogdeterminant_kernel_impl.h +++ b/paddle/phi/kernels/impl/slogdeterminant_kernel_impl.h @@ -20,7 +20,7 @@ #include "glog/logging.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/impl/determinant_kernel_impl.h" #include "paddle/phi/kernels/slogdeterminant_kernel.h" diff --git a/paddle/phi/kernels/impl/warpctc_kernel_impl.h b/paddle/phi/kernels/impl/warpctc_kernel_impl.h 
index 4b4bd6f5143dd3..6693b8fdbd7ba3 100644 --- a/paddle/phi/kernels/impl/warpctc_kernel_impl.h +++ b/paddle/phi/kernels/impl/warpctc_kernel_impl.h @@ -58,16 +58,16 @@ class ComputeCtcLossFunctor { float* costs, void* workspace, ctcOptions options) { - return phi::dynload::compute_ctc_loss(activations, - gradients, - flat_labels, - label_lengths, - input_lengths, - static_cast(alphabet_size), - static_cast(minibatch), - costs, - workspace, - options); + return common::dynload::compute_ctc_loss(activations, + gradients, + flat_labels, + label_lengths, + input_lengths, + static_cast(alphabet_size), + static_cast(minibatch), + costs, + workspace, + options); } }; @@ -84,7 +84,7 @@ class ComputeCtcLossFunctor { double* costs, void* workspace, ctcOptions options) { - return phi::dynload::compute_ctc_loss_double( + return common::dynload::compute_ctc_loss_double( activations, gradients, flat_labels, @@ -141,14 +141,14 @@ class WarpCTCFunctor { ctcStatus_t status = CTC_STATUS_UNKNOWN_ERROR; if (sizeof(T) == 4) { status = - phi::dynload::get_workspace_size(cpu_label_lengths, - cpu_input_lengths, - static_cast(sequence_width), - static_cast(num_sequences), - options_, - &workspace_bytes); + common::dynload::get_workspace_size(cpu_label_lengths, + cpu_input_lengths, + static_cast(sequence_width), + static_cast(num_sequences), + options_, + &workspace_bytes); } else { - status = phi::dynload::get_workspace_size_double( + status = common::dynload::get_workspace_size_double( cpu_label_lengths, cpu_input_lengths, static_cast(sequence_width), @@ -162,7 +162,7 @@ class WarpCTCFunctor { errors::PreconditionNotMet( "warp-ctc [version %d] Error in get_workspace_size: %s", warpctc_version_, - phi::dynload::ctcGetStatusString(status))); + common::dynload::ctcGetStatusString(status))); PADDLE_ENFORCE_GT( workspace_bytes, 0UL, @@ -197,12 +197,12 @@ class WarpCTCFunctor { errors::PreconditionNotMet( "warp-ctc [version %d] Error in get_workspace_size: %s", warpctc_version_, - phi::dynload::ctcGetStatusString(status))); + common::dynload::ctcGetStatusString(status))); } protected: void init(const Context& dev_ctx, const size_t blank) { - warpctc_version_ = phi::dynload::get_warpctc_version(); + warpctc_version_ = common::dynload::get_warpctc_version(); if (dev_ctx.GetPlace().GetType() == phi::AllocationType::GPU) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) diff --git a/paddle/phi/kernels/impl/warprnnt_kernel_impl.h b/paddle/phi/kernels/impl/warprnnt_kernel_impl.h index f51041285aaee9..42834f3b224925 100644 --- a/paddle/phi/kernels/impl/warprnnt_kernel_impl.h +++ b/paddle/phi/kernels/impl/warprnnt_kernel_impl.h @@ -55,16 +55,16 @@ class ComputeRnntLossFunctor { float* costs, void* workspace, rnntOptions options) { - return phi::dynload::compute_rnnt_loss(activations, - gradients, - label, - label_lengths, - input_lengths, - static_cast(alphabet_size), - static_cast(minibatch), - costs, - workspace, - options); + return common::dynload::compute_rnnt_loss(activations, + gradients, + label, + label_lengths, + input_lengths, + static_cast(alphabet_size), + static_cast(minibatch), + costs, + workspace, + options); } }; @@ -81,16 +81,17 @@ class ComputeRnntLossFunctor { double* costs, void* workspace, rnntOptions options) { - return phi::dynload::compute_rnnt_loss_fp64(activations, - gradients, - label, - label_lengths, - input_lengths, - static_cast(alphabet_size), - static_cast(minibatch), - costs, - workspace, - options); + return common::dynload::compute_rnnt_loss_fp64( + activations, + gradients, + 
+        label,
+        label_lengths,
+        input_lengths,
+        static_cast<int>(alphabet_size),
+        static_cast<int>(minibatch),
+        costs,
+        workspace,
+        options);
   }
 };
@@ -148,7 +149,7 @@ class WarpRNNTFunctor {
     }
     size_t workspace_bytes = 0;
-    status = phi::dynload::get_rnnt_workspace_size(
+    status = common::dynload::get_rnnt_workspace_size(
         maxT, maxU, B, gpu, &workspace_bytes, sizeof(T));
     PADDLE_ENFORCE_EQ(
@@ -157,7 +158,7 @@
         errors::PreconditionNotMet(
             "warp-rnnt [version %d] Error in get_rnnt_workspace_size: %s",
             warprnnt_version_,
-            phi::dynload::rnntGetStatusString(status)));
+            common::dynload::rnntGetStatusString(status)));
     PADDLE_ENFORCE_GT(
         workspace_bytes,
         0UL,
@@ -189,7 +190,7 @@
         errors::PreconditionNotMet(
             "warp-rnnt [version %d] Error in get_workspace_size: %s",
             warprnnt_version_,
-            phi::dynload::rnntGetStatusString(status)));
   }

 protected:
@@ -199,7 +200,7 @@
               const size_t blank,
               const float fastemit_lambda,
               const int num_threads) {
-    warprnnt_version_ = phi::dynload::get_warprnnt_version();
+    warprnnt_version_ = common::dynload::get_warprnnt_version();

     options_.maxT = maxT;
     options_.maxU = maxU;
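The warp-ctc and warp-rnnt functors above change only the namespace hosting the lazily loaded symbols, from phi::dynload to common::dynload; every argument list stays the same. For context, the sketch below shows the kind of dlopen-based forwarding stub such a dynload namespace wraps. It is an illustration only, assuming a POSIX loader and the real get_warpctc_version entry point; it is not Paddle's actual dynamic-load macro machinery.

// Simplified dynload stub (illustrative; assumes POSIX dlfcn).
#include <dlfcn.h>
#include <stdexcept>

namespace common {
namespace dynload {

// Opens libwarpctc.so once, then resolves and caches the named symbol.
template <typename FuncT>
FuncT LoadSymbol(const char* name) {
  static void* handle = [] {
    void* h = dlopen("libwarpctc.so", RTLD_LAZY | RTLD_LOCAL);
    if (h == nullptr) throw std::runtime_error(dlerror());
    return h;
  }();
  void* sym = dlsym(handle, name);
  if (sym == nullptr) throw std::runtime_error(dlerror());
  return reinterpret_cast<FuncT>(sym);
}

// Forwarding stub: callers see an ordinary function, exactly like the
// common::dynload::get_warpctc_version() call in the hunks above.
inline int get_warpctc_version() {
  using Fn = int (*)();
  static Fn fn = LoadSymbol<Fn>("get_warpctc_version");
  return fn();
}

}  // namespace dynload
}  // namespace common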
+#include "paddle/common/data_type.h" #include "paddle/common/scalar.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" #include "paddle/phi/backends/xpu/xpu_context.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { template diff --git a/paddle/phi/kernels/onednn/dequantize_kernel.cc b/paddle/phi/kernels/onednn/dequantize_kernel.cc index 384ca7ea1e6383..19ab10a23faa66 100644 --- a/paddle/phi/kernels/onednn/dequantize_kernel.cc +++ b/paddle/phi/kernels/onednn/dequantize_kernel.cc @@ -14,10 +14,10 @@ #include "paddle/phi/kernels/dequantize_kernel.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/onednn/onednn_context.h" #include "paddle/phi/backends/onednn/onednn_helper.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/primitive/datamover_primitives.h b/paddle/phi/kernels/primitive/datamover_primitives.h index 2a3579d99cfe67..a78045aa0dc7ca 100644 --- a/paddle/phi/kernels/primitive/datamover_primitives.h +++ b/paddle/phi/kernels/primitive/datamover_primitives.h @@ -20,7 +20,7 @@ #ifdef PADDLE_WITH_HIP #include #endif -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" namespace phi { namespace kps { diff --git a/paddle/phi/kernels/primitive/functor_primitives.h b/paddle/phi/kernels/primitive/functor_primitives.h index c742706a0b0222..318f5715b6ca7e 100644 --- a/paddle/phi/kernels/primitive/functor_primitives.h +++ b/paddle/phi/kernels/primitive/functor_primitives.h @@ -14,9 +14,9 @@ #pragma once +#include "paddle/common/enforce.h" #include "paddle/common/float16.h" #include "paddle/phi/common/amp_type_traits.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/eigen/extensions.h" namespace phi { diff --git a/paddle/phi/kernels/reverse_kernel.cc b/paddle/phi/kernels/reverse_kernel.cc index 771acacedf0243..cdf380780c2509 100644 --- a/paddle/phi/kernels/reverse_kernel.cc +++ b/paddle/phi/kernels/reverse_kernel.cc @@ -14,7 +14,7 @@ #include "paddle/phi/kernels/reverse_kernel.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" diff --git a/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc b/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc index 6400aa1c2c891a..70f45cd1926ac4 100644 --- a/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc +++ b/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc @@ -16,9 +16,9 @@ limitations under the License. */ #include "paddle/common/bfloat16.h" #include "paddle/common/complex.h" +#include "paddle/common/enforce.h" #include "paddle/common/float16.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/elementwise_multiply_kernel.h" diff --git a/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc index 88a01e1135b7bd..85cb01fffbc10f 100644 --- a/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc @@ -17,8 +17,8 @@ limitations under the License. 
*/ #include "glog/logging.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_meta.h" #include "paddle/phi/core/tensor_utils.h" diff --git a/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc b/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc index 72e3d00962b5dc..077ded01e364fa 100644 --- a/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/kernels/sparse/elementwise_kernel.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_meta.h" #include "paddle/phi/core/tensor_utils.h" diff --git a/paddle/phi/kernels/sparse/cpu/mask_kernel.cc b/paddle/phi/kernels/sparse/cpu/mask_kernel.cc index d4e240d5e82039..e427ff002875b7 100644 --- a/paddle/phi/kernels/sparse/cpu/mask_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/mask_kernel.cc @@ -14,9 +14,9 @@ limitations under the License. */ #include "paddle/phi/kernels/sparse/mask_kernel.h" +#include "paddle/common/ddim.h" +#include "paddle/common/enforce.h" #include "paddle/phi/api/ext/dispatch.h" -#include "paddle/phi/core/ddim.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/visit_type.h" diff --git a/paddle/phi/kernels/sparse/cpu/reshape_kernel.cc b/paddle/phi/kernels/sparse/cpu/reshape_kernel.cc index e8badf3d6e8248..2ed6f8be3be80a 100644 --- a/paddle/phi/kernels/sparse/cpu/reshape_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/reshape_kernel.cc @@ -14,7 +14,7 @@ #include "paddle/phi/kernels/sparse/unary_kernel.h" -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/kernels/sparse/sparse_utils_kernel.h" #include "paddle/phi/backends/cpu/cpu_context.h" diff --git a/paddle/phi/kernels/sparse/cpu/slice_kernel.cc b/paddle/phi/kernels/sparse/cpu/slice_kernel.cc index c40be8a9b15799..81af8339f88a91 100644 --- a/paddle/phi/kernels/sparse/cpu/slice_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/slice_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/sparse/unary_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/slice_utils.h" diff --git a/paddle/phi/kernels/sparse/gpu/addmm_kernel.cu b/paddle/phi/kernels/sparse/gpu/addmm_kernel.cu index 1a43009c519b6c..58d4d94876ea33 100644 --- a/paddle/phi/kernels/sparse/gpu/addmm_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/addmm_kernel.cu @@ -16,9 +16,9 @@ limitations under the License. 
*/ #include +#include "paddle/common/ddim.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/ddim.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/sparse/sparse_blas.h" diff --git a/paddle/phi/kernels/sparse/gpu/conv.cu.h b/paddle/phi/kernels/sparse/gpu/conv.cu.h index 689629c9393388..3e4c6535c699b7 100644 --- a/paddle/phi/kernels/sparse/gpu/conv.cu.h +++ b/paddle/phi/kernels/sparse/gpu/conv.cu.h @@ -25,10 +25,10 @@ namespace cub = hipcub; #endif #include "paddle/phi/kernels/sparse/conv_kernel.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" #include "paddle/phi/kernels/funcs/index_impl.cu.h" diff --git a/paddle/phi/kernels/sparse/gpu/elementwise_kernel.cu b/paddle/phi/kernels/sparse/gpu/elementwise_kernel.cu index 47daa1eae19eda..711095df31a4b6 100644 --- a/paddle/phi/kernels/sparse/gpu/elementwise_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/elementwise_kernel.cu @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/phi/kernels/sparse/elementwise_kernel.h" #include "paddle/phi/kernels/sparse/empty_kernel.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/visit_type.h" diff --git a/paddle/phi/kernels/sparse/gpu/mask_kernel.cu b/paddle/phi/kernels/sparse/gpu/mask_kernel.cu index 3b93ff9638c052..ff90632911b909 100644 --- a/paddle/phi/kernels/sparse/gpu/mask_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/mask_kernel.cu @@ -14,10 +14,10 @@ limitations under the License. */ #include "paddle/phi/kernels/sparse/mask_kernel.h" +#include "paddle/common/ddim.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" -#include "paddle/phi/core/ddim.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/visit_type.h" diff --git a/paddle/phi/kernels/sparse/gpu/matmul_kernel.cu b/paddle/phi/kernels/sparse/gpu/matmul_kernel.cu index f39209e9b8604d..bb8b35a397a60d 100644 --- a/paddle/phi/kernels/sparse/gpu/matmul_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/matmul_kernel.cu @@ -16,9 +16,9 @@ limitations under the License. */ #include +#include "paddle/common/ddim.h" +#include "paddle/common/enforce.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/ddim.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/meta_tensor.h" #include "paddle/phi/core/sparse_coo_tensor.h" diff --git a/paddle/phi/kernels/sparse/gpu/mv_kernel.cu b/paddle/phi/kernels/sparse/gpu/mv_kernel.cu index 27f094fb0fa982..a921ab6d9de59b 100644 --- a/paddle/phi/kernels/sparse/gpu/mv_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/mv_kernel.cu @@ -16,8 +16,8 @@ limitations under the License. 
*/ #include +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/sparse/sparse_blas.h" diff --git a/paddle/phi/kernels/sparse/gpu/slice_kernel.cu b/paddle/phi/kernels/sparse/gpu/slice_kernel.cu index f47accfc8eff81..b96883c0ea3e17 100644 --- a/paddle/phi/kernels/sparse/gpu/slice_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/slice_kernel.cu @@ -17,11 +17,11 @@ #include "paddle/phi/kernels/sparse/unary_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/visit_type.h" #include "paddle/phi/kernels/empty_kernel.h" diff --git a/paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu b/paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu index 084cb0e60bb6de..ae2a20cea29cc2 100644 --- a/paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu @@ -20,9 +20,9 @@ limitations under the License. */ #ifdef PADDLE_WITH_HIP #include "paddle/phi/backends/dynload/rocsparse.h" #endif +#include "paddle/common/enforce.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_meta.h" #include "paddle/phi/core/visit_type.h" @@ -292,12 +292,12 @@ void CsrToCooGPUKernel(const GPUContext& dev_ctx, #ifdef PADDLE_WITH_HIP dev_ctx.CusparseCall([&](rocsparse_handle handle) { - phi::dynload::rocsparse_csr2coo(handle, - csr_crows_data, - non_zero_num, - rows, - coo_rows_data, - rocsparse_index_base_zero); + common::dynload::rocsparse_csr2coo(handle, + csr_crows_data, + non_zero_num, + rows, + coo_rows_data, + rocsparse_index_base_zero); }); #else auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, rows, 1); diff --git a/paddle/phi/kernels/sparse/unary_kernel.h b/paddle/phi/kernels/sparse/unary_kernel.h index 24bf4f131f6101..dff8742f5afc79 100644 --- a/paddle/phi/kernels/sparse/unary_kernel.h +++ b/paddle/phi/kernels/sparse/unary_kernel.h @@ -14,8 +14,8 @@ #pragma once +#include "paddle/common/ddim.h" #include "paddle/phi/common/int_array.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" diff --git a/paddle/phi/kernels/triangular_solve_grad_kernel.h b/paddle/phi/kernels/triangular_solve_grad_kernel.h index eb5a5ab461a1dc..1b51ad50d3246a 100644 --- a/paddle/phi/kernels/triangular_solve_grad_kernel.h +++ b/paddle/phi/kernels/triangular_solve_grad_kernel.h @@ -14,9 +14,9 @@ #pragma once +#include "paddle/common/ddim.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" namespace phi { diff --git a/paddle/phi/kernels/xpu/adam_kernel.cc b/paddle/phi/kernels/xpu/adam_kernel.cc index a4c0d017d82de0..ad1ea4b96a043c 100644 --- a/paddle/phi/kernels/xpu/adam_kernel.cc +++ b/paddle/phi/kernels/xpu/adam_kernel.cc @@ -16,9 +16,9 @@ #include "glog/logging.h" +#include "paddle/common/enforce.h" #include "paddle/common/float16.h" #include "paddle/phi/backends/xpu/xpu_context.h" 
-#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/adam_functors.h" diff --git a/paddle/phi/kernels/xpu/arg_min_max_kernel.cc b/paddle/phi/kernels/xpu/arg_min_max_kernel.cc index b5b2ed7d328884..f2b4dbbc08d39f 100644 --- a/paddle/phi/kernels/xpu/arg_min_max_kernel.cc +++ b/paddle/phi/kernels/xpu/arg_min_max_kernel.cc @@ -14,10 +14,10 @@ #include "paddle/phi/kernels/arg_min_max_kernel.h" +#include "paddle/common/data_type.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/xpu/xpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace phi { diff --git a/paddle/phi/kernels/xpu/index_sample_grad_kernel.cc b/paddle/phi/kernels/xpu/index_sample_grad_kernel.cc index 22c35ef46840fc..fa2d481f1afaeb 100644 --- a/paddle/phi/kernels/xpu/index_sample_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/index_sample_grad_kernel.cc @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/index_sample_grad_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { diff --git a/paddle/phi/kernels/xpu/index_select_grad_kernel.cc b/paddle/phi/kernels/xpu/index_select_grad_kernel.cc index 14bfce38799f0c..a7bc62a54430b9 100644 --- a/paddle/phi/kernels/xpu/index_select_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/index_select_grad_kernel.cc @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/index_select_grad_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { diff --git a/paddle/phi/kernels/xpu/index_select_kernel.cc b/paddle/phi/kernels/xpu/index_select_kernel.cc index 75c19aa028bce7..b4c63203f1068f 100644 --- a/paddle/phi/kernels/xpu/index_select_kernel.cc +++ b/paddle/phi/kernels/xpu/index_select_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/index_select_kernel.h" +#include "paddle/common/data_type.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" namespace phi { diff --git a/paddle/phi/kernels/xpu/kldiv_loss_grad_kernel.cc b/paddle/phi/kernels/xpu/kldiv_loss_grad_kernel.cc index 5d2c750a4dfa33..64278d50e8ce5a 100644 --- a/paddle/phi/kernels/xpu/kldiv_loss_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/kldiv_loss_grad_kernel.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/common/enforce.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" -#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/softmax_kernel.h" diff --git a/paddle/phi/kernels/xpu/kldiv_loss_kernel.cc b/paddle/phi/kernels/xpu/kldiv_loss_kernel.cc index 4ef917f008ab9e..bf1c58855184d2 100644 --- a/paddle/phi/kernels/xpu/kldiv_loss_kernel.cc +++ b/paddle/phi/kernels/xpu/kldiv_loss_kernel.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include "paddle/common/enforce.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
-#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/softmax_kernel.h"
diff --git a/paddle/phi/kernels/xpu/one_hot_kernel.cc b/paddle/phi/kernels/xpu/one_hot_kernel.cc
index ad96d4858f7ed6..162fcf805ab4d4 100644
--- a/paddle/phi/kernels/xpu/one_hot_kernel.cc
+++ b/paddle/phi/kernels/xpu/one_hot_kernel.cc
@@ -13,10 +13,10 @@
 // limitations under the License.
 #include "paddle/phi/kernels/one_hot_kernel.h"
+#include "paddle/common/data_type.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/core/utils/data_type.h"

 namespace phi {
 template
diff --git a/paddle/phi/kernels/xpu/unique_kernel.cc b/paddle/phi/kernels/xpu/unique_kernel.cc
index 6f2d8f470a2120..7cb4ceb97c652c 100644
--- a/paddle/phi/kernels/xpu/unique_kernel.cc
+++ b/paddle/phi/kernels/xpu/unique_kernel.cc
@@ -19,10 +19,10 @@
 #include "paddle/phi/kernels/unique_kernel.h"
+#include "paddle/common/data_type.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/common/memory_utils.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/core/utils/data_type.h"
 #include "paddle/phi/core/visit_type.h"

 namespace phi {
diff --git a/test/cpp/fluid/math/selected_rows_functor_test.cu.cc b/test/cpp/fluid/math/selected_rows_functor_test.cu.cc
index b507f096082f94..20cffb62d43b4c 100644
--- a/test/cpp/fluid/math/selected_rows_functor_test.cu.cc
+++ b/test/cpp/fluid/math/selected_rows_functor_test.cu.cc
@@ -15,10 +15,10 @@ limitations under the License. */
 #include "paddle/phi/kernels/funcs/selected_rows_functor.h"

 #include "gtest/gtest.h"
+#include "paddle/common/errors.h"
 #include "paddle/phi/backends/context_pool.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
diff --git a/test/cpp/phi/core/test_tensor_array.cc b/test/cpp/phi/core/test_tensor_array.cc
index 201790a7bc0e10..ae2685d6fc98e7 100644
--- a/test/cpp/phi/core/test_tensor_array.cc
+++ b/test/cpp/phi/core/test_tensor_array.cc
@@ -17,9 +17,9 @@ limitations under the License. */
 #include

 #include "gtest/gtest.h"
+#include "paddle/common/errors.h"
 #include "paddle/phi/backends/all_context.h"
 #include "paddle/phi/core/enforce.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/phi/core/tensor_array.h"
 #include "test/cpp/phi/core/allocator.h"
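Note what stays fixed across all of these hunks: the PADDLE_ENFORCE_* call sites and the errors:: factories they wrap are untouched; only the header that declares them moves. A sketch of that recurring pattern follows, with a hypothetical helper, assuming paddle/common/enforce.h continues to export the same macros and error factories that paddle/phi/core/enforce.h did.

#include <cstddef>

#include "paddle/common/enforce.h"  // was "paddle/phi/core/enforce.h"

namespace phi {
// Hypothetical helper (not from the diff) mirroring the workspace-size checks
// in the warp-ctc/warp-rnnt functors above.
inline void CheckWorkspaceBytes(size_t workspace_bytes, int version) {
  PADDLE_ENFORCE_GT(
      workspace_bytes,
      0UL,
      errors::PreconditionNotMet(
          "warp-ctc [version %d] workspace query returned an empty buffer.",
          version));
}
}  // namespace phi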