From 528faed344119cc339a8425acef7794e05a9d505 Mon Sep 17 00:00:00 2001
From: Bo Zhang <105368690+zhangbopd@users.noreply.github.com>
Date: Mon, 4 Dec 2023 17:00:41 +0800
Subject: [PATCH] [CMake cleanup] Move DDim etc. to common (#59105)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix conflict
* exception
* kunlun ci
* WIN_CI
* setup.py
* bug_fix
* hash
* auto_code_gen_WIN_CI
* inference_CI
* use_common_enforce
* delete pir_enforce
* delete_error
* change_cmake
* conflict
* cmake
* mac_CI
* inference_copy
* delete_pybind_common
* paddle_test
* split ddim constructor
* cc_test
* use cinn::common
* copy_infer
* delete_layer_test_new
* bug_fix
* infer
* fix inference bug
* conflict

---------

Co-authored-by: winter-wang <1030748926@qq.com>
---
 cmake/generic.cmake | 2 +-
 cmake/inference_lib.cmake | 44 +--
 paddle/cinn/api/tensor_node.h | 18 +-
 paddle/cinn/ast_gen_ius/ast_gen.cc | 6 +-
 .../cinn/auto_schedule/analysis/analyze_ir.cc | 4 +-
 .../cinn/auto_schedule/analysis/analyze_ir.h | 2 +-
 .../auto_schedule/analysis/analyze_ir_test.cc | 12 +-
 paddle/cinn/auto_schedule/auto_tuner.cc | 4 +-
 paddle/cinn/auto_schedule/auto_tuner.h | 4 +-
 paddle/cinn/auto_schedule/auto_tuner_test.cc | 4 +-
 .../cost_model/expr_cost_model.cc | 6 +-
 .../cost_model/expr_cost_model.h | 6 +-
 .../cinn/auto_schedule/cost_model/feature.cc | 6 +-
 .../cinn/auto_schedule/cost_model/feature.h | 4 +-
 .../cost_model/feature_extractor.cc | 16 +-
 .../cost_model/feature_extractor.h | 3 +-
 .../cost_model/feature_extractor_test.cc | 8 +-
 .../cost_model/xgb_cost_model.cc | 2 +-
 .../database/jsonfile_database_test.cc | 2 +-
 .../auto_schedule/measure/measurer_test.cc | 6 +-
 .../auto_schedule/measure/simple_runner.cc | 18 +-
 .../measure/simple_runner_test.cc | 10 +-
 .../cooperative_process_test.cc | 2 +-
 .../search_space/auto_gen_rule/auto_bind.h | 2 +-
 .../auto_gen_rule/auto_bind_test.cc | 4 +-
 .../auto_gen_rule/auto_gen_rule.cc | 3 +-
 .../auto_gen_rule/auto_gen_rule.h | 4 +-
 .../search_space/auto_gen_rule/auto_inline.cc | 2 +-
 .../search_space/auto_gen_rule/auto_inline.h | 2 +-
 .../auto_gen_rule/auto_inline_test.cc | 14 +-
 .../search_space/auto_gen_rule/auto_unroll.h | 3 +-
 .../auto_gen_rule/auto_unroll_test.cc | 8 +-
 .../auto_gen_rule/mix_rules_test.cc | 2 +-
 .../auto_gen_rule/multi_level_tiling.cc | 8 +-
 .../auto_gen_rule/multi_level_tiling.h | 4 +-
 .../auto_gen_rule/multi_level_tiling_test.cc | 22 +-
 .../auto_gen_rule/reduction_factoring.cc | 2 +-
 .../auto_gen_rule/reduction_factoring.h | 2 +-
 .../auto_gen_rule/reduction_factoring_test.cc | 4 +-
 .../search_space/auto_gen_rule/skip_rule.cc | 2 +-
 .../search_space/auto_gen_rule/skip_rule.h | 2 +-
 .../auto_gen_rule/skip_rule_test.cc | 8 +-
 .../search_space/auto_gen_rule/test_helper.cc | 20 +-
 .../search_space/auto_gen_rule/test_helper.h | 6 +-
 .../search_space/rule_sampler_test.cc | 4 +-
 .../search_space/search_state.cc | 3 +-
 .../auto_schedule/search_space/search_state.h | 4 +-
 .../search_space/search_state_test.cc | 2 +-
 .../evolutionary_search_test.cc | 10 +-
 .../mutate_rule/mutate_tile_size_test.cc | 4 +-
 .../auto_schedule/task/task_creator_test.cc | 4 +-
 .../cinn/auto_schedule/task/task_optimizer.cc | 8 +-
 .../auto_schedule/task/task_registry_test.cc | 10 +-
 paddle/cinn/auto_schedule/task/tune_task.cc | 3 +-
 paddle/cinn/auto_schedule/task/tune_task.h | 2 +-
 .../cinn/auto_schedule/task/tune_task_test.cc | 18 +-
 .../tests/performance_comparison_test.cc | 15 +-
 paddle/cinn/backends/codegen_c.cc | 9 +-
 paddle/cinn/backends/codegen_c_test.cc | 8 +-
 paddle/cinn/backends/codegen_cuda_dev.cc | 5 +-
 paddle/cinn/backends/codegen_cuda_util.h | 5 +-
 paddle/cinn/backends/codegen_debug_test.cc | 2 +-
 paddle/cinn/backends/compiler_test.cc | 63 ++--
 paddle/cinn/backends/ir_schedule_test.cc | 94 +++---
 paddle/cinn/backends/llvm/codegen_llvm.cc | 17 +-
 paddle/cinn/backends/llvm/codegen_llvm.h | 2 +-
 .../cinn/backends/llvm/codegen_llvm_test.cc | 111 ++++---
 paddle/cinn/backends/llvm/codegen_x86.cc | 4 +-
 paddle/cinn/backends/llvm/codegen_x86_test.cc | 15 +-
 .../backends/llvm/execution_engine_test.cc | 14 +-
 paddle/cinn/backends/llvm/llvm_intrin_rule.h | 2 +-
 paddle/cinn/backends/llvm/llvm_util.cc | 10 +-
 paddle/cinn/backends/llvm/llvm_util.h | 2 +-
 paddle/cinn/backends/nvrtc/nvrtc_util.cc | 4 +-
 paddle/cinn/common/arithmatic.cc | 2 +-
 paddle/cinn/common/axis.cc | 2 +-
 paddle/cinn/common/cas.cc | 6 +-
 paddle/cinn/common/cas_test.cc | 6 +-
 paddle/cinn/common/cinn_value.h | 2 +-
 paddle/cinn/common/cinn_value_test.cc | 2 +-
 paddle/cinn/common/common.h | 32 +-
 .../cinn/common/equation_graph_topo_walker.h | 2 +-
 paddle/cinn/common/graph_utils.cc | 2 +-
 paddle/cinn/common/graph_utils.h | 12 +-
 paddle/cinn/common/ir_util.cc | 6 +-
 paddle/cinn/common/ir_util.h | 4 +-
 paddle/cinn/common/make_subgraph_walker.h | 14 +-
 paddle/cinn/common/union_find.h | 2 +-
 paddle/cinn/frontend/computation_test.cc | 20 +-
 paddle/cinn/frontend/decomposer/activation.cc | 22 +-
 .../frontend/decomposer/activation_test.cc | 2 +-
 paddle/cinn/frontend/decomposer/batch_norm.cc | 57 ++--
 .../frontend/decomposer/batch_norm_test.cc | 4 +-
 paddle/cinn/frontend/decomposer/test_helper.h | 4 +-
 paddle/cinn/frontend/decomposer/top_k_test.cc | 2 +-
 paddle/cinn/frontend/decomposer_registry.h | 4 +-
 .../cinn/frontend/decomposer_registry_test.cc | 2 +-
 paddle/cinn/frontend/interpreter_test.cc | 3 +-
 paddle/cinn/frontend/net_builder.cc | 11 +-
 paddle/cinn/frontend/net_builder.h | 17 +-
 paddle/cinn/frontend/net_builder_test.cc | 98 +++---
 paddle/cinn/frontend/op_mapper_registry.h | 6 +-
 .../cinn/frontend/op_mappers/common_utils.h | 2 +-
 .../cinn/frontend/op_mappers/paddle/clip.cc | 19 +-
 .../frontend/op_mappers/paddle/constant.cc | 4 +-
 .../cinn/frontend/op_mappers/paddle/cumsum.cc | 2 +-
 .../frontend/op_mappers/paddle/elementwise.cc | 2 +-
 .../frontend/op_mappers/paddle/layer_norm.cc | 17 +-
 .../cinn/frontend/op_mappers/paddle/norm.cc | 13 +-
 .../cinn/frontend/op_mappers/paddle/reduce.cc | 2 +-
 .../cinn/frontend/op_mappers/paddle/scale.cc | 5 +-
 .../frontend/op_mappers/paddle/scatter.cc | 12 +-
 .../frontend/op_mappers/science/broadcast.cc | 2 +-
 .../frontend/op_mappers/science/transform.cc | 2 +-
 paddle/cinn/frontend/optimize.cc | 4 +-
 paddle/cinn/frontend/optimize.h | 4 +-
 paddle/cinn/frontend/paddle/model_parser.cc | 10 +-
 paddle/cinn/frontend/paddle/model_parser.h | 25 +-
 .../cinn/frontend/paddle_model_convertor.cc | 6 +-
 paddle/cinn/frontend/paddle_model_convertor.h | 4 +-
 .../frontend/paddle_model_convertor_test.cc | 2 +-
 .../cinn/frontend/paddle_model_to_program.cc | 3 +-
 .../cinn/frontend/paddle_model_to_program.h | 4 +-
 paddle/cinn/frontend/pass/auto_broadcast.cc | 2 +-
 paddle/cinn/frontend/pass/auto_cast.cc | 7 +-
 paddle/cinn/frontend/pass/auto_cast_test.cc | 18 +-
 paddle/cinn/frontend/pass/cast_collapsing.cc | 2 +-
 .../frontend/pass/cast_collapsing_test.cc | 16 +-
 .../cinn/frontend/pass/dead_code_eliminate.cc | 2 +-
 .../frontend/pass/dead_code_eliminate_test.cc | 4 +-
 paddle/cinn/frontend/pass/decomposer.cc | 2 +-
 paddle/cinn/frontend/pass/decomposer_test.cc | 4 +-
 .../frontend/pass/expand_zero_dim_pass.cc | 2 +-
 .../pass/expand_zero_dim_pass_test.cc | 4 +-
 .../frontend/pass/fill_constant_folding.cc | 2 +-
 .../pass/fill_constant_folding_test.cc | 8 +-
 .../frontend/pass/fill_constant_rewriter.cc | 2 +-
 paddle/cinn/frontend/pass/gemm_rewriter.cc | 2 +-
 .../cinn/frontend/pass/gemm_rewriter_test.cc | 20 +-
 paddle/cinn/frontend/pass/pass_test_helper.h | 8 +-
 .../frontend/pass/program_topoerror_test.cc | 2 +-
 paddle/cinn/frontend/pass/remove_identity.cc | 2 +-
 paddle/cinn/frontend/pass/test_helper.h | 2 +-
 .../frontend/pass/transpose_collapsing.cc | 2 +-
 .../pass/transpose_collapsing_test.cc | 22 +-
 .../frontend/pass/transpose_folding_base.h | 2 +-
 .../pass/transpose_folding_input_test.cc | 18 +-
 .../pass/transpose_folding_output_test.cc | 40 +--
 .../pass/transpose_scale_folding_test.cc | 28 +-
 paddle/cinn/frontend/program_pass.cc | 2 +-
 paddle/cinn/frontend/program_pass.h | 6 +-
 paddle/cinn/frontend/syntax.cc | 9 +-
 paddle/cinn/frontend/syntax.h | 37 +--
 paddle/cinn/frontend/syntax_test.cc | 6 +-
 paddle/cinn/frontend/var_type_utils.h | 7 +-
 .../hlir/dialect/operator/ir/manual_op.cc | 2 +-
 .../add_broadcast_to_elementwise_pass.cc | 2 +-
 .../group_with_group_merge_pass.cc | 16 +-
 .../group_with_group_merge_pass_utils.h | 5 +-
 .../group_merge/group_with_group_merge_util.h | 24 +-
 .../group_merge/op_with_group_merge_pass.cc | 7 +-
 .../group_merge/op_with_group_merge_util.h | 30 +-
 .../hlir/dialect/runtime/ir/jit_kernel_op.cc | 2 +-
 .../cinn/hlir/framework/accuracy_checker.cc | 8 +-
 .../hlir/framework/accuracy_checker_test.cc | 10 +-
 paddle/cinn/hlir/framework/buffer.cc | 10 +-
 paddle/cinn/hlir/framework/buffer.h | 16 +-
 paddle/cinn/hlir/framework/buffer_test.cc | 4 +-
 paddle/cinn/hlir/framework/graph.cc | 20 +-
 paddle/cinn/hlir/framework/graph.h | 12 +-
 paddle/cinn/hlir/framework/graph_compiler.cc | 20 +-
 paddle/cinn/hlir/framework/graph_compiler.h | 2 +-
 .../hlir/framework/graph_compiler_test.cc | 14 +-
 paddle/cinn/hlir/framework/graph_test.cc | 4 +-
 paddle/cinn/hlir/framework/instruction.cc | 12 +-
 paddle/cinn/hlir/framework/instruction.h | 2 +-
 .../cinn/hlir/framework/instruction_test.cc | 68 +++--
 paddle/cinn/hlir/framework/memory.cc | 2 +-
 paddle/cinn/hlir/framework/memory.h | 5 +-
 paddle/cinn/hlir/framework/node.cc | 49 +--
 paddle/cinn/hlir/framework/node.h | 24 +-
 paddle/cinn/hlir/framework/op_lowering.h | 2 +-
 .../cinn/hlir/framework/op_lowering_impl.cc | 37 +--
 paddle/cinn/hlir/framework/op_lowering_impl.h | 2 +-
 .../cinn/hlir/framework/op_lowering_test.cc | 22 +-
 .../cinn/hlir/framework/op_lowering_util.cc | 16 +-
 paddle/cinn/hlir/framework/op_strategy.h | 10 +-
 paddle/cinn/hlir/framework/op_test.cc | 10 +-
 .../cinn/hlir/framework/parallel_compiler.cc | 5 +-
 .../hlir/framework/parallel_compiler_test.cc | 6 +-
 .../hlir/framework/pir/compilation_task.cc | 3 +-
 .../hlir/framework/pir/op_lowering_impl.cc | 49 +--
 .../hlir/framework/pir/op_lowering_impl.h | 2 +-
 .../hlir/framework/pir/op_lowering_util.cc | 8 +-
 paddle/cinn/hlir/framework/pir/utils.cc | 8 +-
 paddle/cinn/hlir/framework/pir/utils.h | 4 +-
 .../hlir/framework/print_graph_pass_test.cc | 2 +-
 paddle/cinn/hlir/framework/schedule.h | 2 +-
 paddle/cinn/hlir/framework/scope_test.cc | 2 +-
 paddle/cinn/hlir/framework/tensor.h | 8 +-
 paddle/cinn/hlir/framework/tensor_test.cc | 2 +-
 .../cinn/hlir/framework/visualize_helper.cc | 6 +-
 paddle/cinn/hlir/framework/visualize_helper.h | 4 +-
 paddle/cinn/hlir/op/broadcast.cc | 6 +-
 paddle/cinn/hlir/op/contrib/argmax.cc | 16 +-
 paddle/cinn/hlir/op/contrib/argmax.h | 2 +-
 paddle/cinn/hlir/op/contrib/argmax_test.cc | 4 +-
 paddle/cinn/hlir/op/contrib/argmin.cc | 16 +-
 paddle/cinn/hlir/op/contrib/argmin.h | 2 +-
 paddle/cinn/hlir/op/contrib/argmin_test.cc | 4 +-
 paddle/cinn/hlir/op/contrib/assert_true.cc | 4 +-
 .../cinn/hlir/op/contrib/bitcast_convert.cc | 10 +-
 paddle/cinn/hlir/op/contrib/cholesky.cc | 4 +-
 paddle/cinn/hlir/op/contrib/gather_nd.cc | 21 +-
 paddle/cinn/hlir/op/contrib/gather_nd_test.cc | 4 +-
 .../cinn/hlir/op/contrib/gaussian_random.cc | 6 +-
 .../hlir/op/contrib/logical_right_shift.cc | 10 +-
 .../op/contrib/logical_right_shift_test.cc | 4 +-
 paddle/cinn/hlir/op/contrib/lookup_table.cc | 8 +-
 .../cinn/hlir/op/contrib/lookup_table_test.cc | 8 +-
 paddle/cinn/hlir/op/contrib/one_hot.cc | 18 +-
 paddle/cinn/hlir/op/contrib/one_hot_test.cc | 6 +-
 paddle/cinn/hlir/op/contrib/randint.cc | 6 +-
 paddle/cinn/hlir/op/contrib/reciprocal.cc | 8 +-
 .../cinn/hlir/op/contrib/reciprocal_test.cc | 4 +-
 paddle/cinn/hlir/op/contrib/repeat.cc | 20 +-
 paddle/cinn/hlir/op/contrib/repeat_test.cc | 4 +-
 paddle/cinn/hlir/op/contrib/resize.cc | 48 +--
 paddle/cinn/hlir/op/contrib/resize.h | 2 +-
 paddle/cinn/hlir/op/contrib/sort.cc | 36 +--
 paddle/cinn/hlir/op/contrib/sort.h | 4 +-
 paddle/cinn/hlir/op/contrib/sort_test.cc | 8 +-
 .../cinn/hlir/op/contrib/triangular_solve.cc | 4 +-
 paddle/cinn/hlir/op/contrib/uniform_random.cc | 6 +-
 paddle/cinn/hlir/op/custom_call.cc | 98 +++---
 paddle/cinn/hlir/op/elementwise.cc | 46 +--
 paddle/cinn/hlir/op/external_api_registry.cc | 10 +-
 paddle/cinn/hlir/op/external_api_registry.h | 9 +-
 .../hlir/op/external_api_registry_test.cc | 14 +-
 paddle/cinn/hlir/op/nn.cc | 16 +-
 paddle/cinn/hlir/op/op_broadcast_test.cc | 80 ++---
 paddle/cinn/hlir/op/op_nn_test.cc | 105 ++++---
 paddle/cinn/hlir/op/op_util.cc | 24 +-
 paddle/cinn/hlir/op/op_util.h | 9 +-
 paddle/cinn/hlir/op/reduction.cc | 12 +-
 paddle/cinn/hlir/op/reduction_test.cc | 37 +--
 paddle/cinn/hlir/op/transform.cc | 6 +-
 paddle/cinn/hlir/op/transform_test.cc | 18 +-
 paddle/cinn/hlir/pass/alterlayout.cc | 52 ++--
 paddle/cinn/hlir/pass/alterlayout_test.cc | 16 +-
 .../hlir/pass/check_fusion_accuracy_pass.cc | 12 +-
 .../pass/check_fusion_accuracy_pass_test.cc | 32 +-
 .../pass/common_subexpression_elimination.cc | 6 +-
 .../common_subexpression_elimination_test.cc | 6 +-
 paddle/cinn/hlir/pass/const_propagate.cc | 2 +-
 paddle/cinn/hlir/pass/const_propagate_test.cc | 4 +-
 .../cinn/hlir/pass/constant_folding_pass.cc | 4 +-
 .../hlir/pass/constant_folding_pass_test.cc | 2 +-
 .../hlir/pass/constant_folding_pass_util.cc | 4 +-
 paddle/cinn/hlir/pass/custom_call_pass.cc | 6 +-
 paddle/cinn/hlir/pass/dce_pass.cc | 4 +-
 paddle/cinn/hlir/pass/dce_pass_test.cc | 4 +-
 paddle/cinn/hlir/pass/dense_merge_pass.cc | 4 +-
 .../cinn/hlir/pass/dense_merge_pass_test.cc | 2 +-
 paddle/cinn/hlir/pass/dot_merger.cc | 12 +-
 paddle/cinn/hlir/pass/dot_merger_test.cc | 2 +-
 paddle/cinn/hlir/pass/fusion_helper_base.h | 5 +-
 paddle/cinn/hlir/pass/fusion_merge_pass.cc | 4 +-
 .../cinn/hlir/pass/fusion_merge_pass_test.cc | 36 +--
 .../cinn/hlir/pass/fusion_merge_pass_util.h | 4 +-
 .../hlir/pass/general_fusion_merge_pass.cc | 4 +-
 .../graph_group_fuse_helper.h | 4 +-
 .../pass/general_fusion_merge_pass_utils.h | 2 +-
 paddle/cinn/hlir/pass/infershape.cc | 4 +-
 paddle/cinn/hlir/pass/infershape.h | 2 +-
 paddle/cinn/hlir/pass/op_fusion_pass.cc | 4 +-
 paddle/cinn/hlir/pass/op_fusion_pass_test.cc | 22 +-
 paddle/cinn/hlir/pass/op_fusion_pass_util.h | 4 +-
 paddle/cinn/hlir/pass/opfusion.cc | 10 +-
 paddle/cinn/hlir/pass/opfusion_test.cc | 20 +-
 paddle/cinn/hlir/pass/reduce_split_pass.cc | 34 +--
 .../cinn/hlir/pass/reduce_split_pass_test.cc | 2 +-
 .../hlir/pass/single_group_optimize_pass.cc | 8 +-
 paddle/cinn/hlir/pass/test_dot_merger.cc | 4 +-
 paddle/cinn/hlir/pass/test_primitive_ops.cc | 6 +-
 paddle/cinn/hlir/pe/broadcast.cc | 6 +-
 paddle/cinn/hlir/pe/broadcast.h | 18 +-
 paddle/cinn/hlir/pe/elementwise.cc | 2 +-
 paddle/cinn/hlir/pe/elementwise.h | 15 +-
 paddle/cinn/hlir/pe/ir_schedule_pe.cc | 50 +--
 paddle/cinn/hlir/pe/ir_schedule_pe.h | 38 +--
 paddle/cinn/hlir/pe/load_params_test.cc | 2 +-
 paddle/cinn/hlir/pe/map_expr_to_ir.cc | 6 +-
 paddle/cinn/hlir/pe/map_expr_to_ir.h | 2 +-
 paddle/cinn/hlir/pe/nn.cc | 147 ++++-----
 paddle/cinn/hlir/pe/nn.h | 4 +-
 paddle/cinn/hlir/pe/nn_util.cc | 10 +-
 paddle/cinn/hlir/pe/pe_broadcast_test.cc | 48 +--
 paddle/cinn/hlir/pe/pe_elementwise_test.cc | 8 +-
 paddle/cinn/hlir/pe/pe_transform_test.cc | 17 +-
 paddle/cinn/hlir/pe/reduction.cc | 18 +-
 paddle/cinn/hlir/pe/reduction.h | 2 +-
 paddle/cinn/hlir/pe/schedule.cc | 104 +++----
 paddle/cinn/hlir/pe/schedule.h | 68 ++---
 paddle/cinn/hlir/pe/transform.cc | 132 ++++----
 paddle/cinn/hlir/pe/transform.h | 16 +-
 paddle/cinn/ir/buffer.cc | 6 +-
 paddle/cinn/ir/dim.cc | 2 +-
 .../ir/group_schedule/base_group_scheduler.cc | 2 +-
 .../ir/group_schedule/base_group_scheduler.h | 6 +-
 .../group_schedule/dy_shape_group_scheduler.h | 2 +-
 .../st_shape_group_scheduler.cc | 16 +-
 .../group_schedule/st_shape_group_scheduler.h | 2 +-
 paddle/cinn/ir/ir.cc | 10 +-
 paddle/cinn/ir/ir.h | 4 +-
 paddle/cinn/ir/ir_analyzer/ir_analyzer.cc | 6 +-
 paddle/cinn/ir/ir_base.h | 16 +-
 paddle/cinn/ir/ir_printer.cc | 4 +-
 paddle/cinn/ir/lowered_func.cc | 14 +-
 paddle/cinn/ir/module.h | 2 +-
 paddle/cinn/ir/op/ir_operators.cc | 16 +-
 paddle/cinn/ir/operation.cc | 2 +-
 paddle/cinn/ir/schedule/factorize_reduction.h | 2 +-
 paddle/cinn/ir/schedule/impl/base.cc | 2 +-
 paddle/cinn/ir/schedule/impl/for_type.cc | 3 +-
 .../ir/schedule/impl/loop_transformation.cc | 8 +-
 paddle/cinn/ir/schedule/ir_schedule_util.cc | 33 +-
 paddle/cinn/ir/schedule/ir_schedule_util.h | 8 +-
 paddle/cinn/ir/schedule_block_graph.cc | 22 +-
 paddle/cinn/ir/schedule_block_graph.h | 13 +-
 paddle/cinn/ir/tensor.cc | 14 +-
 paddle/cinn/ir/tensor.h | 4 +-
 paddle/cinn/ir/test/ir_compare_test.cc | 2 +-
 .../cinn/ir/test/schedule_block_graph_test.cc | 9 +-
 paddle/cinn/ir/test/schedule_desc_test.cc | 2 +-
 .../ir/test/st_shape_group_scheduler_test.cc | 4 +-
 paddle/cinn/ir/test/tensor_test.cc | 8 +-
 paddle/cinn/ir/utils/ir_copy.cc | 4 +-
 paddle/cinn/lang/buffer.cc | 2 +-
 paddle/cinn/lang/builtin.cc | 16 +-
 paddle/cinn/lang/compute.cc | 4 +-
 paddle/cinn/lang/lower.cc | 6 +-
 paddle/cinn/lang/lower.h | 15 +-
 paddle/cinn/lang/lower_impl.cc | 34 +--
 paddle/cinn/lang/lower_impl.h | 10 +-
 paddle/cinn/lang/lower_tensor_group.cc | 4 +-
 paddle/cinn/lang/lower_tensor_group.h | 2 +-
 paddle/cinn/lang/lower_test.cc | 2 +-
 paddle/cinn/lang/packed_func.h | 6 +-
 paddle/cinn/lang/packed_func_test.cc | 5 +-
 paddle/cinn/lang/placeholder.h | 4 +-
 paddle/cinn/optim/buffer_assign.cc | 10 +-
 paddle/cinn/optim/buffer_assign.h | 2 +-
 .../cinn/optim/call_arg_list_to_pod_value.cc | 2 +-
 paddle/cinn/optim/compute_inline_expand.cc | 6 +-
 paddle/cinn/optim/ir_simplify.cc | 53 ++--
 paddle/cinn/optim/map_extern_call.cc | 4 +-
 .../cinn/optim/remove_schedule_block_test.cc | 2 +-
 .../replace_cross_thread_reduction_test.cc | 2 +-
 paddle/cinn/optim/replace_var_with_expr.cc | 2 +-
 paddle/cinn/optim/transform_gpu_forloop.cc | 24 +-
 paddle/cinn/optim/transform_polyfor_to_for.cc | 6 +-
 paddle/cinn/optim/unroll_loops_test.cc | 4 +-
 paddle/cinn/optim/var_mod_simplify.cc | 4 +-
 paddle/cinn/optim/vectorize_loops.cc | 84 +++---
 paddle/cinn/optim/vectorize_loops_test.cc | 12 +-
 paddle/cinn/poly/domain.cc | 2 +-
 paddle/cinn/poly/graph.cc | 4 +-
 paddle/cinn/poly/graph.h | 8 +-
 paddle/cinn/poly/poly_scheduler.cc | 25 +-
 paddle/cinn/poly/schedule.cc | 2 +-
 paddle/cinn/poly/schedule.h | 10 +-
 paddle/cinn/poly/stage.cc | 8 +-
 paddle/cinn/poly/stage.h | 2 +-
 paddle/cinn/poly/stage_test.cc | 34 ++-
 paddle/cinn/pybind/CMakeLists.txt | 5 +-
 paddle/cinn/pybind/bind_utils.h | 6 +-
 paddle/cinn/pybind/common.cc | 64 ++--
 paddle/cinn/pybind/framework.cc | 26 +-
 paddle/cinn/pybind/frontend.cc | 32 +-
 paddle/cinn/pybind/ir/ir.cc | 4 +-
 paddle/cinn/pybind/ir/ir_api.cc | 14 +-
 paddle/cinn/pybind/ir/ir_context.cc | 2 +-
 paddle/cinn/pybind/ir/ir_context.h | 8 +-
 paddle/cinn/pybind/lang.cc | 10 +-
 paddle/cinn/pybind/pe.cc | 4 +-
 paddle/cinn/pybind/runtime.cc | 6 +-
 paddle/cinn/runtime/cpu/cblas.cc | 14 +-
 .../cinn/runtime/cpu/host_intrinsics_test.cc | 57 ++--
 paddle/cinn/runtime/cpu/mkl_math_test.cc | 67 +++--
 paddle/cinn/runtime/cpu/mkldnn_math.cc | 26 +-
 paddle/cinn/runtime/cpu/mkldnn_math_test.cc | 22 +-
 paddle/cinn/runtime/cpu/thread_backend.cc | 2 +-
 paddle/cinn/runtime/cuda/cublas_util.h | 8 +-
 paddle/cinn/runtime/cuda/cuda_module_test.cc | 4 +-
 paddle/cinn/runtime/cuda/cuda_util.cc | 12 +-
 paddle/cinn/runtime/cuda/cuda_util.h | 13 +-
 paddle/cinn/runtime/custom_function.cc | 14 +-
 paddle/cinn/runtime/custom_function_test.cc | 52 ++--
 paddle/cinn/runtime/flags.cc | 9 +-
 paddle/cinn/runtime/flags.h | 6 +-
 paddle/cinn/runtime/intrinsic.cc | 2 +-
 paddle/cinn/utils/data_util.cc | 32 +-
 paddle/cinn/utils/data_util.h | 6 +-
 paddle/common/CMakeLists.txt | 3 +
 paddle/common/array.h | 5 +
 paddle/common/ddim.cc | 51 ++++
 paddle/common/ddim.h | 103 +++----
 paddle/common/dim.h | 13 +
 paddle/common/enforce.h | 90 ++++--
 paddle/{phi/core => common}/errors.cc | 8 +-
 paddle/common/errors.h | 6 +
 paddle/{phi => }/common/layout.h | 18 +-
 paddle/common/macros.h | 22 +-
 .../distributed/auto_parallel/CMakeLists.txt | 2 +-
 .../auto_parallel/spmd_rules/CMakeLists.txt | 2 +-
 .../auto_parallel/test/CMakeLists.txt | 8 +-
 .../distributed/collective/CMakeLists.txt | 23 +-
 .../distributed/collective/process_group.h | 2 +-
 .../collective/process_group_bkcl.cc | 2 +-
 .../collective/process_group_with_stream.h | 2 +-
 .../collective/process_group_without_stream.h | 2 +-
 paddle/fluid/distributed/common/afs_warpper.h | 2 +-
 .../distributed/fleet_executor/CMakeLists.txt | 7 +-
 .../fleet_executor/compute_interceptor.cc | 2 +-
 .../fleet_executor/cond_interceptor.cc | 2 +-
 .../distributed/fleet_executor/dist_model.cc | 4 +-
 .../fleet_executor/start_interceptor.cc | 2 +-
 .../test/compute_interceptor_run_op_test.cc | 2 +-
 .../distributed/ps/service/CMakeLists.txt | 7 +-
 .../distributed/ps/service/brpc_ps_client.cc | 2 +-
 .../distributed/ps/service/brpc_ps_client.h | 2 +-
 .../distributed/ps/service/brpc_utils.cc | 8 +-
 .../ps/service/communicator/communicator.cc | 2 +-
 paddle/fluid/distributed/ps/service/env.h | 2 +-
 paddle/fluid/distributed/ps/service/server.h | 2 +-
 .../fluid/distributed/ps/table/CMakeLists.txt | 6 +-
 .../distributed/ps/table/graph/graph_edge.h | 2 +-
 paddle/fluid/distributed/ps/table/table.h | 2 +-
 paddle/fluid/distributed/rpc/CMakeLists.txt | 2 +-
 paddle/fluid/distributed/test/CMakeLists.txt | 1 +
 .../fluid/distributed/test/brpc_utils_test.cc | 12 +-
 paddle/fluid/eager/CMakeLists.txt | 10 +-
 .../fluid/eager/accumulation/CMakeLists.txt | 2 +-
 .../eager_generated/backwards/scale_node.cc | 2 +-
 paddle/fluid/eager/api/utils/CMakeLists.txt | 4 +-
 .../eager/auto_code_generator/CMakeLists.txt | 6 +
 .../eager/custom_operator/CMakeLists.txt | 4 +-
 .../custom_operator/custom_operator_utils.cc | 9 +-
 paddle/fluid/eager/eager_layout_transformer.h | 8 +-
 paddle/fluid/eager/eager_tensor.h | 2 +-
 paddle/fluid/eager/pylayer/CMakeLists.txt | 2 +-
 paddle/fluid/eager/utils.cc | 18 +-
 paddle/fluid/framework/CMakeLists.txt | 50 +--
 paddle/fluid/framework/convert_utils.h | 2 +-
 paddle/fluid/framework/custom_operator.cc | 6 +-
 paddle/fluid/framework/data_feed.cc | 14 +-
 paddle/fluid/framework/data_feed.h | 2 +-
 paddle/fluid/framework/data_layout.h | 2 +-
 .../fluid/framework/data_layout_transform.cc | 2 +-
 paddle/fluid/framework/data_set.h | 2 +-
 paddle/fluid/framework/details/CMakeLists.txt | 74 +++--
 .../details/broadcast_op_handle_test.h | 8 +-
 .../details/fetch_async_op_handle.cc | 6 +-
 .../details/gather_op_handle_test.cc | 4 +-
 paddle/fluid/framework/details/op_registry.h | 2 +-
 .../details/reduce_op_handle_test.cc | 8 +-
 .../details/scale_loss_grad_op_handle.cc | 2 +-
 paddle/fluid/framework/device_worker.h | 2 +-
 paddle/fluid/framework/dlpack_tensor.h | 2 +-
 paddle/fluid/framework/eigen.h | 8 +-
 paddle/fluid/framework/fleet/heter_wrapper.cc | 6 +-
 .../fluid/framework/heter_section_worker.cc | 2 +-
 paddle/fluid/framework/infershape_utils.cc | 10 +-
 paddle/fluid/framework/inplace_op_inference.h | 2 +-
 paddle/fluid/framework/ir/CMakeLists.txt | 7 +-
 .../framework/ir/attention_lstm_fuse_pass.cc | 8 +-
 .../framework/ir/auto_mixed_precision_pass.cc | 2 +-
 .../ir/conv2d_fusion_layout_transfer_pass.cc | 2 +-
 ..._trans_filter_dilations_nxn_to_1x1_pass.cc | 2 +-
 .../fluid/framework/ir/conv_bn_fuse_pass.cc | 7 +-
 .../ir/fc_elementwise_layernorm_fuse_pass.cc | 4 +-
 .../fused_multi_transformer_encoder_pass.cc | 8 +-
 .../framework/ir/fusion_group/CMakeLists.txt | 4 +-
 .../ir/fusion_group/code_generator_tester.cc | 4 +-
 .../framework/ir/ipu/delete_scale_op_pass.cc | 2 +-
 .../framework/ir/ipu/infer_shape_pass.cc | 6 +-
 .../framework/ir/layer_norm_fuse_pass.cc | 6 +-
 .../ir/memory_optimize_pass/CMakeLists.txt | 5 +-
 .../compute_propagate_scales_mkldnn_pass.cc | 2 +-
 ...ute_propagate_scales_mkldnn_pass_tester.cc | 8 +-
 .../conv_affine_channel_mkldnn_fuse_pass.cc | 4 +-
 .../framework/ir/mkldnn/cpu_quantize_pass.cc | 2 +-
 .../ir/mkldnn/multi_gru_fuse_pass.cc | 2 +-
 .../ir/mkldnn/multi_gru_seq_fuse_pass.cc | 2 +-
 .../params_quantization_mkldnn_pass_tester.cc | 2 +-
 .../ir/mkldnn/quant_dequant_mkldnn_pass.cc | 10 +-
 .../multi_devices_graph_pass.cc | 4 +-
 .../ir/multihead_matmul_fuse_pass.cc | 8 +-
 .../ir/multihead_matmul_roformer_fuse_pass.cc | 4 +-
 paddle/fluid/framework/ir/pass.h | 2 +-
 paddle/fluid/framework/ir/pass_test_util.cc | 3 +-
 paddle/fluid/framework/ir/pass_test_util.h | 2 +-
 .../ir/split_layernorm_to_math_ops_pass.cc | 8 +-
 .../trt_cross_multihead_matmul_fuse_pass.cc | 2 +-
 ...rt_delete_weight_dequant_linear_op_pass.cc | 2 +-
 .../trt_flash_multihead_matmul_fuse_pass.cc | 2 +-
 .../ir/trt_multihead_matmul_fuse_pass.cc | 8 +-
 .../ir/trt_qk_multihead_matmul_fuse_pass.cc | 4 +-
 .../ir/trt_remove_amp_strategy_op_pass.cc | 2 +-
 .../framework/ir/trt_support_nhwc_pass.cc | 4 +-
 .../framework/ir/vit_attention_fuse_pass.cc | 4 +-
 .../framework/ir/xpu/conv1d_xpu_fuse_pass.cc | 2 +-
 .../framework/ir/xpu/conv2d_bias_fuse_pass.cc | 2 +-
 .../framework/ir/xpu/conv2d_xpu_fuse_pass.cc | 3 +-
 .../framework/ir/xpu/fc_xpu_fuse_pass.cc | 3 +-
 .../xpu/fused_multi_transformer_xpu_pass.cc | 2 +-
 .../ir/xpu/multi_encoder_xpu_fuse_pass.cc | 6 +-
 paddle/fluid/framework/ir/xpu/pass_utils.cc | 12 +-
 paddle/fluid/framework/lod_tensor.cc | 4 +-
 paddle/fluid/framework/lod_tensor.h | 2 +-
 .../framework/new_executor/CMakeLists.txt | 2 +-
 .../new_executor/feed_fetch_utils.cc | 4 +-
 .../new_executor/instruction/CMakeLists.txt | 4 +-
 .../instruction/cinn_jit_instruction.cc | 2 +-
 .../new_executor/interpreter/CMakeLists.txt | 1 +
 .../framework/new_executor/interpreter/job.h | 4 +-
 .../framework/new_executor/interpreter/plan.h | 2 +-
 .../new_executor/workqueue/CMakeLists.txt | 4 +-
 .../new_executor/workqueue/event_count.h | 2 +-
 .../framework/no_need_buffer_vars_inference.h | 2 +-
 paddle/fluid/framework/op_desc.cc | 9 +-
 paddle/fluid/framework/op_desc.h | 2 +-
 paddle/fluid/framework/op_registry.h | 6 +-
 paddle/fluid/framework/op_version_registry.h | 2 +-
 paddle/fluid/framework/operator.cc | 2 +-
 paddle/fluid/framework/operator.h | 2 +-
 .../framework/paddle2cinn/CMakeLists.txt | 3 +-
 .../framework/paddle2cinn/cinn_cache_key.cc | 2 +-
 .../framework/paddle2cinn/cinn_cache_key.h | 2 +-
 paddle/fluid/framework/reader.h | 2 +-
 paddle/fluid/framework/shape_inference.h | 2 +-
 paddle/fluid/framework/tensor_util.cc | 20 +-
 paddle/fluid/imperative/CMakeLists.txt | 34 ++-
 paddle/fluid/imperative/all_reduce.cc | 2 +-
 paddle/fluid/imperative/gloo_context.cc | 2 +-
 paddle/fluid/imperative/infer_shape_context.h | 2 +-
 .../imperative/jit/program_desc_tracer.cc | 2 +-
 paddle/fluid/imperative/layout_autotune.cc | 4 +-
 paddle/fluid/imperative/layout_autotune.h | 2 +-
 paddle/fluid/imperative/layout_transformer.h | 16 +-
 paddle/fluid/imperative/reducer.cc | 2 +-
 paddle/fluid/imperative/variable_wrapper.h | 2 +-
 paddle/fluid/inference/CMakeLists.txt | 14 +-
 .../inference/analysis/ir_pass_manager.cc | 2 +-
 paddle/fluid/inference/api/CMakeLists.txt | 4 +-
 .../fluid/inference/api/analysis_predictor.cc | 8 +-
 paddle/fluid/inference/api/api_impl.cc | 4 +-
 paddle/fluid/inference/api/api_impl.h | 2 +-
 .../inference/api/details/CMakeLists.txt | 8 +-
 .../inference/api/details/zero_copy_tensor.cc | 13 +-
 .../fluid/inference/api/mkldnn_quantizer.cc | 2 +-
 .../inference/api/onnxruntime_predictor.cc | 2 +-
 .../fluid/inference/api/resource_manager.cc | 2 +-
 .../fluid/inference/capi_exp/CMakeLists.txt | 1 +
 paddle/fluid/inference/lite/tensor_utils.cc | 8 +-
 .../inference/tensorrt/convert/CMakeLists.txt | 5 +-
 .../tensorrt/convert/bilinear_interp_v2_op.cc | 2 +-
 .../inference/tensorrt/convert/dropout_op.cc | 2 +-
 .../tensorrt/convert/elementwise_op.cc | 2 +-
 .../tensorrt/convert/emb_eltwise_layernorm.cc | 14 +-
 .../tensorrt/convert/fill_constant_op.cc | 2 +-
 .../generic_and_custom_plugin_creater.cc | 2 +-
 .../tensorrt/convert/leaky_relu_op.cc | 2 +-
 .../tensorrt/convert/nearest_interp_op.cc | 2 +-
 .../tensorrt/convert/nearest_interp_v2_op.cc | 2 +-
 .../convert/preln_emb_eltwise_layernorm.cc | 8 +-
 .../tensorrt/convert/preln_residual_bias.cc | 6 +-
 .../tensorrt/convert/preln_skip_layernorm.cc | 4 +-
 .../prompt_tuning_emb_eltwise_layernorm.cc | 10 +-
 .../tensorrt/convert/test_op_converter.cc | 2 +-
 .../tensorrt/convert/test_split_op.cc | 6 +-
 .../inference/tensorrt/convert/ut_helper.h | 4 +-
 .../dynamic_shape_infermeta_factory.h | 2 +-
 paddle/fluid/inference/tensorrt/op_teller.cc | 8 +-
 .../inference/tensorrt/plugin/CMakeLists.txt | 6 +-
 .../elementwiseadd_transpose_op_plugin.cu | 26 +-
 .../tensorrt/plugin/generic_plugin.cu | 4 +-
 .../tensorrt/plugin/group_norm_op_plugin.cu | 6 +-
 .../plugin/instance_norm_op_plugin.cu | 8 +-
 .../tensorrt/plugin/layer_norm_op_plugin.cu | 16 +-
 .../plugin/preln_groupnorm_act_op_plugin.cu | 2 +-
 .../plugin/skip_groupnorm_act_op_plugin.cu | 2 +-
 .../plugin/trans_layernorm_op_plugin.cu | 24 +-
 .../inference/tensorrt/test_dynamic_engine.cc | 10 +-
 .../fluid/inference/tensorrt/test_engine.cc | 2 +-
 .../inference/tensorrt/trt_int8_calibrator.cc | 2 +-
 paddle/fluid/inference/utils/CMakeLists.txt | 4 +-
 .../translator/attribute_translator.cc | 4 +-
 .../ir_adaptor/translator/op_translator.cc | 31 +-
 .../translator/program_translator.cc | 2 +-
 .../ir_adaptor/translator/type_translator.cc | 6 +-
 paddle/fluid/ir_adaptor/translator/utils.cc | 2 +-
 paddle/fluid/jit/layer.cc | 2 +-
 paddle/fluid/jit/property.cc | 2 +-
 paddle/fluid/memory/CMakeLists.txt | 4 +-
 paddle/fluid/memory/allocation/CMakeLists.txt | 4 +-
 .../memory/allocation/allocator_facade.cc | 2 +-
 paddle/fluid/memory/stats.cc | 2 +-
 paddle/fluid/operators/CMakeLists.txt | 12 +-
 paddle/fluid/operators/affine_channel_op.cc | 8 +-
 paddle/fluid/operators/affine_channel_op.cu | 4 +-
 .../fluid/operators/affine_channel_op_xpu.cc | 4 +-
 .../fluid/operators/array_to_lod_tensor_op.cc | 13 +-
 paddle/fluid/operators/assign_value_op.h | 2 +-
 paddle/fluid/operators/attention_lstm_op.cc | 2 +-
 paddle/fluid/operators/batch_norm_op.cc | 15 +-
 paddle/fluid/operators/bilateral_slice_op.cc | 2 +-
 paddle/fluid/operators/bpr_loss_op.cc | 14 +-
 paddle/fluid/operators/cinn/CMakeLists.txt | 1 +
 .../operators/cinn/cinn_launch_context.cc | 6 +-
 .../operators/cinn/cinn_launch_context.h | 2 +-
 .../fluid/operators/class_center_sample_op.cu | 2 +-
 .../fluid/operators/collective/CMakeLists.txt | 5 +-
 .../fluid/operators/collective/barrier_op.h | 2 +-
 .../operators/collective/c_allgather_op.cc | 2 +-
 .../operators/collective/c_allgather_op.h | 2 +-
 .../operators/collective/c_broadcast_op.cu.cc | 2 +-
 .../operators/collective/c_embedding_op.cc | 4 +-
 .../fluid/operators/collective/c_reduce_op.h | 2 +-
 .../operators/collective/c_reducescatter_op.h | 2 +-
 .../operators/collective/global_gather_op.cc | 2 +-
 .../collective/global_gather_op.cu.cc | 4 +-
 .../operators/collective/global_scatter_op.cc | 2 +-
 .../collective/global_scatter_op.cu.cc | 4 +-
 .../collective/partial_allgather_op.h | 2 +-
 .../operators/collective/partial_recv_op.cc | 6 +-
 .../fluid/operators/collective/recv_v2_op.cc | 2 +-
 .../operators/collective/recv_v2_op.cu.cc | 4 +-
 .../operators/collective/send_v2_op.cu.cc | 8 +-
 .../operators/common_infer_shape_functions.cc | 2 +-
 .../controlflow/tensor_array_read_write_op.cc | 2 +-
 .../fluid/operators/controlflow/while_op.cc | 6 +-
 paddle/fluid/operators/correlation_op.cc | 2 +-
 paddle/fluid/operators/crop_op.cc | 10 +-
 paddle/fluid/operators/crop_op.h | 2 +-
 paddle/fluid/operators/cross_entropy_op.cc | 20 +-
 paddle/fluid/operators/cross_entropy_op.h | 8 +-
 paddle/fluid/operators/ctc_align_op.h | 2 +-
 .../custom_device_common_op_registry.cc | 10 +-
 paddle/fluid/operators/data_norm_op.cc | 14 +-
 .../fluid/operators/dequantize_abs_max_op.h | 2 +-
 paddle/fluid/operators/dequantize_log_op.h | 2 +-
 .../fluid/operators/detection/CMakeLists.txt | 5 +-
 .../detection/anchor_generator_op.cc | 4 +-
 .../operators/detection/anchor_generator_op.h | 2 +-
 .../detection/box_decoder_and_assign_op.cc | 10 +-
 .../detection/density_prior_box_op.cc | 4 +-
 .../detection/density_prior_box_op.h | 2 +-
 .../operators/detection/iou_similarity_op.cc | 2 +-
 .../detection/mine_hard_examples_op.cc | 2 +-
 .../operators/detection/multiclass_nms_op.cc | 2 +-
 .../detection/polygon_box_transform_op.cc | 2 +-
 .../fluid/operators/detection/prior_box_op.h | 2 +-
 paddle/fluid/operators/detection_map_op.cc | 2 +-
 paddle/fluid/operators/detection_map_op.h | 6 +-
 .../fluid/operators/dlnne/dlnne_engine_op.h | 10 +-
 .../elementwise/elementwise_functor.h | 2 +-
 .../operators/elementwise/elementwise_op.h | 10 +-
 .../elementwise/elementwise_op_function.h | 4 +-
 paddle/fluid/operators/expand_op.cc | 2 +-
 paddle/fluid/operators/fake_dequantize_op.cc | 4 +-
 paddle/fluid/operators/fake_dequantize_op.h | 2 +-
 paddle/fluid/operators/fake_quantize_op.cu.h | 3 +-
 paddle/fluid/operators/fake_quantize_op.h | 4 +-
 paddle/fluid/operators/fill_constant_op.cc | 6 +-
 paddle/fluid/operators/fill_op.cc | 2 +-
 paddle/fluid/operators/flatten_op.cc | 6 +-
 paddle/fluid/operators/flatten_op.h | 4 +-
 .../fused/cudnn_bn_stats_finalize.cu.h | 2 +-
 .../operators/fused/cudnn_norm_conv.cu.h | 8 +-
 .../fused/cudnn_scale_bias_add_relu.cu.h | 6 +-
 .../operators/fused/fused_attention_utils.h | 2 +-
 .../operators/fused/fused_bn_activation_op.cc | 2 +-
 .../fused/fused_bn_add_activation_op.cc | 2 +-
 .../fused/fused_embedding_fc_lstm_op.cc | 18 +-
 .../fused/fused_embedding_seq_pool_op.cc | 2 +-
 .../operators/fused/fused_feedforward_op.cc | 4 +-
 .../operators/fused/fused_feedforward_op.cu | 2 +-
 .../operators/fused/fused_gate_attention.h | 16 +-
 .../fused/fused_gate_attention_op.cu | 12 +-
 .../operators/fused/fused_gemm_epilogue_op.cc | 12 +-
 .../operators/fused/fused_gemm_epilogue_op.cu | 11 +-
 .../fused/fused_gemm_epilogue_op_xpu.cc | 4 +-
 .../fluid/operators/fused/fused_matmul_op.cc | 8 +-
 .../operators/fused/fused_seqpool_cvm_op.cc | 4 +-
 .../fused/fusion_conv_inception_op.cu | 12 +-
 .../fused/mkldnn/fusion_lstm_mkldnn_op.cc | 6 +-
 .../fused/mkldnn/multi_gru_mkldnn_op.cc | 2 +-
 paddle/fluid/operators/fused/multi_gru_op.cc | 2 +-
 .../operators/fused/resnet_basic_block_op.cc | 8 +-
 .../fused/resnet_basic_block_op_xpu.cc | 36 +--
 .../fluid/operators/fused/resnet_unit_op.cc | 8 +-
 .../fluid/operators/fused/resnet_unit_op.cu | 28 +-
 .../operators/fused/resnet_unit_op_xpu.cc | 18 +-
 .../fluid/operators/generator/CMakeLists.txt | 2 +-
 .../generator/get_expected_kernel_func.cc | 6 +-
 .../operators/grid_sampler_cudnn_op.cu.cc | 12 +-
 paddle/fluid/operators/hash_op.cc | 2 +-
 paddle/fluid/operators/hash_op.h | 2 +-
 paddle/fluid/operators/index_select_op.h | 4 +-
 paddle/fluid/operators/interpolate_op.cc | 8 +-
 paddle/fluid/operators/interpolate_op.cu | 12 +-
 paddle/fluid/operators/interpolate_op.h | 14 +-
 paddle/fluid/operators/is_empty_op.h | 2 +-
 paddle/fluid/operators/l1_norm_op.cc | 2 +-
 paddle/fluid/operators/layout_utils.h | 24 +-
 paddle/fluid/operators/linear_chain_crf_op.h | 2 +-
 paddle/fluid/operators/lite/ut_helper.h | 2 +-
 paddle/fluid/operators/lod_reset_op.h | 4 +-
 .../operators/lookup_table_dequant_op.cc | 4 +-
 paddle/fluid/operators/lookup_table_op.cc | 4 +-
 paddle/fluid/operators/lookup_table_op.cu | 2 +-
 paddle/fluid/operators/lookup_table_op.h | 2 +-
 paddle/fluid/operators/lookup_table_v2_op.cu | 2 +-
 paddle/fluid/operators/lookup_table_v2_op.h | 2 +-
 paddle/fluid/operators/lrn_op.cc | 10 +-
 paddle/fluid/operators/lrn_op.h | 4 +-
 .../fluid/operators/match_matrix_tensor_op.cc | 4 +-
 paddle/fluid/operators/math/CMakeLists.txt | 12 +-
 paddle/fluid/operators/math/beam_search.cc | 2 +-
 paddle/fluid/operators/math/beam_search.cu | 4 +-
 .../fluid/operators/math/beam_search_xpu.cc | 2 +-
 paddle/fluid/operators/math/context_project.h | 8 +-
 .../operators/math/eigen_values_vectors.h | 8 +-
 paddle/fluid/operators/math/sample_prob.cu | 2 +-
 paddle/fluid/operators/math/sample_prob.h | 2 +-
 paddle/fluid/operators/matmul_op.cc | 10 +-
 paddle/fluid/operators/merge_lod_tensor_op.cc | 8 +-
 .../operators/metrics/precision_recall_op.cc | 4 +-
 paddle/fluid/operators/minus_op.cc | 2 +-
 .../operators/mkldnn/interpolate_mkldnn_op.cc | 8 +-
 .../operators/mkldnn/layer_norm_mkldnn_op.cc | 2 +-
 .../operators/mkldnn/matmul_mkldnn_op.cc | 4 +-
 .../operators/mkldnn/quantize_mkldnn_op.cc | 2 +-
 .../operators/mkldnn/requantize_mkldnn_op.cc | 2 +-
 .../operators/mkldnn/reshape_mkldnn_op.cc | 35 +--
 .../operators/mkldnn/transpose_mkldnn_op.cc | 4 +-
 .../fluid/operators/modified_huber_loss_op.cc | 2 +-
 .../fluid/operators/modified_huber_loss_op.cu | 2 +-
 .../fluid/operators/modified_huber_loss_op.h | 2 +-
 paddle/fluid/operators/nccl/nccl_op.cu.cc | 4 +-
 paddle/fluid/operators/nce_op.cc | 6 +-
 paddle/fluid/operators/nce_op.h | 4 +-
 .../optimizers/decayed_adagrad_op.cc | 4 +-
 paddle/fluid/operators/optimizers/dpsgd_op.cc | 4 +-
 paddle/fluid/operators/optimizers/ftrl_op.cc | 6 +-
 .../pow2_decay_with_linear_warmup_op.cc | 2 +-
 .../operators/optimizers/proximal_gd_op.cc | 2 +-
 .../operators/optimizers/sparse_momentum_op.h | 2 +-
 paddle/fluid/operators/pad2d_op.cc | 4 +-
 paddle/fluid/operators/partial_sum_op.cc | 2 +-
 .../operators/positive_negative_pair_op.cc | 2 +-
 paddle/fluid/operators/pscore/CMakeLists.txt | 2 +
 .../pscore/distributed_lookup_table_op.cc | 8 +-
 .../pscore/distributed_lookup_table_op.h | 8 +-
 paddle/fluid/operators/pscore/fake_init_op.cc | 6 +-
 .../operators/pull_box_extended_sparse_op.cc | 9 +-
 paddle/fluid/operators/pull_box_sparse_op.cc | 5 +-
 .../fluid/operators/pull_gpups_sparse_op.cc | 5 +-
 paddle/fluid/operators/pull_sparse_op.cc | 5 +-
 paddle/fluid/operators/pull_sparse_v2_op.cc | 4 +-
 paddle/fluid/operators/pyramid_hash_op.cc | 8 +-
 paddle/fluid/operators/quantize_linear_op.h | 4 +-
 paddle/fluid/operators/randperm_op.h | 2 +-
 paddle/fluid/operators/range_op.h | 2 +-
 paddle/fluid/operators/rank_attention.cu.h | 2 +-
 paddle/fluid/operators/read_file_op.cc | 4 +-
 .../operators/reader/create_py_reader_op.cc | 4 +-
 .../reader/lod_tensor_blocking_queue.h | 2 +-
 .../operators/reader/reader_op_registry.cc | 2 +-
 paddle/fluid/operators/recurrent_op.cc | 14 +-
 paddle/fluid/operators/reduce_ops/reduce_op.h | 10 +-
 .../operators/reduce_ops/reduce_op_function.h | 8 +-
 .../fluid/operators/repeat_interleave_op.cc | 4 +-
 paddle/fluid/operators/reshape_op.cc | 24 +-
 paddle/fluid/operators/sampling_id_op.cc | 2 +-
 paddle/fluid/operators/sampling_id_op.h | 2 +-
 .../operators/sequence_ops/CMakeLists.txt | 2 +-
 .../sequence_ops/sequence_concat_op.cc | 10 +-
 .../sequence_ops/sequence_enumerate_op.h | 2 +-
 .../sequence_ops/sequence_expand_as_op.cu | 4 +-
 .../sequence_ops/sequence_expand_as_op.h | 4 +-
 .../sequence_ops/sequence_expand_op.cu | 2 +-
 .../sequence_ops/sequence_expand_op.h | 2 +-
 .../operators/sequence_ops/sequence_pad_op.cc | 12 +-
 .../sequence_ops/sequence_reshape_op.cc | 2 +-
 .../sequence_ops/sequence_slice_op.h | 8 +-
 .../sequence_softmax_cudnn_op.cu.cc | 6 +-
 .../sequence_ops/sequence_softmax_op.cc | 6 +-
 .../sequence_ops/sequence_unpad_op.cc | 2 +-
 .../sequence_ops/sequence_unpad_op.h | 2 +-
 paddle/fluid/operators/shuffle_batch_op.cc | 2 +-
 paddle/fluid/operators/slice_op.cc | 2 +-
 paddle/fluid/operators/sparse_attention_op.cu | 2 +-
 paddle/fluid/operators/split_op.h | 6 +-
 paddle/fluid/operators/spp_op.cc | 2 +-
 paddle/fluid/operators/spp_op.h | 17 +-
 paddle/fluid/operators/squeeze_op.h | 4 +-
 paddle/fluid/operators/stft_op.cc | 2 +-
 .../operators/string/faster_tokenizer_op.h | 9 +-
 paddle/fluid/operators/svd_helper.h | 50 +--
 paddle/fluid/operators/sync_batch_norm_op.cu | 2 +-
 .../fluid/operators/sync_batch_norm_utils.h | 4 +-
 paddle/fluid/operators/tdm_child_op.cc | 6 +-
 paddle/fluid/operators/tdm_sampler_op.cc | 11 +-
 paddle/fluid/operators/tdm_sampler_op.h | 2 +-
 paddle/fluid/operators/temporal_shift_op.cu | 12 +-
 paddle/fluid/operators/temporal_shift_op.h | 6 +-
 .../operators/tensor_array_to_tensor_op.cc | 10 +-
 .../operators/tensorrt/tensorrt_engine_op.h | 10 +-
 .../tensorrt/tensorrt_engine_op_test.cc | 2 +-
 paddle/fluid/operators/top_k_op.cu | 4 +-
 paddle/fluid/operators/top_k_op.h | 4 +-
 paddle/fluid/operators/top_k_op_xpu.cc | 2 +-
 paddle/fluid/operators/transfer_layout_op.h | 6 +-
 paddle/fluid/operators/transpose_op.cc | 10 +-
 paddle/fluid/operators/unbind_op.h | 2 +-
 .../uniform_random_batch_size_like_op.cc | 4 +-
 .../uniform_random_batch_size_like_op.cu | 4 +-
 paddle/fluid/operators/uniform_random_op.h | 2 +-
 paddle/fluid/operators/unique_op.h | 22 +-
 paddle/fluid/operators/utils.h | 8 +-
 paddle/fluid/operators/var_conv_2d_op.cc | 8 +-
 paddle/fluid/pir/dialect/CMakeLists.txt | 2 +-
 .../pir/dialect/kernel/ir/kernel_dialect.cc | 6 +-
 .../pir/dialect/op_generator/op_build_gen.py | 4 +-
 .../pir/dialect/operator/ir/api_builder.cc | 2 +-
 .../pir/dialect/operator/ir/api_builder.h | 2 +-
 .../dialect/operator/ir/attribute_storage.h | 2 +-
 .../pir/dialect/operator/ir/ir_meta_tensor.h | 1 +
 .../dialect/operator/ir/ir_selected_rows.cc | 5 +-
 .../pir/dialect/operator/ir/ir_tensor.cc | 4 +-
 .../pir/dialect/operator/ir/manual_op.cc | 2 +-
 .../pir/dialect/operator/ir/op_dialect.cc | 6 +-
 .../operator/utils/op_yaml_info_parser.cc | 1 +
 .../fluid/pir/dialect/operator/utils/utils.h | 1 +
 .../fusion/conv2d_add_act_fuse_pass.cc | 2 +-
 .../transforms/fusion/conv2d_add_fuse_pass.cc | 2 +-
 .../transforms/fusion/conv2d_bn_fuse_pass.cc | 4 +-
 .../params_sync_among_devices_pass.cc | 2 +-
 .../transforms/transform_general_functions.h | 4 +-
 paddle/fluid/platform/CMakeLists.txt | 55 ++--
 paddle/fluid/platform/bfloat16_test.cu | 2 +-
 .../platform/cuda_graph_with_memory_pool.h | 2 +-
 .../platform/device/custom/CMakeLists.txt | 4 +-
 .../device/custom/custom_device_test.cc | 12 +-
 .../fluid/platform/device/gpu/CMakeLists.txt | 10 +-
 .../platform/device/gpu/cuda/CMakeLists.txt | 2 +-
 .../fluid/platform/device/ipu/CMakeLists.txt | 2 +-
 .../fluid/platform/device/ipu/ipu_compiler.cc | 2 +-
 .../fluid/platform/device/ipu/ipu_executor.cc | 4 +-
 .../fluid/platform/device/xpu/CMakeLists.txt | 2 +
 paddle/fluid/platform/device_code_test.cc | 4 +-
 paddle/fluid/platform/dynload/CMakeLists.txt | 12 +-
 paddle/fluid/platform/enforce.h | 4 -
 paddle/fluid/platform/errors.h | 6 +-
 paddle/fluid/platform/float16_test.cu | 2 +-
 paddle/fluid/platform/macros.h | 2 +-
 paddle/fluid/platform/monitor.h | 2 +-
 paddle/fluid/platform/profiler/CMakeLists.txt | 12 +-
 .../profiler/custom_device/CMakeLists.txt | 2 +-
 .../tensor_operants_gen.py | 4 +-
 .../composite_backward_api.h | 122 ++++----
 .../composite_double_backward_api.h | 49 +--
 .../api/manual_prim/utils/static_utils.cc | 2 +-
 .../fluid/prim/api/manual_prim/utils/utils.h | 6 +-
 paddle/fluid/prim/utils/static/desc_tensor.h | 6 +-
 paddle/fluid/primitive/backend/CMakeLists.txt | 2 +-
 paddle/fluid/primitive/composite/composite.h | 51 ++--
 paddle/fluid/primitive/rule/vjp/details.h | 88 +++---
 paddle/fluid/primitive/type/lazy_tensor.h | 2 +-
 paddle/fluid/primitive/utils/CMakeLists.txt | 4 +-
 paddle/fluid/primitive/utils/utils.h | 4 +-
 paddle/fluid/pybind/CMakeLists.txt | 15 +-
 paddle/fluid/pybind/auto_parallel_py.cc | 4 +-
 paddle/fluid/pybind/eager.cc | 10 +-
 paddle/fluid/pybind/eager_functions.cc | 6 +-
 paddle/fluid/pybind/eager_math_op_patch.cc | 2 +-
 paddle/fluid/pybind/eager_method.cc | 4 +-
 paddle/fluid/pybind/eager_properties.cc | 8 +-
 paddle/fluid/pybind/eager_utils.cc | 5 +-
 paddle/fluid/pybind/eval_frame_tools.cc | 2 +-
 paddle/fluid/pybind/exception.cc | 4 +-
 .../pybind/global_value_getter_setter.cc | 2 +-
 paddle/fluid/pybind/pir.cc | 2 +-
 paddle/fluid/pybind/pir.h | 2 +-
 paddle/fluid/pybind/process_group_utils.h | 10 +-
 paddle/fluid/pybind/pybind.cc | 6 +-
 paddle/fluid/pybind/reader_py.cc | 12 +-
 paddle/fluid/pybind/tensor.cc | 42 +--
 paddle/fluid/pybind/tensor_py.h | 14 +-
 paddle/fluid/sub_graph/sub_graph_checker.cc | 2 +-
 paddle/phi/CMakeLists.txt | 3 +-
 paddle/phi/api/all.h | 4 +-
 paddle/phi/api/ext/exception.h | 99 ------
 paddle/phi/api/ext/op_meta_info.h | 2 +-
 paddle/phi/api/include/context_pool.h | 2 +-
 paddle/phi/api/include/tensor.h | 16 +-
 paddle/phi/api/lib/api_gen_utils.cc | 46 +--
 paddle/phi/api/lib/backend_set.h | 2 +-
 paddle/phi/api/lib/data_type_set.h | 2 +-
 paddle/phi/api/lib/kernel_dispatch.cc | 9 +-
 paddle/phi/api/lib/kernel_dispatch.h | 11 +-
 paddle/phi/api/lib/tensor.cc | 16 +-
 paddle/phi/api/lib/tensor_utils.cc | 3 +-
 paddle/phi/api/profiler/common_event.h | 2 +-
 paddle/phi/api/profiler/host_event_recorder.h | 2 +-
 paddle/phi/api/profiler/supplement_tracing.h | 2 +-
 .../api/yaml/generator/tensor_operants_gen.py | 6 +-
 paddle/phi/backends/CMakeLists.txt | 2 +-
 paddle/phi/backends/c_comm_lib.h | 4 +-
 paddle/phi/backends/context_pool.h | 2 +-
 paddle/phi/backends/device_memory_aligment.h | 2 +-
 paddle/phi/backends/dynload/CMakeLists.txt | 4 +-
 paddle/phi/backends/event.h | 2 +-
 paddle/phi/backends/gpu/cuda/cuda_graph.h | 4 +-
 paddle/phi/backends/gpu/cuda/cudnn_desc.h | 6 +-
 paddle/phi/backends/gpu/cuda/cudnn_helper.h | 7 +-
 paddle/phi/backends/gpu/gpu_context.cc | 2 +-
 paddle/phi/backends/gpu/rocm/miopen_desc.h | 6 +-
 paddle/phi/backends/gpu/rocm/miopen_helper.h | 4 +-
 paddle/phi/backends/onednn/matmul_utils.cc | 4 +-
 paddle/phi/backends/onednn/matmul_utils.h | 4 +-
 paddle/phi/backends/onednn/onednn_context.h | 2 +-
 paddle/phi/backends/onednn/onednn_helper.h | 12 +-
 paddle/phi/backends/onednn/onednn_reuse.h | 31 +-
 paddle/phi/backends/stream.h | 2 +-
 paddle/phi/backends/xpu/xpu_context.cc | 2 +-
 paddle/phi/capi/include/type_utils.h | 2 +-
 paddle/phi/capi/include/wrapper_base.h | 2 +-
 paddle/phi/capi/lib/c_meta_tensor.cc | 2 +-
 paddle/phi/capi/lib/c_tensor.cc | 2 +-
 paddle/phi/common/backend.h | 2 +-
 paddle/phi/common/cpstring_impl.h | 2 +-
 paddle/phi/common/data_type.h | 2 +-
 paddle/phi/common/int_array.cc | 2 +-
 paddle/phi/common/int_array.h | 8 +-
 paddle/phi/common/memory_utils.h | 2 +-
 paddle/phi/common/place.cc | 2 +-
 paddle/phi/common/place.h | 2 +-
 paddle/phi/common/scalar.h | 2 +-
 paddle/phi/common/tensor_ref.h | 2 +-
 paddle/phi/core/CMakeLists.txt | 2 -
 paddle/phi/core/attribute.h | 2 +-
 paddle/phi/core/compat/convert_utils.h | 2 +-
 paddle/phi/core/compat/op_utils.h | 2 +-
 paddle/phi/core/custom_kernel.h | 2 +-
 paddle/phi/core/ddim.cc | 230 --------------
 paddle/phi/core/ddim.h | 284 ------------------
 paddle/phi/core/dense_tensor.h | 1 +
 .../distributed/auto_parallel/dist_tensor.cc | 8 +-
 .../auto_parallel/inferspmd_utils.h | 2 +-
 .../auto_parallel/placement_types.h | 2 +-
 .../reshard/nd_mesh_reshard_function.cc | 16 +-
 .../reshard/p_to_s_reshard_function.cc | 2 +-
 .../auto_parallel/reshard/reshard_function.cc | 4 +-
 .../reshard/s_to_s_reshard_function.cc | 4 +-
 .../phi/core/distributed/bkcl_comm_context.cc | 2 +-
 .../distributed/check/nccl_dynamic_check.cc | 2 +-
 .../core/distributed/check/static_check.cc | 2 +-
 paddle/phi/core/distributed/comm_context.h | 2 +-
 .../core/distributed/comm_context_manager.h | 2 +-
 paddle/phi/core/distributed/comm_task.h | 2 +-
 .../phi/core/distributed/comm_task_manager.h | 2 +-
 .../phi/core/distributed/gloo_comm_context.h | 2 +-
 paddle/phi/core/distributed/gloo_utils.cc | 2 +-
 .../phi/core/distributed/nccl_comm_context.cc | 6 +-
 .../phi/core/distributed/nccl_comm_context.h | 2 +-
 paddle/phi/core/distributed/nccl_comm_task.h | 2 +-
 paddle/phi/core/distributed/nccl_tools.cc | 2 +-
 .../phi/core/distributed/store/CMakeLists.txt | 2 +-
 .../phi/core/distributed/xccl_comm_context.cc | 6 +-
 .../phi/core/distributed/xccl_comm_context.h | 2 +-
 paddle/phi/core/enforce.h | 78 +----
 paddle/phi/core/errors.h | 147 ---------
 paddle/phi/core/extended_tensor.cc | 1 +
 paddle/phi/core/flags.h | 2 +-
 paddle/phi/core/infermeta_utils.h | 2 +-
 paddle/phi/core/kernel_factory.cc | 2 +-
 paddle/phi/core/kernel_factory.h | 2 +-
 paddle/phi/core/macros.h | 67 -----
 paddle/phi/core/meta_tensor.h | 6 +-
 paddle/phi/core/mixed_vector.h | 2 +-
 paddle/phi/core/scope_guard.h | 2 +-
 paddle/phi/core/selected_rows_impl.h | 6 +-
 paddle/phi/core/sparse_coo_tensor.cc | 7 +-
 paddle/phi/core/sparse_csr_tensor.cc | 4 +-
 paddle/phi/core/storage_properties.h | 2 +-
 paddle/phi/core/tensor_array.cc | 1 +
 paddle/phi/core/tensor_base.h | 4 +-
 paddle/phi/core/tensor_meta.cc | 2 +-
 paddle/phi/core/tensor_meta.h | 5 +-
 paddle/phi/core/tensor_utils.cc | 2 +-
 paddle/phi/core/threadpool.h | 2 +-
 paddle/phi/core/utils/array.h | 142 ---------
 paddle/phi/core/utils/dim.h | 111 -------
 paddle/phi/core/utils/unroll_array_ops.h | 129 --------
 paddle/phi/core/visit_type.h | 2 +-
 paddle/phi/infermeta/backward.cc | 26 +-
 paddle/phi/infermeta/binary.cc | 197 ++++++------
 paddle/phi/infermeta/fusion.cc | 96 +++---
 paddle/phi/infermeta/multiary.cc | 219 +++++++-------
 paddle/phi/infermeta/nullary.cc | 28 +-
 paddle/phi/infermeta/nullary.h | 1 +
 paddle/phi/infermeta/sparse/binary.cc | 4 +-
 paddle/phi/infermeta/spmd_rules/concat.cc | 6 +-
 .../spmd_rules/default_data_parallel.cc | 8 +-
 .../phi/infermeta/spmd_rules/elementwise.cc | 16 +-
 paddle/phi/infermeta/spmd_rules/embedding.cc | 8 +-
 .../infermeta/spmd_rules/flash_attention.cc | 26 +-
 paddle/phi/infermeta/spmd_rules/flatten.cc | 6 +-
 paddle/phi/infermeta/spmd_rules/layer_norm.cc | 16 +-
 paddle/phi/infermeta/spmd_rules/matmul.cc | 10 +-
 paddle/phi/infermeta/spmd_rules/numel.cc | 2 +-
 paddle/phi/infermeta/spmd_rules/reduction.cc | 10 +-
 paddle/phi/infermeta/spmd_rules/replicated.cc | 8 +-
 paddle/phi/infermeta/spmd_rules/reshape.cc | 6 +-
 paddle/phi/infermeta/spmd_rules/slice.cc | 13 +-
 paddle/phi/infermeta/spmd_rules/softmax.cc | 6 +-
 paddle/phi/infermeta/spmd_rules/split.cc | 10 +-
 paddle/phi/infermeta/spmd_rules/squeeze.cc | 6 +-
 paddle/phi/infermeta/spmd_rules/stack.cc | 2 +-
 paddle/phi/infermeta/spmd_rules/transpose.cc | 9 +-
 paddle/phi/infermeta/spmd_rules/triu.cc | 6 +-
 paddle/phi/infermeta/spmd_rules/unsqueeze.cc | 6 +-
 paddle/phi/infermeta/spmd_rules/where.cc | 22 +-
 paddle/phi/infermeta/strings/nullary.cc | 2 +-
 paddle/phi/infermeta/ternary.cc | 80 ++---
 paddle/phi/infermeta/unary.cc | 258 ++++++++--------
 paddle/phi/kernels/array_kernel.cc | 8 +-
 paddle/phi/kernels/assign_kernel.cc | 2 +-
 paddle/phi/kernels/autotune/cache_base.h | 2 +-
 paddle/phi/kernels/autotune/gpu_timer.h | 2 +-
 paddle/phi/kernels/coalesce_tensor_kernel.cc | 2 +-
 .../kernels/cpu/affine_grid_grad_kernel.cc | 6 +-
 paddle/phi/kernels/cpu/affine_grid_kernel.cc | 6 +-
 paddle/phi/kernels/cpu/arange_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/arg_min_max_kernel.cc | 6 +-
 paddle/phi/kernels/cpu/argsort_grad_kernel.cc | 6 +-
 paddle/phi/kernels/cpu/argsort_kernel.cc | 6 +-
 paddle/phi/kernels/cpu/assign_pos_kernel.cc | 2 +-
 .../phi/kernels/cpu/batch_norm_grad_kernel.cc | 4 +-
 paddle/phi/kernels/cpu/batch_norm_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/concat_kernel.cc | 4 +-
 paddle/phi/kernels/cpu/conv_util.h | 19 +-
 paddle/phi/kernels/cpu/cum_maxmin_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/cumprod_grad_kernel.cc | 2 +-
 .../phi/kernels/cpu/diagonal_grad_kernel.cc | 4 +-
 paddle/phi/kernels/cpu/diagonal_kernel.cc | 4 +-
 paddle/phi/kernels/cpu/dropout_grad_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/dropout_kernel.cc | 4 +-
 paddle/phi/kernels/cpu/eig.h | 8 +-
 paddle/phi/kernels/cpu/eig_kernel.cc | 4 +-
 paddle/phi/kernels/cpu/eigvals_kernel.cc | 12 +-
 .../cpu/fill_diagonal_tensor_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/flip_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/full_kernel.cc | 4 +-
 paddle/phi/kernels/cpu/gaussian_kernel.cc | 2 +-
 .../kernels/cpu/generate_proposals_kernel.cc | 57 ++--
 paddle/phi/kernels/cpu/grid_sample_kernel.cc | 4 +-
 .../phi/kernels/cpu/group_norm_grad_kernel.cc | 4 +-
 paddle/phi/kernels/cpu/group_norm_kernel.cc | 4 +-
 .../phi/kernels/cpu/gumbel_softmax_kernel.cc | 2 +-
 .../phi/kernels/cpu/hsigmoid_loss_kernel.cc | 4 +-
 paddle/phi/kernels/cpu/index_add_impl.h | 4 +-
 .../phi/kernels/cpu/index_put_grad_kernel.cc | 15 +-
 paddle/phi/kernels/cpu/index_put_kernel.cc | 6 +-
 paddle/phi/kernels/cpu/index_sample_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/index_select_impl.h | 4 +-
 .../kernels/cpu/instance_norm_grad_kernel.cc | 2 +-
 .../phi/kernels/cpu/instance_norm_kernel.cc | 2 +-
 .../kernels/cpu/interpolate_grad_kernel.cc | 8 +-
 paddle/phi/kernels/cpu/interpolate_kernel.cc | 8 +-
 .../phi/kernels/cpu/kthvalue_grad_kernel.cc | 8 +-
 paddle/phi/kernels/cpu/kthvalue_kernel.cc | 8 +-
 .../phi/kernels/cpu/layer_norm_grad_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/layer_norm_kernel.cc | 2 +-
 .../kernels/cpu/limit_by_capacity_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/linspace_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/logspace_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/lstsq_kernel.cc | 14 +-
 paddle/phi/kernels/cpu/lu_kernel.cc | 6 +-
 .../kernels/cpu/masked_select_grad_kernel.cc | 4 +-
 .../phi/kernels/cpu/masked_select_kernel.cc | 4 +-
 paddle/phi/kernels/cpu/matrix_nms_kernel.cc | 14 +-
 paddle/phi/kernels/cpu/mode_grad_kernel.cc | 8 +-
 paddle/phi/kernels/cpu/mode_kernel.cc | 8 +-
 .../phi/kernels/cpu/multiclass_nms3_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/nms_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/nonzero_kernel.cc | 2 +-
 .../kernels/cpu/overlap_add_grad_kernel.cc | 28 +-
 paddle/phi/kernels/cpu/overlap_add_kernel.cc | 28 +-
 paddle/phi/kernels/cpu/prior_box_kernel.cc | 2 +-
 .../cpu/prune_gate_by_capacity_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/randint_kernel.cc | 2 +-
 .../phi/kernels/cpu/random_routing_kernel.cc | 2 +-
 .../cpu/repeat_interleave_grad_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/rnn_functor.h | 2 +-
 paddle/phi/kernels/cpu/rnn_grad_kernel.cc | 10 +-
 paddle/phi/kernels/cpu/rnn_kernel.cc | 20 +-
 .../phi/kernels/cpu/roi_align_grad_kernel.cc | 8 +-
 paddle/phi/kernels/cpu/roi_align_kernel.cc | 6 +-
 .../phi/kernels/cpu/roi_pool_grad_kernel.cc | 8 +-
 paddle/phi/kernels/cpu/roi_pool_kernel.cc | 8 +-
 paddle/phi/kernels/cpu/send_u_recv_kernel.cc | 4 +-
 paddle/phi/kernels/cpu/send_ue_recv_kernel.cc | 4 +-
 paddle/phi/kernels/cpu/send_uv_grad_kernel.cc | 4 +-
 .../phi/kernels/cpu/shuffle_batch_kernel.cc | 4 +-
 .../sparse_weight_embedding_grad_kernel.cc | 2 +-
 paddle/phi/kernels/cpu/strided_copy_kernel.cc | 4 +-
 .../kernels/cpu/temporal_shift_grad_kernel.cc | 8 +-
 .../phi/kernels/cpu/temporal_shift_kernel.cc | 8 +-
 paddle/phi/kernels/cpu/top_k_grad_kernel.cc | 6 +-
 paddle/phi/kernels/cpu/top_k_kernel.cc | 6 +-
 .../kernels/cpu/triangular_solve_kernel.cc | 4 +-
 paddle/phi/kernels/cpu/uniform_kernel.cc | 2 +-
 .../kernels/cpu/unique_consecutive_functor.h | 16 +-
 .../kernels/cpu/unique_consecutive_kernel.cc | 2 +-
 paddle/phi/kernels/dist_grad_kernel.cc | 4 +-
 paddle/phi/kernels/empty_kernel.cc | 4 +-
 paddle/phi/kernels/flatten_grad_kernel.cc | 2 +-
 paddle/phi/kernels/full_kernel.cc | 2 +-
 paddle/phi/kernels/funcs/affine_grid_utils.h | 28 +-
 paddle/phi/kernels/funcs/axis_utils.h | 2 +-
 paddle/phi/kernels/funcs/batch_norm_utils.h | 24 +-
 paddle/phi/kernels/funcs/blas/blas.cc | 4 +-
 .../phi/kernels/funcs/blas/blaslt_impl.cu.h | 8 +-
 paddle/phi/kernels/funcs/broadcast_function.h | 30 +-
 paddle/phi/kernels/funcs/common_shape.h | 16 +-
 paddle/phi/kernels/funcs/compound_functors.h | 2 +-
 paddle/phi/kernels/funcs/concat_funcs.h | 2 +-
 paddle/phi/kernels/funcs/cufft_util.h | 2 +-
 paddle/phi/kernels/funcs/cumprod.h | 2 +-
 .../kernels/funcs/data_layout_transform.cc | 8 +-
 .../phi/kernels/funcs/data_layout_transform.h | 4 +-
 .../funcs/detail/activation_functions.h | 2 +-
 .../phi/kernels/funcs/detail/strided_memcpy.h | 2 +-
 paddle/phi/kernels/funcs/diag_functor.h | 2 +-
 paddle/phi/kernels/funcs/diagonal.h | 12 +-
 paddle/phi/kernels/funcs/dims_simplifier.h | 12 +-
 paddle/phi/kernels/funcs/dropout_impl.cu.h | 2 +-
 paddle/phi/kernels/funcs/elementwise_base.h | 19 +-
 .../phi/kernels/funcs/elementwise_functor.h | 2 +-
 .../phi/kernels/funcs/elementwise_grad_base.h | 10 +-
 paddle/phi/kernels/funcs/elementwise_utils.h | 6 +-
 paddle/phi/kernels/funcs/fc_functor.cu | 6 +-
 paddle/phi/kernels/funcs/fft.cc | 28 +-
 paddle/phi/kernels/funcs/fft.cu | 12 +-
 paddle/phi/kernels/funcs/fft_fill_conj.h | 6 +-
 paddle/phi/kernels/funcs/fft_key.h | 4 +-
 paddle/phi/kernels/funcs/for_range.h | 2 +-
 .../phi/kernels/funcs/fused_gemm_epilogue.h | 4 +-
 paddle/phi/kernels/funcs/gather.cu.h | 6 +-
 paddle/phi/kernels/funcs/gather.h | 10 +-
 .../kernels/funcs/gather_scatter_functor.cc | 2 +-
 paddle/phi/kernels/funcs/im2col.h | 4 +-
 paddle/phi/kernels/funcs/index_calculator.h | 13 +-
 paddle/phi/kernels/funcs/index_put_utils.h | 24 +-
 .../phi/kernels/funcs/interpolate_function.h | 8 +-
 paddle/phi/kernels/funcs/jit/CMakeLists.txt | 4 +-
 paddle/phi/kernels/funcs/jit/kernel_base.h | 2 +-
 paddle/phi/kernels/funcs/jit/registry.h | 2 +-
 paddle/phi/kernels/funcs/layer_norm_impl.cu.h | 2 +-
 paddle/phi/kernels/funcs/math_function.cc | 6 +-
 paddle/phi/kernels/funcs/math_function.cu | 4 +-
 paddle/phi/kernels/funcs/matrix_reduce.cc | 7 +-
 paddle/phi/kernels/funcs/matrix_reduce.cu | 6 +-
 paddle/phi/kernels/funcs/matrix_solve.cu | 2 +-
 paddle/phi/kernels/funcs/matrix_solve.h | 2 +-
 paddle/phi/kernels/funcs/maxouting.h | 2 +-
 paddle/phi/kernels/funcs/mode.h | 2 +-
 paddle/phi/kernels/funcs/nanmedian_utils.h | 4 +-
 paddle/phi/kernels/funcs/norm_utils.cu.h | 2 +-
 paddle/phi/kernels/funcs/norm_utils.h | 4 +-
 paddle/phi/kernels/funcs/pooling.h | 4 +-
 paddle/phi/kernels/funcs/reduce_function.h | 12 +-
 paddle/phi/kernels/funcs/reduce_functor.h | 2 +-
 .../phi/kernels/funcs/reduce_grad_functions.h | 4 +-
 .../funcs/repeat_tensor2index_tensor.h | 2 +-
 paddle/phi/kernels/funcs/scatter.cu.h | 4 +-
 paddle/phi/kernels/funcs/scatter.h | 6 +-
 paddle/phi/kernels/funcs/segment_pooling.cc | 2 +-
 paddle/phi/kernels/funcs/select_impl.cu.h | 6 +-
 .../kernels/funcs/selected_rows_functor.cc | 12 +-
 .../kernels/funcs/selected_rows_functor.cu | 8 +-
 paddle/phi/kernels/funcs/sequence2batch.cc | 4 +-
 paddle/phi/kernels/funcs/sequence_pooling.cc | 2 +-
 paddle/phi/kernels/funcs/sequence_pooling.cu | 2 +-
 paddle/phi/kernels/funcs/slice.h | 10 +-
 paddle/phi/kernels/funcs/slice_utils.h | 4 +-
 paddle/phi/kernels/funcs/softmax.cu | 4 +-
 .../phi/kernels/funcs/sparse/common_shape.h | 6 +-
 paddle/phi/kernels/funcs/sparse/convolution.h | 2 +-
 .../kernels/funcs/sparse/flatten_indices.h | 2 +-
 paddle/phi/kernels/funcs/sparse/softmax.cu.h | 6 +-
 paddle/phi/kernels/funcs/sparse/softmax.h | 2 +-
 .../funcs/sparse/sparse_blas_impl.cu.h | 10 +-
 .../funcs/sparse/sparse_blas_impl.hip.h | 8 +-
 paddle/phi/kernels/funcs/strided_memcpy.h | 6 +-
 paddle/phi/kernels/funcs/strided_slice.h | 14 +-
 .../phi/kernels/funcs/top_k_function_cuda.h | 4 +-
 .../phi/kernels/funcs/transpose_function.cu.h | 6 +-
 paddle/phi/kernels/funcs/unique_functor.h | 22 +-
 paddle/phi/kernels/funcs/unsqueeze.h | 10 +-
 .../kernels/funcs/values_vectors_functor.h | 8 +-
 paddle/phi/kernels/funcs/vol2col.cc | 1 +
 paddle/phi/kernels/funcs/vol2col.h | 2 +-
 .../cpu/distributed_fused_lamb_init_kernel.cc | 2 +-
 ...used_softmax_mask_upper_triangle_kernel.cc | 2 +-
 .../kernels/fusion/cpu/fusion_gru_kernel.cc | 4 +-
 .../cpu/fusion_repeated_fc_relu_kernel.cc | 2 +-
 .../cpu/fusion_seqconv_eltadd_relu_kernel.cc | 6 +-
 .../cpu/fusion_seqexpand_concat_fc_kernel.cc | 2 +-
 .../cutlass/memory_efficient_attention.cu | 2 +-
 .../generate_variable_forward_kernels.py | 2 +-
 .../memory_efficient_attention_utils.h | 2 +-
 paddle/phi/kernels/fusion/gpu/cast_with_ptr.h | 6 +-
 .../kernels/fusion/gpu/conv_fusion_kernel.cu | 16 +-
 .../gpu/distributed_fused_lamb_init_kernel.cu | 2 +-
 ...used_embedding_eltwise_layernorm_kernel.cu | 2 +-
 .../fused_fc_elementwise_layernorm_kernel.cu | 4 +-
 .../gpu/fused_scale_bias_add_relu_kernel.cu | 4 +-
 .../fused_scale_bias_relu_conv_bn_kernel.cu | 14 +-
 ...softmax_mask_upper_triangle_grad_kernel.cu | 2 +-
 ...used_softmax_mask_upper_triangle_kernel.cu | 2 +-
 .../fusion_transpose_flatten_concat_kernel.cu | 2 +-
 .../fusion/gpu/multihead_matmul_kernel.cu | 6 +-
 .../fusion/gpu/skip_layernorm_kernel.cu | 2 +-
 paddle/phi/kernels/fusion/onednn/fc_kernel.cc | 14 +-
 .../fusion/onednn/fused_conv_kernel.cc | 2 +-
 .../fusion/onednn/fused_matmul_kernel.cc | 15 +-
 .../fusion/onednn/fused_transpose_kernel.cc | 8 +-
 .../fusion/onednn/fusion_gru_kernel.cc | 8 +-
 .../kernels/fusion/xpu/add_act_xpu_kernel.cc | 4 +-
 .../fusion/xpu/add_layernorm_xpu_kernel.cc | 4 +-
 .../kernels/fusion/xpu/bn_act_xpu_kernel.cc | 2 +-
 .../kernels/fusion/xpu/conv2d_xpu_kernel.cc | 7 +-
 .../fusion/xpu/conv_transpose_xpu_kernel.cc | 2 +-
 .../fusion/xpu/fast_layernorm_xpu_kernel.cc | 2 +-
 .../fusion/xpu/fast_where_xpu_kernel.cc | 6 +-
 .../xpu/fused_feedforward_grad_kernel.cc | 2 +-
.../fusion/xpu/fused_feedforward_kernel.cc | 2 +- ...fused_multi_transformer_int8_xpu_kernel.cc | 10 +- .../xpu/fused_multi_transformer_xpu_kernel.cc | 8 +- .../fusion/xpu/fused_softmax_mask_kernel.cc | 4 +- .../fusion/xpu/layer_norm_act_xpu_kernel.cc | 2 +- .../kernels/fusion/xpu/yolo_box_xpu_kernel.cc | 9 +- .../kernels/gpu/affine_grid_grad_kernel.cu | 6 +- paddle/phi/kernels/gpu/affine_grid_kernel.cu | 6 +- paddle/phi/kernels/gpu/arange_kernel.cu | 6 +- paddle/phi/kernels/gpu/arg_min_max_kernel.cu | 4 +- paddle/phi/kernels/gpu/argsort_grad_kernel.cu | 6 +- paddle/phi/kernels/gpu/argsort_kernel.cu | 8 +- paddle/phi/kernels/gpu/assign_pos_kernel.cu | 2 +- .../phi/kernels/gpu/batch_norm_grad_kernel.cu | 6 +- paddle/phi/kernels/gpu/batch_norm_kernel.cu | 4 +- paddle/phi/kernels/gpu/c_split_kernel.cu | 4 +- paddle/phi/kernels/gpu/concat_kernel.cu | 4 +- paddle/phi/kernels/gpu/contiguous_kernel.cu | 10 +- .../kernels/gpu/conv_transpose_grad_kernel.cu | 8 +- .../phi/kernels/gpu/conv_transpose_kernel.cu | 8 +- paddle/phi/kernels/gpu/cum_maxmin_kernel.cu | 2 +- paddle/phi/kernels/gpu/decode_jpeg_kernel.cu | 2 +- paddle/phi/kernels/gpu/depthwise_conv.h | 1 + .../kernels/gpu/depthwise_conv_grad_kernel.cu | 6 +- .../phi/kernels/gpu/depthwise_conv_kernel.cu | 4 +- .../phi/kernels/gpu/diagonal_grad_kernel.cu | 5 +- paddle/phi/kernels/gpu/diagonal_kernel.cu | 6 +- paddle/phi/kernels/gpu/dist_kernel.cu | 2 +- .../phi/kernels/gpu/embedding_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/expand_as_kernel.cu | 2 +- paddle/phi/kernels/gpu/expand_kernel.cu | 4 +- .../gpu/fill_diagonal_tensor_grad_kernel.cu | 2 +- .../gpu/fill_diagonal_tensor_kernel.cu | 4 +- paddle/phi/kernels/gpu/flip_kernel.cu | 4 +- paddle/phi/kernels/gpu/full_kernel.cu | 2 +- .../gpu/gaussian_inplace_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/gaussian_kernel.cu | 2 +- .../kernels/gpu/generate_proposals_kernel.cu | 59 ++-- .../phi/kernels/gpu/group_norm_grad_kernel.cu | 4 +- paddle/phi/kernels/gpu/group_norm_kernel.cu | 6 +- .../phi/kernels/gpu/gumbel_softmax_kernel.cu | 2 +- .../phi/kernels/gpu/index_add_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/index_add_kernel.cu | 2 +- .../phi/kernels/gpu/index_put_grad_kernel.cu | 42 +-- paddle/phi/kernels/gpu/index_put_kernel.cu | 14 +- .../kernels/gpu/index_select_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/index_select_kernel.cu | 2 +- .../kernels/gpu/instance_norm_grad_kernel.cu | 2 +- .../phi/kernels/gpu/instance_norm_kernel.cu | 2 +- .../kernels/gpu/interpolate_grad_kernel.cu | 8 +- paddle/phi/kernels/gpu/interpolate_kernel.cu | 8 +- paddle/phi/kernels/gpu/kthvalue_kernel.cu | 12 +- .../phi/kernels/gpu/layer_norm_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/layer_norm_kernel.cu | 6 +- paddle/phi/kernels/gpu/lerp_grad_kernel.cu | 10 +- paddle/phi/kernels/gpu/lerp_kernel.cu | 8 +- paddle/phi/kernels/gpu/linspace_kernel.cu | 2 +- paddle/phi/kernels/gpu/logspace_kernel.cu | 2 +- paddle/phi/kernels/gpu/logsumexp_kernel.cu | 8 +- paddle/phi/kernels/gpu/lstsq_kernel.cu | 14 +- paddle/phi/kernels/gpu/lu_kernel.cu | 6 +- .../kernels/gpu/masked_select_grad_kernel.cu | 4 +- .../phi/kernels/gpu/masked_select_kernel.cu | 4 +- paddle/phi/kernels/gpu/mode_kernel.cu | 8 +- paddle/phi/kernels/gpu/multinomial_kernel.cu | 5 +- paddle/phi/kernels/gpu/nanmedian_kernel.cu | 4 +- paddle/phi/kernels/gpu/nms_kernel.cu | 2 +- paddle/phi/kernels/gpu/nonzero_kernel.cu | 2 +- paddle/phi/kernels/gpu/number_count_kernel.cu | 2 +- .../kernels/gpu/overlap_add_grad_kernel.cu | 27 +- 
paddle/phi/kernels/gpu/overlap_add_kernel.cu | 28 +- paddle/phi/kernels/gpu/p_recv_kernel.cu | 4 +- paddle/phi/kernels/gpu/p_send_kernel.cu | 2 +- paddle/phi/kernels/gpu/qr_kernel.cu | 22 +- paddle/phi/kernels/gpu/randint_kernel.cu | 2 +- paddle/phi/kernels/gpu/randperm_kernel.cu | 2 +- .../phi/kernels/gpu/reduce_amin_amax_common.h | 8 +- paddle/phi/kernels/gpu/reduce_grad.h | 4 +- paddle/phi/kernels/gpu/reduce_kernel.cu | 20 +- paddle/phi/kernels/gpu/roi_pool_kernel.cu | 2 +- paddle/phi/kernels/gpu/roll_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/roll_kernel.cu | 4 +- paddle/phi/kernels/gpu/roll_kernel_impl.h | 2 +- paddle/phi/kernels/gpu/send_u_recv_kernel.cu | 4 +- paddle/phi/kernels/gpu/send_ue_recv_kernel.cu | 4 +- paddle/phi/kernels/gpu/send_uv_grad_kernel.cu | 4 +- .../kernels/gpu/shuffle_batch_grad_kernel.cu | 2 +- .../phi/kernels/gpu/shuffle_batch_kernel.cu | 6 +- ...d_cross_entropy_with_logits_grad_kernel.cu | 2 +- ...igmoid_cross_entropy_with_logits_kernel.cu | 2 +- paddle/phi/kernels/gpu/strided_copy_kernel.cu | 18 +- .../kernels/gpu/temporal_shift_grad_kernel.cu | 8 +- .../phi/kernels/gpu/temporal_shift_kernel.cu | 8 +- paddle/phi/kernels/gpu/tile_kernel.cu | 10 +- paddle/phi/kernels/gpu/top_k_kernel.cu | 6 +- .../phi/kernels/gpu/top_p_sampling_kernel.cu | 14 +- .../kernels/gpu/triangular_solve_kernel.cu | 4 +- .../gpu/uniform_inplace_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/uniform_kernel.cu | 2 +- .../kernels/gpu/unique_consecutive_functor.h | 34 +-- .../kernels/gpu/unique_consecutive_kernel.cu | 2 +- paddle/phi/kernels/gpu/unique_kernel.cu | 58 ++-- .../phi/kernels/gpu/viterbi_decode_kernel.cu | 2 +- paddle/phi/kernels/gpu/yolo_box_kernel.cu | 3 +- .../phi/kernels/gpudnn/affine_grid_kernel.cu | 2 +- .../phi/kernels/gpudnn/conv_cudnn_frontend.h | 2 +- paddle/phi/kernels/gpudnn/conv_gpudnn_base.h | 4 +- paddle/phi/kernels/gpudnn/conv_grad_kernel.cu | 8 +- paddle/phi/kernels/gpudnn/conv_kernel.cu | 4 +- .../gpudnn/conv_transpose_grad_kernel.cu | 27 +- .../kernels/gpudnn/conv_transpose_kernel.cu | 18 +- paddle/phi/kernels/gpudnn/pool_grad_kernel.cu | 32 +- paddle/phi/kernels/gpudnn/pool_kernel.cu | 24 +- paddle/phi/kernels/impl/amp_kernel_impl.h | 3 +- .../kernels/impl/bilinear_grad_kernel_impl.h | 10 +- .../phi/kernels/impl/bilinear_kernel_impl.h | 4 +- .../impl/broadcast_tensors_kernel_impl.h | 2 +- .../kernels/impl/cholesky_solve_kernel_impl.h | 4 +- .../phi/kernels/impl/conv_grad_kernel_impl.h | 17 +- paddle/phi/kernels/impl/conv_kernel_impl.h | 9 +- .../impl/conv_transpose_grad_kernel_impl.h | 14 +- .../kernels/impl/conv_transpose_kernel_impl.h | 14 +- paddle/phi/kernels/impl/crop_kernel_impl.h | 2 +- .../impl/deformable_conv_grad_kernel_impl.h | 29 +- .../impl/deformable_conv_kernel_impl.h | 35 +-- .../impl/determinant_grad_kernel_impl.h | 2 +- .../kernels/impl/determinant_kernel_impl.h | 6 +- paddle/phi/kernels/impl/diag_embed_impl.h | 6 +- .../phi/kernels/impl/dot_grad_kernel_impl.h | 4 +- .../phi/kernels/impl/eigh_grad_kernel_impl.h | 4 +- paddle/phi/kernels/impl/einsum_grad_impl.h | 4 +- paddle/phi/kernels/impl/einsum_impl.h | 9 +- .../kernels/impl/expand_as_grad_kernel_impl.h | 2 +- .../phi/kernels/impl/expand_as_kernel_impl.h | 8 +- .../kernels/impl/expand_grad_kernel_impl.h | 2 +- paddle/phi/kernels/impl/expand_kernel_impl.h | 4 +- paddle/phi/kernels/impl/fc_kernel_impl.h | 4 +- .../phi/kernels/impl/fft_grad_kernel_impl.h | 6 +- paddle/phi/kernels/impl/fft_kernel_impl.h | 4 +- .../phi/kernels/impl/fold_grad_kernel_impl.h | 4 +- 
paddle/phi/kernels/impl/fold_kernel_impl.h | 4 +- .../phi/kernels/impl/frame_grad_kernel_impl.h | 26 +- paddle/phi/kernels/impl/frame_kernel_impl.h | 28 +- .../impl/full_whit_tensor_kernel_impl.h | 2 +- .../kernels/impl/graph_message_passing_impl.h | 10 +- .../phi/kernels/impl/kron_grad_kernel_impl.h | 6 +- paddle/phi/kernels/impl/kron_kernel_impl.h | 8 +- paddle/phi/kernels/impl/lamb_kernel_impl.h | 4 +- .../phi/kernels/impl/lerp_grad_kernel_impl.h | 2 +- paddle/phi/kernels/impl/lerp_kernel_impl.h | 2 +- paddle/phi/kernels/impl/lstsq_kernel_impl.h | 8 +- paddle/phi/kernels/impl/lu_grad_kernel_impl.h | 2 +- paddle/phi/kernels/impl/lu_kernel_impl.h | 18 +- .../kernels/impl/lu_unpack_grad_kernel_impl.h | 2 +- .../kernels/impl/matmul_grad_kernel_impl.h | 46 +-- paddle/phi/kernels/impl/matmul_kernel_impl.h | 32 +- .../kernels/impl/matrix_rank_kernel_impl.h | 22 +- .../phi/kernels/impl/merged_momentum_impl.h | 2 +- .../kernels/impl/meshgrid_grad_kernel_impl.h | 2 +- .../phi/kernels/impl/meshgrid_kernel_impl.h | 4 +- .../phi/kernels/impl/multi_dot_kernel_impl.h | 16 +- .../phi/kernels/impl/pool_grad_kernel_impl.h | 2 +- paddle/phi/kernels/impl/pool_kernel_impl.h | 2 +- ...ow2_decay_with_linear_warmup_kernel_impl.h | 2 +- paddle/phi/kernels/impl/qr_grad_kernel_impl.h | 10 +- .../kernels/impl/quant_linear_kernel_impl.h | 8 +- paddle/phi/kernels/impl/reduce_grad.h | 2 +- paddle/phi/kernels/impl/renorm_impl.h | 14 +- .../impl/repeat_interleave_grad_kernel_impl.h | 6 +- .../impl/repeat_interleave_kernel_impl.h | 30 +- .../kernels/impl/searchsorted_kernel_impl.h | 2 +- .../kernels/impl/segment_pool_kernel_impl.h | 2 +- .../kernels/impl/sequence_mask_kernel_impl.h | 8 +- .../kernels/impl/set_value_grad_kernel_impl.h | 2 +- .../phi/kernels/impl/set_value_kernel_impl.h | 4 +- .../phi/kernels/impl/slice_grad_kernel_impl.h | 18 +- .../impl/slogdeterminant_grad_kernel_impl.h | 2 +- .../impl/slogdeterminant_kernel_impl.h | 4 +- .../phi/kernels/impl/solve_grad_kernel_impl.h | 22 +- paddle/phi/kernels/impl/solve_kernel_impl.h | 8 +- .../impl/spectral_norm_grad_kernel_impl.h | 6 +- .../kernels/impl/spectral_norm_kernel_impl.h | 8 +- .../phi/kernels/impl/svd_grad_kernel_impl.h | 6 +- .../phi/kernels/impl/tile_grad_kernel_impl.h | 2 +- paddle/phi/kernels/impl/tile_kernel_impl.h | 4 +- .../phi/kernels/impl/trace_grad_kernel_impl.h | 8 +- .../kernels/impl/unfold_grad_kernel_impl.h | 4 +- paddle/phi/kernels/impl/unfold_kernel_impl.h | 4 +- paddle/phi/kernels/impl/warpctc_kernel_impl.h | 2 +- .../phi/kernels/impl/warprnnt_kernel_impl.h | 2 +- paddle/phi/kernels/is_empty_kernel.cc | 2 +- paddle/phi/kernels/kps/reduce_kernel.cu | 2 +- .../phi/kernels/legacy/cpu/randint_kernel.cc | 2 +- .../phi/kernels/legacy/cpu/uniform_kernel.cc | 2 +- .../phi/kernels/legacy/gpu/randint_kernel.cu | 2 +- .../phi/kernels/legacy/gpu/uniform_kernel.cu | 2 +- .../phi/kernels/legacy/xpu/compare_kernel.cc | 4 +- .../phi/kernels/legacy/xpu/randint_kernel.cc | 2 +- .../phi/kernels/legacy/xpu/uniform_kernel.cc | 2 +- paddle/phi/kernels/onednn/add_n_kernel.cc | 2 +- .../kernels/onednn/batch_norm_grad_kernel.cc | 4 +- .../phi/kernels/onednn/batch_norm_kernel.cc | 2 +- paddle/phi/kernels/onednn/cast_kernel.cc | 2 +- .../phi/kernels/onednn/concat_grad_kernel.cc | 4 +- paddle/phi/kernels/onednn/concat_kernel.cc | 4 +- paddle/phi/kernels/onednn/conv_grad_kernel.cc | 12 +- paddle/phi/kernels/onednn/conv_handler.h | 41 +-- paddle/phi/kernels/onednn/conv_kernel.cc | 2 +- .../kernels/onednn/conv_transpose_kernel.cc | 18 +- 
.../phi/kernels/onednn/dequantize_kernel.cc | 2 +- .../kernels/onednn/elementwise_grad_kernel.cc | 8 +- .../phi/kernels/onednn/expand_grad_kernel.cc | 11 +- paddle/phi/kernels/onednn/expand_kernel.cc | 4 +- paddle/phi/kernels/onednn/full_kernel.cc | 4 +- paddle/phi/kernels/onednn/gaussian_kernel.cc | 2 +- .../phi/kernels/onednn/interpolate_kernel.cc | 8 +- .../phi/kernels/onednn/matmul_grad_kernel.cc | 10 +- paddle/phi/kernels/onednn/matmul_kernel.cc | 20 +- paddle/phi/kernels/onednn/pad3d_kernel.cc | 2 +- paddle/phi/kernels/onednn/pad_kernel_impl.h | 4 +- paddle/phi/kernels/onednn/pool_grad_kernel.cc | 2 +- paddle/phi/kernels/onednn/pool_kernel.cc | 2 +- .../phi/kernels/onednn/reduce_kernel_impl.h | 12 +- .../kernels/onednn/reduce_mean_grad_kernel.cc | 2 +- paddle/phi/kernels/onednn/reshape_kernel.cc | 18 +- paddle/phi/kernels/onednn/shape_kernel.cc | 6 +- .../phi/kernels/onednn/slice_grad_kernel.cc | 2 +- paddle/phi/kernels/onednn/slice_kernel.cc | 6 +- paddle/phi/kernels/onednn/split_kernel.cc | 4 +- .../phi/kernels/onednn/squeeze_grad_kernel.cc | 4 +- paddle/phi/kernels/onednn/squeeze_kernel.cc | 6 +- paddle/phi/kernels/onednn/stack_kernel.cc | 10 +- .../kernels/onednn/transpose_grad_kernel.cc | 2 +- paddle/phi/kernels/onednn/transpose_kernel.cc | 4 +- .../kernels/primitive/datamover_primitives.h | 2 +- .../elementwise_multiply_kernel.cc | 2 +- .../selected_rows/impl/add_n_kernel_impl.h | 2 +- .../selected_rows/impl/lamb_kernel_impl.h | 4 +- .../phi/kernels/sparse/cpu/coalesce_kernel.cc | 4 +- paddle/phi/kernels/sparse/cpu/conv_kernel.cc | 2 +- .../kernels/sparse/cpu/elementwise_kernel.cc | 8 +- paddle/phi/kernels/sparse/cpu/mask_kernel.cc | 2 +- .../kernels/sparse/cpu/reshape_grad_kernel.cc | 4 +- .../phi/kernels/sparse/cpu/reshape_kernel.cc | 6 +- paddle/phi/kernels/sparse/cpu/slice_kernel.cc | 2 +- .../kernels/sparse/cpu/softmax_grad_kernel.cc | 8 +- .../phi/kernels/sparse/cpu/softmax_kernel.cc | 2 +- .../kernels/sparse/cpu/sparse_utils_kernel.cc | 2 +- paddle/phi/kernels/sparse/cpu/sum_kernel.cc | 16 +- paddle/phi/kernels/sparse/gpu/addmm_kernel.cu | 8 +- .../phi/kernels/sparse/gpu/coalesce_kernel.cu | 4 +- paddle/phi/kernels/sparse/gpu/conv_kernel.cu | 2 +- paddle/phi/kernels/sparse/gpu/full_kernel.cu | 4 +- .../sparse/gpu/fused_attention_kernel.cu | 3 +- paddle/phi/kernels/sparse/gpu/mask_kernel.cu | 2 +- .../kernels/sparse/gpu/matmul_grad_kernel.cu | 2 +- .../phi/kernels/sparse/gpu/matmul_kernel.cu | 14 +- paddle/phi/kernels/sparse/gpu/mv_kernel.cu | 8 +- .../kernels/sparse/gpu/reshape_grad_kernel.cu | 4 +- .../phi/kernels/sparse/gpu/reshape_kernel.cu | 4 +- paddle/phi/kernels/sparse/gpu/slice_kernel.cu | 2 +- .../kernels/sparse/gpu/softmax_grad_kernel.cu | 4 +- .../phi/kernels/sparse/gpu/softmax_kernel.cu | 2 +- paddle/phi/kernels/sparse/gpu/sum_kernel.cu | 16 +- .../phi/kernels/sparse/sparse_utils_kernel.h | 2 +- paddle/phi/kernels/sparse/unary_kernel.h | 2 +- paddle/phi/kernels/squeeze_grad_kernel.cc | 2 +- .../kernels/stride/as_strided_grad_kernel.cc | 4 +- .../phi/kernels/stride/complex_grad_kernel.cc | 8 +- .../kernels/stride/diagonal_grad_kernel.cc | 4 +- paddle/phi/kernels/stride/diagonal_kernel.cc | 4 +- .../phi/kernels/stride/flatten_grad_kernel.cc | 4 +- paddle/phi/kernels/stride/flatten_kernel.cc | 6 +- .../stride/index_select_grad_kernel.cc | 4 +- .../phi/kernels/stride/index_select_kernel.cc | 4 +- .../phi/kernels/stride/reshape_grad_kernel.cc | 2 +- .../phi/kernels/stride/slice_grad_kernel.cc | 4 +- paddle/phi/kernels/stride/slice_kernel.cc | 5 +- 
.../phi/kernels/stride/squeeze_grad_kernel.cc | 4 +- paddle/phi/kernels/stride/squeeze_kernel.cc | 2 +- .../stride/strided_slice_grad_kernel.cc | 4 +- .../kernels/stride/strided_slice_kernel.cc | 5 +- .../stride/tensor_unfold_grad_kernel.cc | 4 +- .../kernels/stride/unsqueeze_grad_kernel.cc | 4 +- paddle/phi/kernels/stride/unsqueeze_kernel.cc | 7 +- paddle/phi/kernels/stride/view_grad_kernel.cc | 2 +- paddle/phi/kernels/strings/gpu/copy_utils.h | 8 +- .../kernels/strings/strings_empty_kernel.cc | 2 +- paddle/phi/kernels/strings/unicode.h | 2 +- paddle/phi/kernels/strings/unicode_flag.h | 2 +- paddle/phi/kernels/transfer_layout_kernel.cc | 2 +- .../kernels/triangular_solve_grad_kernel.h | 2 +- paddle/phi/kernels/unsqueeze_grad_kernel.cc | 2 +- .../phi/kernels/xpu/activation_grad_kernel.cc | 8 +- paddle/phi/kernels/xpu/activation_kernel.cc | 2 +- paddle/phi/kernels/xpu/arange_kernel.cc | 2 +- paddle/phi/kernels/xpu/arg_min_max_kernel.cc | 6 +- paddle/phi/kernels/xpu/argsort_grad_kernel.cc | 4 +- paddle/phi/kernels/xpu/argsort_kernel.cc | 4 +- .../phi/kernels/xpu/batch_norm_grad_kernel.cc | 2 +- paddle/phi/kernels/xpu/batch_norm_kernel.cc | 2 +- paddle/phi/kernels/xpu/c_split_kernel.cc | 4 +- paddle/phi/kernels/xpu/compare_kernel.cc | 4 +- paddle/phi/kernels/xpu/contiguous_kernel.cc | 67 +++-- paddle/phi/kernels/xpu/conv_grad_kernel.cc | 16 +- paddle/phi/kernels/xpu/conv_kernel.cc | 16 +- .../kernels/xpu/conv_transpose_grad_kernel.cc | 2 +- .../phi/kernels/xpu/conv_transpose_kernel.cc | 2 +- .../phi/kernels/xpu/cross_entropy_kernel.cc | 2 +- paddle/phi/kernels/xpu/cum_kernel.cc | 2 +- paddle/phi/kernels/xpu/cumprod_kernel.cc | 2 +- .../xpu/deformable_conv_grad_kernel.cc | 2 +- .../phi/kernels/xpu/deformable_conv_kernel.cc | 2 +- paddle/phi/kernels/xpu/diag_kernel.cc | 4 +- paddle/phi/kernels/xpu/diagonal_kernel.cc | 4 +- .../xpu/elementwise_add_grad_kernel.cc | 4 +- .../phi/kernels/xpu/elementwise_add_kernel.cc | 4 +- paddle/phi/kernels/xpu/expand_as_kernel.cc | 8 +- paddle/phi/kernels/xpu/expand_grad_kernel.cc | 4 +- paddle/phi/kernels/xpu/expand_kernel.cc | 6 +- .../xpu/fill_diagonal_tensor_kernel.cc | 4 +- paddle/phi/kernels/xpu/flip_kernel.cc | 2 +- paddle/phi/kernels/xpu/full_kernel.cc | 4 +- .../phi/kernels/xpu/gather_nd_grad_kernel.cc | 8 +- paddle/phi/kernels/xpu/gather_nd_kernel.cc | 8 +- paddle/phi/kernels/xpu/gaussian_kernel.cc | 2 +- .../kernels/xpu/generate_proposals_kernel.cc | 49 +-- paddle/phi/kernels/xpu/grid_sample_kernel.cc | 8 +- .../phi/kernels/xpu/group_norm_grad_kernel.cc | 6 +- paddle/phi/kernels/xpu/group_norm_kernel.cc | 6 +- paddle/phi/kernels/xpu/index_put_kernel.cc | 21 +- .../kernels/xpu/index_sample_grad_kernel.cc | 6 +- .../kernels/xpu/index_select_grad_kernel.cc | 4 +- paddle/phi/kernels/xpu/index_select_kernel.cc | 2 +- .../kernels/xpu/interpolate_grad_kernel.cc | 4 +- paddle/phi/kernels/xpu/interpolate_kernel.cc | 4 +- .../phi/kernels/xpu/layer_norm_grad_kernel.cc | 2 +- paddle/phi/kernels/xpu/layer_norm_kernel.cc | 2 +- paddle/phi/kernels/xpu/linspace_kernel.cc | 2 +- .../kernels/xpu/log_softmax_grad_kernel.cc | 2 +- paddle/phi/kernels/xpu/log_softmax_kernel.cc | 2 +- paddle/phi/kernels/xpu/logical_kernel.cc | 2 +- .../kernels/xpu/masked_select_grad_kernel.cc | 4 +- .../phi/kernels/xpu/masked_select_kernel.cc | 4 +- paddle/phi/kernels/xpu/matmul_grad_kernel.cc | 4 +- paddle/phi/kernels/xpu/meshgrid_kernel.cc | 2 +- .../phi/kernels/xpu/multiclass_nms3_kernel.cc | 2 +- .../phi/kernels/xpu/nll_loss_grad_kernel.cc | 2 +- 
paddle/phi/kernels/xpu/nll_loss_kernel.cc | 2 +- paddle/phi/kernels/xpu/nonzero_kernel.cc | 4 +- paddle/phi/kernels/xpu/p_norm_grad_kernel.cc | 2 +- paddle/phi/kernels/xpu/p_norm_kernel.cc | 2 +- paddle/phi/kernels/xpu/pad3d_grad_kernel.cc | 2 +- paddle/phi/kernels/xpu/pad_grad_kernel.cc | 2 +- paddle/phi/kernels/xpu/pad_kernel.cc | 2 +- .../pow2_decay_with_linear_warmup_kernel.cc | 2 +- paddle/phi/kernels/xpu/randint_kernel.cc | 2 +- paddle/phi/kernels/xpu/randperm_kernel.cc | 2 +- .../kernels/xpu/reduce_mean_grad_kernel.cc | 4 +- paddle/phi/kernels/xpu/scatter_kernel.cc | 4 +- .../kernels/xpu/scatter_nd_add_grad_kernel.cc | 8 +- .../phi/kernels/xpu/scatter_nd_add_kernel.cc | 6 +- .../phi/kernels/xpu/set_value_grad_kernel.cc | 14 +- paddle/phi/kernels/xpu/set_value_kernel.cc | 10 +- paddle/phi/kernels/xpu/split_kernel.cc | 2 +- paddle/phi/kernels/xpu/stack_grad_kernel.cc | 2 +- paddle/phi/kernels/xpu/stride_slice_kernel.cc | 2 +- paddle/phi/kernels/xpu/strided_copy_kernel.cc | 105 +++---- .../phi/kernels/xpu/take_along_axis_kernel.cc | 2 +- .../kernels/xpu/temporal_shift_grad_kernel.cc | 8 +- .../phi/kernels/xpu/temporal_shift_kernel.cc | 8 +- paddle/phi/kernels/xpu/tile_grad_kernel.cc | 2 +- paddle/phi/kernels/xpu/tile_kernel.cc | 6 +- paddle/phi/kernels/xpu/top_k_kernel.cc | 6 +- .../phi/kernels/xpu/transpose_grad_kernel.cc | 2 +- paddle/phi/kernels/xpu/transpose_kernel.cc | 2 +- .../phi/kernels/xpu/tril_triu_grad_kernel.cc | 2 +- paddle/phi/kernels/xpu/tril_triu_kernel.cc | 2 +- paddle/phi/kernels/xpu/unbind_kernel.cc | 2 +- paddle/phi/kernels/xpu/unfold_grad_kernel.cc | 2 +- paddle/phi/kernels/xpu/unfold_kernel.cc | 2 +- paddle/phi/kernels/xpu/uniform_kernel.cc | 2 +- paddle/phi/kernels/xpu/unique_kernel.cc | 16 +- paddle/phi/kernels/xpu/unstack_kernel.cc | 2 +- paddle/phi/kernels/xpu/warpctc_kernel.cc | 4 +- paddle/phi/kernels/xpu/where_grad_kernel.cc | 4 +- paddle/phi/kernels/xpu/where_kernel.cc | 4 +- paddle/phi/kernels/xpu/xpu_api_wrapper.h | 4 +- paddle/phi/tools/CMakeLists.txt | 2 +- paddle/pir/core/block.cc | 2 +- paddle/pir/core/block_argument.cc | 2 +- paddle/pir/core/block_operand.cc | 2 +- paddle/pir/core/builtin_attribute_storage.h | 2 +- paddle/pir/core/builtin_op.cc | 2 +- paddle/pir/core/builtin_type.cc | 4 +- paddle/pir/core/builtin_type.h | 1 - paddle/pir/core/builtin_type_interfaces.cc | 2 +- paddle/pir/core/builtin_type_interfaces.h | 6 +- paddle/pir/core/builtin_type_storage.h | 17 +- paddle/pir/core/dialect.h | 2 +- paddle/pir/core/enforce.h | 82 ----- paddle/pir/core/interface_support.h | 2 +- paddle/pir/core/iterator.h | 2 +- paddle/pir/core/macros.h | 31 -- paddle/pir/core/op_base.h | 2 +- paddle/pir/core/op_operand.cc | 2 +- paddle/pir/core/op_result.cc | 2 +- paddle/pir/core/op_trait.cc | 2 +- paddle/pir/core/operation.cc | 2 +- paddle/pir/core/operation.h | 4 +- paddle/pir/core/region.cc | 2 +- paddle/pir/core/storage_manager.cc | 2 +- paddle/pir/core/type_util.h | 4 +- paddle/pir/core/value.cc | 2 +- paddle/pir/dialect/shape/ir/shape_op.cc | 2 +- paddle/pir/pass/pass.h | 2 +- paddle/pir/pass/pass_registry.h | 3 +- paddle/pir/pattern_rewrite/pattern_match.cc | 2 +- paddle/pir/pattern_rewrite/pattern_match.h | 2 +- paddle/testing/CMakeLists.txt | 1 + paddle/utils/CMakeLists.txt | 6 +- paddle/utils/string/CMakeLists.txt | 6 +- patches/eigen/TensorReductionGpu.h | 2 +- python/setup.py.in | 3 +- python/setup_cinn.py.in | 2 + setup.py | 15 +- test/CMakeLists.txt | 6 +- test/cpp/auto_parallel/CMakeLists.txt | 8 +- test/cpp/auto_parallel/dist_tensor_test.cc | 2 +- 
test/cpp/auto_parallel/spmd_rule_test.cc | 145 ++++----- test/cpp/eager/CMakeLists.txt | 1 + .../accumulation_node_test.cc | 22 +- .../autograd_meta_test.cc | 2 +- .../data_structure_tests/eager_tensor_test.cc | 19 +- .../grad_node_info_test.cc | 6 +- .../data_structure_tests/grad_node_test.h | 2 +- .../grad_tensor_holder_test.cc | 10 +- .../tensor_wrapper_test.cc | 4 +- .../eager/performance_tests/CMakeLists.txt | 8 +- .../performance_tests/benchmark_eager_cpu.cc | 16 +- .../performance_tests/benchmark_eager_cuda.cc | 16 +- .../performance_tests/benchmark_fluid_cpu.cc | 12 +- .../performance_tests/benchmark_fluid_cuda.cc | 12 +- test/cpp/eager/task_tests/CMakeLists.txt | 2 +- test/cpp/eager/task_tests/backward_test.cc | 8 +- .../cross_batch_accumulation_test.cc | 2 +- test/cpp/eager/task_tests/eager_utils_test.cc | 6 +- .../eager/task_tests/forward_autograd_test.cc | 6 +- .../eager/task_tests/fwd_bwd_joint_test.cc | 17 +- test/cpp/eager/task_tests/generated_test.cc | 10 +- test/cpp/eager/task_tests/grad_test.cc | 8 +- test/cpp/eager/task_tests/hook_test.cc | 7 +- .../task_tests/hook_test_intermidiate.cc | 17 +- .../cpp/eager/task_tests/tensor_utils_test.cc | 2 +- test/cpp/eager/test_utils.h | 7 +- test/cpp/fluid/CMakeLists.txt | 26 +- test/cpp/fluid/assign_op_test.cc | 10 +- test/cpp/fluid/benchmark/CMakeLists.txt | 3 +- test/cpp/fluid/benchmark/op_tester.cc | 6 +- test/cpp/fluid/benchmark/op_tester.h | 2 +- test/cpp/fluid/cinn/CMakeLists.txt | 1 + .../fluid/cinn/cinn_launch_context_test.cc | 12 +- test/cpp/fluid/cinn/cinn_launch_op_test.cc | 2 +- test/cpp/fluid/cinn/test_helper.h | 2 +- .../controlflow/conditional_block_op_test.cc | 6 +- test/cpp/fluid/dlnne/dlnne_engine_op_test.cc | 2 +- test/cpp/fluid/elementwise/CMakeLists.txt | 24 +- .../test_elementwise_add_grad_grad.cc | 4 +- .../test_elementwise_add_op_inplace.cc | 2 +- .../test_elementwise_div_grad_grad.cc | 2 +- .../test_elementwise_op_grad_grad.h | 4 +- test/cpp/fluid/feed_forward_test.cu | 8 +- test/cpp/fluid/framework/CMakeLists.txt | 15 +- .../fluid/framework/copy_same_tensor_test.cc | 6 +- .../framework/data_layout_transform_test.cc | 10 +- .../framework/data_type_transform_test.cc | 41 +-- .../framework/data_type_transform_test.cu | 16 +- test/cpp/fluid/framework/eigen_test.cc | 15 +- test/cpp/fluid/framework/operator_test.cc | 2 +- .../paddle2cinn/cinn_cache_key_test.cc | 6 +- .../paddle2cinn/cinn_compiler_test.cc | 4 +- test/cpp/fluid/framework/reader_test.cc | 2 +- .../framework/selected_rows_utils_test.cc | 19 +- test/cpp/fluid/framework/tensor_test.cc | 42 +-- test/cpp/fluid/framework/tensor_util_test.cc | 22 +- test/cpp/fluid/fused/CMakeLists.txt | 5 + .../cpp/fluid/fused/cudnn_bn_add_relu_test.cc | 30 +- test/cpp/fluid/fused/cudnn_norm_conv_test.cc | 20 +- test/cpp/fluid/fused/fusion_group_op_test.cc | 5 +- test/cpp/fluid/gather_test.cc | 6 +- test/cpp/fluid/lite/CMakeLists.txt | 2 +- test/cpp/fluid/math/CMakeLists.txt | 10 +- test/cpp/fluid/math/beam_search_test.cc | 6 +- test/cpp/fluid/math/concat_test.cc | 24 +- .../fluid/math/selected_rows_functor_test.cc | 48 +-- .../math/selected_rows_functor_test.cu.cc | 29 +- test/cpp/fluid/mkldnn/CMakeLists.txt | 6 + test/cpp/fluid/mkldnn/test_mkldnn_caching.cc | 2 +- .../fluid/mkldnn/test_mkldnn_op_inplace.cc | 2 +- test/cpp/fluid/mkldnn/test_mkldnn_op_nhwc.cc | 12 +- test/cpp/fluid/nccl/nccl_op_test.cu.cc | 10 +- test/cpp/fluid/pscore/CMakeLists.txt | 17 +- test/cpp/fluid/scatter_test.cc | 6 +- .../test_common_infer_shape_functions.cc | 2 +- 
test/cpp/imperative/CMakeLists.txt | 2 +- test/cpp/imperative/heter_ccl_context_test.cc | 4 +- test/cpp/imperative/nccl_context_test.cc | 2 +- .../imperative/test_gradient_accmulator.cc | 10 +- test/cpp/imperative/test_group.cc | 2 +- test/cpp/imperative/test_hooks.cc | 10 +- test/cpp/imperative/test_prepare_op.cc | 4 +- test/cpp/imperative/test_tracer.cc | 28 +- test/cpp/inference/analysis/CMakeLists.txt | 1 + test/cpp/inference/api/CMakeLists.txt | 125 ++++++-- test/cpp/inference/api/api_impl_tester.cc | 6 +- .../inference/api/mkldnn_quantizer_tester.cc | 26 +- test/cpp/inference/api/tester_helper.h | 6 +- test/cpp/inference/test_helper.h | 2 +- test/cpp/jit/CMakeLists.txt | 10 +- test/cpp/jit/layer_test.cc | 2 +- test/cpp/new_executor/CMakeLists.txt | 5 +- .../new_executor/standalone_executor_test.cc | 2 +- test/cpp/phi/api/CMakeLists.txt | 10 +- test/cpp/phi/api/test_phi_exception.cc | 2 +- test/cpp/phi/api/test_phi_tensor.cc | 2 +- test/cpp/phi/api/test_strings_empty_api.cc | 2 +- test/cpp/phi/api/test_to_api.cc | 5 +- test/cpp/phi/common/CMakeLists.txt | 14 +- test/cpp/phi/common/test_backend.cc | 2 +- test/cpp/phi/common/test_data_layout.cc | 4 +- test/cpp/phi/common/test_data_type.cc | 2 +- test/cpp/phi/common/test_scalar.cu | 31 +- test/cpp/phi/core/CMakeLists.txt | 30 +- test/cpp/phi/core/test_custom_kernel.cc | 10 +- test/cpp/phi/core/test_ddim.cc | 44 +-- test/cpp/phi/core/test_dim.cu | 22 +- test/cpp/phi/core/test_meta_fn_utils.cc | 2 +- test/cpp/phi/core/test_selected_rows.cc | 19 +- test/cpp/phi/core/test_sparse_coo_tensor.cc | 15 +- test/cpp/phi/core/test_sparse_csr_tensor.cc | 17 +- test/cpp/phi/core/test_tensor_array.cc | 2 +- test/cpp/phi/core/unroll_array_ops_test.cc | 18 +- test/cpp/phi/kernels/CMakeLists.txt | 34 +-- test/cpp/phi/kernels/sequence_padding_test.cc | 11 +- test/cpp/phi/kernels/sequence_pooling_test.cc | 4 +- test/cpp/phi/kernels/test_auto_tune.cu | 20 +- .../cpp/phi/kernels/test_fused_adam_kernel.cc | 8 +- test/cpp/phi/kernels/test_memcpy_dev_api.cc | 2 +- .../cpp/phi/kernels/test_ternary_broadcast.cu | 24 +- .../kernels/test_transfer_layout_dev_api.cc | 4 +- test/cpp/phi/ops/CMakeLists.txt | 2 +- test/cpp/pir/cinn/CMakeLists.txt | 3 +- test/cpp/pir/cinn/group_op_test.cc | 10 +- test/cpp/pir/core/CMakeLists.txt | 9 +- test/cpp/pir/core/ir_op_test.cc | 2 +- test/cpp/pir/core/ir_program_test.cc | 2 +- test/cpp/pir/core/type_interface_test.cc | 2 +- test/cpp/pir/kernel_dialect/CMakeLists.txt | 8 +- test/cpp/pir/pass/CMakeLists.txt | 1 + .../pattern_rewrite/pattern_rewrite_test.cc | 4 +- test/cpp/pir/shape_dialect/CMakeLists.txt | 20 +- .../pir/shape_dialect/constraint_pass_test.cc | 2 +- test/cpp/pir/tools/test_interface.h | 2 +- test/cpp/pir/tools/test_op.cc | 2 +- test/cpp/pir/tools/test_trait.cc | 2 +- test/cpp/prim/CMakeLists.txt | 6 +- test/cpp/prim/test_eager_prim.cc | 6 +- 1819 files changed, 8289 insertions(+), 8899 deletions(-) mode change 100755 => 100644 paddle/cinn/frontend/interpreter_test.cc mode change 100755 => 100644 paddle/cinn/hlir/framework/memory.h rename paddle/{phi/core => common}/errors.cc (93%) rename paddle/{phi => }/common/layout.h (93%) mode change 100755 => 100644 paddle/fluid/distributed/ps/service/brpc_ps_client.h delete mode 100644 paddle/phi/api/ext/exception.h delete mode 100644 paddle/phi/core/ddim.cc delete mode 100644 paddle/phi/core/ddim.h delete mode 100644 paddle/phi/core/errors.h delete mode 100644 paddle/phi/core/macros.h delete mode 100644 paddle/phi/core/utils/array.h delete mode 100644 paddle/phi/core/utils/dim.h 
delete mode 100644 paddle/phi/core/utils/unroll_array_ops.h delete mode 100644 paddle/pir/core/enforce.h delete mode 100644 paddle/pir/core/macros.h diff --git a/cmake/generic.cmake b/cmake/generic.cmake index baa0340eeb992a..ab09d597499772 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -622,7 +622,7 @@ function(paddle_test_build TARGET_NAME) if(APPLE) target_link_libraries( ${TARGET_NAME} - "-Wl,-rpath,$ -Wl,-rpath,$ -Wl,-rpath,$" + "-Wl,-rpath,$ -Wl,-rpath,$ -Wl,-rpath,$ -Wl,-rpath,$" ) endif() common_link(${TARGET_NAME}) diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index 73668097014eb4..06dc5d6173794a 100755 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -286,6 +286,10 @@ copy( include_directories(${CMAKE_BINARY_DIR}/../paddle/fluid/framework/io) # copy api headers for phi & custom op +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/common/*.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/common/) copy( inference_lib_dist SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/ext/*.h @@ -304,8 +308,17 @@ copy( DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/common/) copy( inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/macros.h + SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/enforce.h DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/core/) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/string/*.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/string/) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/string/tinyformat/tinyformat.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/string/tinyformat/ +) copy( inference_lib_dist SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/visit_type.h @@ -320,40 +333,13 @@ copy( DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/) copy( inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/any.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/) -copy( - inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/optional.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/) -copy( - inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/none.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/) -copy( - inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flat_hash_map.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/) -copy( - inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flags.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/) -copy( - inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/test_macros.h + SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/*.h DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/) copy( inference_lib_dist SRCS ${PADDLE_SOURCE_DIR}/paddle/extension.h DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/) -if(NOT WITH_GFLAGS) - copy( - inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flags_native.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/) -endif() - # the include path of phi needs to be changed to adapt to inference api path add_custom_command( TARGET inference_lib_dist diff --git a/paddle/cinn/api/tensor_node.h b/paddle/cinn/api/tensor_node.h index fca0a844108bc8..2a9a836c7d1e93 100644 --- a/paddle/cinn/api/tensor_node.h +++ b/paddle/cinn/api/tensor_node.h @@ -52,9 +52,10 @@ class TensorNode final { class ConsumerOpListView { public: - 
ConsumerOpListView(const std::set, - common::GraphEdgeCompare>& edges, - const hlir::framework::Graph* graph) + ConsumerOpListView( + const std::set, + cinn::common::GraphEdgeCompare>& edges, + const hlir::framework::Graph* graph) : edges_(edges), graph_(graph) {} ConsumerOpListView(const ConsumerOpListView& other) = delete; @@ -64,8 +65,8 @@ class TensorNode final { class Iterator { public: - Iterator(std::set, - common::GraphEdgeCompare>::const_iterator it, + Iterator(std::set, + cinn::common::GraphEdgeCompare>::const_iterator it, const hlir::framework::Graph* graph) : iter_(it), graph_(graph) {} @@ -89,8 +90,8 @@ class TensorNode final { OpNode operator*() const; private: - std::set, - common::GraphEdgeCompare>::const_iterator iter_; + std::set, + cinn::common::GraphEdgeCompare>::const_iterator iter_; const hlir::framework::Graph* graph_; }; @@ -101,7 +102,8 @@ class TensorNode final { Iterator end() const { return Iterator(this->edges_.end(), graph_); } private: - const std::set, common::GraphEdgeCompare>& edges_; + const std::set, + cinn::common::GraphEdgeCompare>& edges_; const hlir::framework::Graph* graph_; }; diff --git a/paddle/cinn/ast_gen_ius/ast_gen.cc b/paddle/cinn/ast_gen_ius/ast_gen.cc index c8be20ae3afa61..bef88ade1a7fb1 100644 --- a/paddle/cinn/ast_gen_ius/ast_gen.cc +++ b/paddle/cinn/ast_gen_ius/ast_gen.cc @@ -90,7 +90,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) { std::vector iter_values; // reduce body and reduce init schedule block should have different objects // for same axis so we re-create objects - std::vector axis_vars = common::GenDefaultAxis(axis_len); + std::vector axis_vars = cinn::common::GenDefaultAxis(axis_len); for (int i = 0; i < shape.size(); ++i) { block_vars.push_back(Var(Expr(0), shape[i], @@ -118,7 +118,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) { std::vector reduce_iter_values; // reduce body and reduce init schedule block should have different objects // for same axis so we re-create objects - std::vector reduce_axis_vars = common::GenDefaultAxis(axis_len); + std::vector reduce_axis_vars = cinn::common::GenDefaultAxis(axis_len); for (int i = 0; i < shape.size(); ++i) { reduce_block_vars.push_back(Var(Expr(0), shape[i], @@ -182,7 +182,7 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) { // create schedule block itervars, i0,i1... 
std::vector block_vars; std::vector iter_values; - std::vector axis_vars = common::GenDefaultAxis(axis_len); + std::vector axis_vars = cinn::common::GenDefaultAxis(axis_len); for (int i = 0; i < shape.size(); ++i) { block_vars.push_back(Var( Expr(0), shape[i], cinn::UniqName("i" + std::to_string(i)), false)); diff --git a/paddle/cinn/auto_schedule/analysis/analyze_ir.cc b/paddle/cinn/auto_schedule/analysis/analyze_ir.cc index fbfdc7af72e9a6..6f00ee34813d15 100644 --- a/paddle/cinn/auto_schedule/analysis/analyze_ir.cc +++ b/paddle/cinn/auto_schedule/analysis/analyze_ir.cc @@ -144,7 +144,7 @@ bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize) { return total_unused_iter_vars >= 1; } -ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target, +ir::LoweredFunc UpdateFuncWithNewBody(const cinn::common::Target& target, const ir::LoweredFunc& old_func, ir::Expr& body) { // NOLINT ir::ModuleExpr mod_expr(std::vector({body})); @@ -179,7 +179,7 @@ ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target, ir::LoweredFunc new_func = ir::_LoweredFunc_::Make( old_func->name, old_func->args, updated_body, new_temp_bufs); #ifdef CINN_WITH_CUDA - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { new_func->PrepareCudaAxisInfoFromBody(); } #endif diff --git a/paddle/cinn/auto_schedule/analysis/analyze_ir.h b/paddle/cinn/auto_schedule/analysis/analyze_ir.h index 81d00dcb22ec3a..2afe33ea2706fe 100644 --- a/paddle/cinn/auto_schedule/analysis/analyze_ir.h +++ b/paddle/cinn/auto_schedule/analysis/analyze_ir.h @@ -44,7 +44,7 @@ bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize); /** * Update a LoweredFunc by regenerating related fields with a new function body */ -ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target, +ir::LoweredFunc UpdateFuncWithNewBody(const cinn::common::Target& target, const ir::LoweredFunc& old_func, ir::Expr& body); // NOLINT diff --git a/paddle/cinn/auto_schedule/analysis/analyze_ir_test.cc b/paddle/cinn/auto_schedule/analysis/analyze_ir_test.cc index f7fffa0e0ff4b2..970a41173087a5 100644 --- a/paddle/cinn/auto_schedule/analysis/analyze_ir_test.cc +++ b/paddle/cinn/auto_schedule/analysis/analyze_ir_test.cc @@ -38,9 +38,9 @@ namespace auto_schedule { TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_SimpleAssign) { Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif ir::Expr M(32); @@ -102,9 +102,9 @@ TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_SimpleAssign) { TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_AddDiffShape) { Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif ir::Expr M(32); @@ -158,9 +158,9 @@ TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_AddDiffShape) { TEST(AnalyzeIr, ContainsNodeType) { Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif ir::Expr M(32); diff --git 
a/paddle/cinn/auto_schedule/auto_tuner.cc b/paddle/cinn/auto_schedule/auto_tuner.cc index d8280af500089e..d45dcc743e525c 100644 --- a/paddle/cinn/auto_schedule/auto_tuner.cc +++ b/paddle/cinn/auto_schedule/auto_tuner.cc @@ -38,7 +38,7 @@ namespace cinn { namespace auto_schedule { -AutoTuner::AutoTuner(const common::Target& target, +AutoTuner::AutoTuner(const cinn::common::Target& target, hlir::framework::Graph* graph) : target_(target), graph_(graph) {} @@ -58,7 +58,7 @@ void AutoTuner::Initialize(const Config& config, tasks_ = task_creator.CreateTuneTaskOpLevel(graph_); const auto& dtype_dict = - graph_->GetAttrs>( + graph_->GetAttrs>( "inferdtype"); const auto& shape_dict = graph_->GetAttrs< absl::flat_hash_map>("infershape"); diff --git a/paddle/cinn/auto_schedule/auto_tuner.h b/paddle/cinn/auto_schedule/auto_tuner.h index 9875e5dfcdd000..e4c416c9009478 100644 --- a/paddle/cinn/auto_schedule/auto_tuner.h +++ b/paddle/cinn/auto_schedule/auto_tuner.h @@ -46,7 +46,7 @@ class AutoTuner { DatabaseConfig database_config; }; - AutoTuner(const common::Target& target, hlir::framework::Graph* graph); + AutoTuner(const cinn::common::Target& target, hlir::framework::Graph* graph); // Initialize tuner with specific config and auxiliary objects. void Initialize(const Config& config, @@ -56,7 +56,7 @@ class AutoTuner { TuningResult Tune(const TuningOptions& options); private: - const common::Target& target_; + const cinn::common::Target& target_; hlir::framework::Graph* graph_; std::unique_ptr> op_lowerer_; diff --git a/paddle/cinn/auto_schedule/auto_tuner_test.cc b/paddle/cinn/auto_schedule/auto_tuner_test.cc index 36fd51016c989e..6ddaa2b2d7669d 100644 --- a/paddle/cinn/auto_schedule/auto_tuner_test.cc +++ b/paddle/cinn/auto_schedule/auto_tuner_test.cc @@ -48,9 +48,9 @@ using ::cinn::hlir::framework::Scope; class TestAutoTuner : public ::testing::Test { public: #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::shared_ptr graph; diff --git a/paddle/cinn/auto_schedule/cost_model/expr_cost_model.cc b/paddle/cinn/auto_schedule/cost_model/expr_cost_model.cc index fcaf57d54c4cef..a9074c76fa8cf9 100644 --- a/paddle/cinn/auto_schedule/cost_model/expr_cost_model.cc +++ b/paddle/cinn/auto_schedule/cost_model/expr_cost_model.cc @@ -29,7 +29,7 @@ namespace cinn { namespace auto_schedule { float ExprCostModel::Predict(const ir::ModuleExpr& sample, - const common::Target& target) const { + const cinn::common::Target& target) const { if (trained_times_.load() == 0) { return SearchState::NOT_INIT_COST; } @@ -42,7 +42,7 @@ float ExprCostModel::Predict(const ir::ModuleExpr& sample, void ExprCostModel::Train(const std::vector& samples, const std::vector& labels, - const common::Target& target) { + const cinn::common::Target& target) { trained_times_.store(1); size_t total_size = samples.size(); CHECK_EQ(total_size, labels.size()) @@ -60,7 +60,7 @@ void ExprCostModel::Train(const std::vector& samples, void ExprCostModel::Update(const std::vector& samples, const std::vector& labels, - const common::Target& target) { + const cinn::common::Target& target) { ++trained_times_; size_t total_size = samples.size(); CHECK_EQ(total_size, labels.size()) diff --git a/paddle/cinn/auto_schedule/cost_model/expr_cost_model.h b/paddle/cinn/auto_schedule/cost_model/expr_cost_model.h index 02e0b4a52c831f..4dc34045709374 100644 --- 
a/paddle/cinn/auto_schedule/cost_model/expr_cost_model.h +++ b/paddle/cinn/auto_schedule/cost_model/expr_cost_model.h @@ -30,13 +30,13 @@ namespace auto_schedule { class ExprCostModel : public XgbCostModel { public: virtual float Predict(const ir::ModuleExpr& sample, - const common::Target& target) const; + const cinn::common::Target& target) const; void Train(const std::vector& samples, const std::vector& labels, - const common::Target& target); + const cinn::common::Target& target); void Update(const std::vector& samples, const std::vector& labels, - const common::Target& target); + const cinn::common::Target& target); private: std::atomic trained_times_{0}; diff --git a/paddle/cinn/auto_schedule/cost_model/feature.cc b/paddle/cinn/auto_schedule/cost_model/feature.cc index f993ee256616a6..3a403b21d081f7 100644 --- a/paddle/cinn/auto_schedule/cost_model/feature.cc +++ b/paddle/cinn/auto_schedule/cost_model/feature.cc @@ -37,12 +37,12 @@ namespace cinn { namespace auto_schedule { Feature::Feature() - : target_(common::UnkTarget()), + : target_(cinn::common::UnkTarget()), stack_encoded_feature_(1), // initialize a LoopBlockFeature as root block current_loop_block_index_(0), parent_indices_(1, -1) {} -Feature::Feature(const common::Target& target) +Feature::Feature(const cinn::common::Target& target) : target_(target), stack_encoded_feature_(1), // initialize a LoopBlockFeature as root block current_loop_block_index_(0), @@ -52,7 +52,7 @@ std::vector Feature::ToFixedSizeVector() { std::vector ret(LoopBlockFeature::kTotalSize + 1, 0); // LoopBlockFeature::kTotalSize plus 1 for target - if (target_ == common::DefaultNVGPUTarget()) { + if (target_ == cinn::common::DefaultNVGPUTarget()) { ret[0] = 1; } // else 0 for other cases diff --git a/paddle/cinn/auto_schedule/cost_model/feature.h b/paddle/cinn/auto_schedule/cost_model/feature.h index cfd100598cdd17..2f98b12c269a6b 100644 --- a/paddle/cinn/auto_schedule/cost_model/feature.h +++ b/paddle/cinn/auto_schedule/cost_model/feature.h @@ -134,7 +134,7 @@ class Feature { public: Feature(); - explicit Feature(const common::Target& target); + explicit Feature(const cinn::common::Target& target); // Convert the various-length loop block features to fixed-size vector std::vector ToFixedSizeVector(); @@ -182,7 +182,7 @@ class Feature { int current_loop_block_index_; std::vector parent_indices_; - common::Target target_; + cinn::common::Target target_; }; } // namespace auto_schedule diff --git a/paddle/cinn/auto_schedule/cost_model/feature_extractor.cc b/paddle/cinn/auto_schedule/cost_model/feature_extractor.cc index a8255c1875c746..3189e2e1c2b4eb 100644 --- a/paddle/cinn/auto_schedule/cost_model/feature_extractor.cc +++ b/paddle/cinn/auto_schedule/cost_model/feature_extractor.cc @@ -50,7 +50,7 @@ void FeatureExtractor::Visit(const Expr *x) { } Feature FeatureExtractor::Extract(const ir::ModuleExpr &mod_expr, - const common::Target &target) { + const cinn::common::Target &target) { feature_ = Feature(target); for (const ir::Expr &e : mod_expr.GetExprs()) { Visit(&e); @@ -91,8 +91,9 @@ NotVisitExprFields(_Tensor_) #define VisitForDtypePattern(NodeType, member) \ void FeatureExtractor::Visit(const NodeType *x) { \ - if (x->type() == common::F32() || x->type() == common::F16() || \ - x->type() == common::F64()) { \ + if (x->type() == cinn::common::F32() || \ + x->type() == cinn::common::F16() || \ + x->type() == cinn::common::F64()) { \ feature_.CurrentLoopBlock().float_##member += x->type().lanes(); \ } else { \ feature_.CurrentLoopBlock().int_##member += 
x->type().lanes(); \ @@ -125,8 +126,9 @@ VisitForDtypePattern(Let, other_call); #define VisitForMultiOperandsDtypePattern(NodeType, member) \ void FeatureExtractor::Visit(const NodeType *x) { \ - if (x->type() == common::F32() || x->type() == common::F16() || \ - x->type() == common::F64()) { \ + if (x->type() == cinn::common::F32() || \ + x->type() == cinn::common::F16() || \ + x->type() == cinn::common::F64()) { \ feature_.CurrentLoopBlock().float_##member += \ (x->operands().size() - 1); \ } else { \ @@ -231,8 +233,8 @@ void FeatureExtractor::Visit(const PolyFor *x) { /* Visit for Reduce and Broadcast */ void FeatureExtractor::Visit(const Reduce *x) { - if (x->type() == common::F32() || x->type() == common::F16() || - x->type() == common::F64()) { + if (x->type() == cinn::common::F32() || x->type() == cinn::common::F16() || + x->type() == cinn::common::F64()) { switch (x->reduce_type) { case Reduce::ReduceType::kSum: feature_.CurrentLoopBlock().float_reduce_sum_or_sub += diff --git a/paddle/cinn/auto_schedule/cost_model/feature_extractor.h b/paddle/cinn/auto_schedule/cost_model/feature_extractor.h index 690d669da720b9..61b2a6083b7ba4 100644 --- a/paddle/cinn/auto_schedule/cost_model/feature_extractor.h +++ b/paddle/cinn/auto_schedule/cost_model/feature_extractor.h @@ -40,7 +40,8 @@ namespace auto_schedule { class FeatureExtractor : public ir::IRVisitorRequireReImpl { public: FeatureExtractor(); - Feature Extract(const ir::ModuleExpr& mod_expr, const common::Target& target); + Feature Extract(const ir::ModuleExpr& mod_expr, + const cinn::common::Target& target); void Visit(const Expr* x) override; diff --git a/paddle/cinn/auto_schedule/cost_model/feature_extractor_test.cc b/paddle/cinn/auto_schedule/cost_model/feature_extractor_test.cc index 10726f450a0de3..22fa1a7f259bad 100644 --- a/paddle/cinn/auto_schedule/cost_model/feature_extractor_test.cc +++ b/paddle/cinn/auto_schedule/cost_model/feature_extractor_test.cc @@ -38,9 +38,9 @@ namespace auto_schedule { TEST(FeatureExtractor, SimpleAssign) { Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif ir::Expr M(32); ir::Expr N(32); @@ -93,9 +93,9 @@ TEST(FeatureExtractor, SimpleAssign) { TEST(FeatureExtractor, MatrixMultiply) { Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif ir::Expr M(2); diff --git a/paddle/cinn/auto_schedule/cost_model/xgb_cost_model.cc b/paddle/cinn/auto_schedule/cost_model/xgb_cost_model.cc index 8697aaa42ee1c0..8cac30ee841391 100644 --- a/paddle/cinn/auto_schedule/cost_model/xgb_cost_model.cc +++ b/paddle/cinn/auto_schedule/cost_model/xgb_cost_model.cc @@ -96,7 +96,7 @@ void AddDistPkgToPythonSysPath() { } XgbCostModel::XgbCostModel() { - common::PythonInterpreterGuard::Guard(); + cinn::common::PythonInterpreterGuard::Guard(); int previous = xgb_cost_model_count_.fetch_add(1); if (previous == 0) { AddDistPkgToPythonSysPath(); diff --git a/paddle/cinn/auto_schedule/database/jsonfile_database_test.cc b/paddle/cinn/auto_schedule/database/jsonfile_database_test.cc index 5db6f8999b18a5..0a4a98f977dc11 100644 --- a/paddle/cinn/auto_schedule/database/jsonfile_database_test.cc +++ 
b/paddle/cinn/auto_schedule/database/jsonfile_database_test.cc @@ -92,7 +92,7 @@ class TestJSONFileDatabase : public ::testing::Test { std::string record_file_path; JSONFileDatabase test_db; std::vector lowered_funcs; - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); }; TEST_F(TestJSONFileDatabase, Serialize) { diff --git a/paddle/cinn/auto_schedule/measure/measurer_test.cc b/paddle/cinn/auto_schedule/measure/measurer_test.cc index 89a2feece5aeaf..26600567c5abbf 100644 --- a/paddle/cinn/auto_schedule/measure/measurer_test.cc +++ b/paddle/cinn/auto_schedule/measure/measurer_test.cc @@ -57,9 +57,9 @@ class TestMeasurer : public ::testing::Test { void SetUp() override { #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto program = CreateAddReluProgram(); @@ -70,7 +70,7 @@ class TestMeasurer : public ::testing::Test { TaskCreator task_creator; tasks = task_creator.CreateTuneTaskOpLevel(graph.get()); const auto& dtype_dict = - graph->GetAttrs>( + graph->GetAttrs>( "inferdtype"); const auto& shape_dict = graph->GetAttrs< absl::flat_hash_map>( diff --git a/paddle/cinn/auto_schedule/measure/simple_runner.cc b/paddle/cinn/auto_schedule/measure/simple_runner.cc index 1871cfc82ae92f..92dcc00693b5b9 100644 --- a/paddle/cinn/auto_schedule/measure/simple_runner.cc +++ b/paddle/cinn/auto_schedule/measure/simple_runner.cc @@ -45,31 +45,31 @@ static const std::unordered_map> }; // Generate random value and populate them to the output address of memory -static void PopulateRandomValue(const common::Type& type, +static void PopulateRandomValue(const cinn::common::Type& type, const int numel, void* raw_ptr) { std::random_device seed; std::default_random_engine engine(seed()); - if (type == common::Bool()) { + if (type == cinn::common::Bool()) { auto* fmt_ptr = reinterpret_cast(raw_ptr); std::bernoulli_distribution dist(0.5); std::generate_n( fmt_ptr, numel, [&engine, &dist]() { return dist(engine); }); - } else if (type == common::I32()) { + } else if (type == cinn::common::I32()) { auto* fmt_ptr = reinterpret_cast(raw_ptr); std::uniform_int_distribution dist(std::numeric_limits::min(), std::numeric_limits::max()); std::generate_n( fmt_ptr, numel, [&engine, &dist]() { return dist(engine); }); - } else if (type == common::I64()) { + } else if (type == cinn::common::I64()) { auto* fmt_ptr = reinterpret_cast(raw_ptr); std::uniform_int_distribution dist( std::numeric_limits::min(), std::numeric_limits::max()); std::generate_n( fmt_ptr, numel, [&engine, &dist]() { return dist(engine); }); - } else if (type == common::F32()) { + } else if (type == cinn::common::F32()) { auto* fmt_ptr = reinterpret_cast(raw_ptr); std::uniform_real_distribution dist( std::numeric_limits::min(), std::numeric_limits::max()); @@ -90,12 +90,12 @@ static void PopulateRandomValue(const common::Type& type, // Initialize a tensor with 0 if init_with_zero == true, otherwise initialize // the tensor with random value. 
static void InitTensorData(Tensor tensor, - const common::Target& target, + const cinn::common::Target& target, bool init_with_zero) { int mem_size = tensor->shape().numel() * tensor->type().bytes(); auto* tensor_data = tensor->mutable_data(target, tensor->type()); #ifdef CINN_WITH_CUDA - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { if (init_with_zero) { cudaMemset(tensor_data, 0, mem_size); } else { @@ -106,7 +106,7 @@ static void InitTensorData(Tensor tensor, } } #endif - if (target == common::DefaultHostTarget()) { + if (target == cinn::common::DefaultHostTarget()) { if (init_with_zero) { memset(tensor_data, 0, mem_size); } else { @@ -228,7 +228,7 @@ MeasureResult SimpleRunner::Run(const MeasureInput& input, instr->Run(&execution_args); } #ifdef CINN_WITH_CUDA - if (instr->target_ == common::DefaultNVGPUTarget()) { + if (instr->target_ == cinn::common::DefaultNVGPUTarget()) { CUDA_CALL(cudaDeviceSynchronize()); } #endif diff --git a/paddle/cinn/auto_schedule/measure/simple_runner_test.cc b/paddle/cinn/auto_schedule/measure/simple_runner_test.cc index a0427edd56ced4..fc231b00e8e9dc 100644 --- a/paddle/cinn/auto_schedule/measure/simple_runner_test.cc +++ b/paddle/cinn/auto_schedule/measure/simple_runner_test.cc @@ -40,9 +40,9 @@ using ::cinn::hlir::framework::Scope; class TestSimpleRunner : public ::testing::Test { public: #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::shared_ptr<Graph> graph; std::shared_ptr<Scope> compiled_scope; @@ -69,9 +69,9 @@ class TestSimpleRunner : public ::testing::Test { task = std::make_unique<TuneTask>(); #ifdef CINN_WITH_CUDA - task->target = common::DefaultNVGPUTarget(); + task->target = cinn::common::DefaultNVGPUTarget(); #else - task->target = common::DefaultHostTarget(); + task->target = cinn::common::DefaultHostTarget(); #endif task->subgraph = graph->fusion_groups.front(); input.task = task.get(); @@ -118,7 +118,7 @@ TEST_F(TestSimpleRunner, TimeMeasured) { BuildResult build_result; build_result.compiled_scope = nullptr; std::vector<std::unique_ptr<Instruction>> instructions; - instructions.emplace_back(new Instruction(common::DefaultHostTarget(), + instructions.emplace_back(new Instruction(cinn::common::DefaultHostTarget(), nullptr, {}, {"empty_placeholder"}, diff --git a/paddle/cinn/auto_schedule/post_schedule_rule/cooperative_process_test.cc b/paddle/cinn/auto_schedule/post_schedule_rule/cooperative_process_test.cc index 0507c78ff2e1cc..ad7e77e2d157ca 100644 --- a/paddle/cinn/auto_schedule/post_schedule_rule/cooperative_process_test.cc +++ b/paddle/cinn/auto_schedule/post_schedule_rule/cooperative_process_test.cc @@ -43,7 +43,7 @@ TEST_F(TestCooperativeProcess, Matmul) { int num_threads_x = 2; int steps_k = 8; - Initialize(common::DefaultNVGPUTarget()); + Initialize(cinn::common::DefaultNVGPUTarget()); frontend::Program matmul_op = tests::OpBuilder("matmul").Build({{"X", X_shape}, {"Y", Y_shape}}); ir::IRSchedule ir_schedule = MakeIRSchedule(matmul_op, fixed_rand_seed); diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind.h b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind.h index c4baf8e7797e38..b5981e5aec9a95 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind.h +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind.h @@ -27,7 +27,7 @@ namespace auto_schedule { // Auto bind GPU
index(BlockIdx, ThreadIdx) to the loops around the block class AutoBind : public AutoGenRule { public: - explicit AutoBind(const common::Target& target) : AutoGenRule(target) {} + explicit AutoBind(const cinn::common::Target& target) : AutoGenRule(target) {} ~AutoBind() = default; RuleApplyType Init(ir::IRSchedule* init_schedule) override; diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind_test.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind_test.cc index 35dc5374b96647..72e11879733343 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind_test.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind_test.cc @@ -38,7 +38,7 @@ class TestAutoBind : public TestAutoGenRuleBase { void TestApplyOnElementWiseAdd(const std::vector<int>& shape, const std::string& block_name) { - Initialize(common::DefaultNVGPUTarget()); + Initialize(cinn::common::DefaultNVGPUTarget()); auto test_program = tests::OpBuilder("elementwise_add").Build({{"X", shape}, {"Y", shape}}); // construct input parameter @@ -107,7 +107,7 @@ class TestAutoBind : public TestAutoGenRuleBase { }; TEST_F(TestAutoBind, AnalyseApplyType) { - Initialize(common::DefaultNVGPUTarget()); + Initialize(cinn::common::DefaultNVGPUTarget()); ir::IRSchedule ir_schedule = MakeIRSchedule( tests::OpBuilder("matmul").Build({{"X", {32, 64}}, {"Y", {64, 32}}})); SearchState state(ir_schedule, 0, {}); diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_gen_rule.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_gen_rule.cc index bb215358d0b641..e52d91c1252241 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_gen_rule.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_gen_rule.cc @@ -24,7 +24,8 @@ namespace cinn { namespace auto_schedule { -AutoGenRule::AutoGenRule(const common::Target& target) : target_(&target) {} +AutoGenRule::AutoGenRule(const cinn::common::Target& target) + : target_(&target) {} int AutoGenRule::NumberApplicable() const { CHECK_GE(num_applicable_, 0) diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_gen_rule.h b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_gen_rule.h index e3008b857c53a9..dee0b72f19f4ff 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_gen_rule.h +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_gen_rule.h @@ -45,7 +45,7 @@ enum class RuleApplyType : int { */ class AutoGenRule { public: - explicit AutoGenRule(const common::Target& target); + explicit AutoGenRule(const cinn::common::Target& target); ~AutoGenRule() = default; // Initialize the AutoGenRule, it must be called before further actions. @@ -83,7 +83,7 @@ class AutoGenRule { // number of ScheduleBlock that can apply this auto gen rule int num_applicable_ = -1; // Target, not owned.
- const common::Target* target_; + const cinn::common::Target* target_; // IRSchedule, not owned; ir::IRSchedule* ir_schedule_; }; diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline.cc index 57e13c00a1c76b..4ba7092cee323c 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline.cc @@ -36,7 +36,7 @@ namespace cinn { namespace auto_schedule { AutoInline::AutoInline( - const common::Target& target, + const cinn::common::Target& target, const std::unordered_set<std::string>& no_inline_output_names) : AutoGenRule(target), no_inline_output_names_(no_inline_output_names) {} diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline.h b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline.h index 9a0fc3e823361f..66a5818c7c4438 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline.h +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline.h @@ -41,7 +41,7 @@ enum class AutoInlineType : int { class AutoInline : public AutoGenRule { public: - AutoInline(const common::Target& target, + AutoInline(const cinn::common::Target& target, const std::unordered_set<std::string>& no_inline_output_names); ~AutoInline() = default; diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline_test.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline_test.cc index e69d3069f19390..83310de86f8baf 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline_test.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline_test.cc @@ -50,7 +50,7 @@ using ::cinn::hlir::framework::OpLowerer; TEST(AutoInline, SingleLoopInline) { srand(0); Context::Global().ResetNameId(); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Expr M(32); @@ -140,7 +140,7 @@ TEST(AutoInline, SingleLoopInline) { TEST(AutoInline, AddReluInline) { srand(0); Context::Global().ResetNameId(); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); frontend::NetBuilder builder("test"); @@ -155,7 +155,7 @@ TEST(AutoInline, AddReluInline) { hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); const auto& dtype_dict = - graph->GetAttrs<absl::flat_hash_map<std::string, common::Type>>( + graph->GetAttrs<absl::flat_hash_map<std::string, cinn::common::Type>>( "inferdtype"); const auto& shape_dict = graph->GetAttrs< absl::flat_hash_map<std::string, shape_t>>("infershape"); @@ -268,7 +268,7 @@ class TestAutoInline : public TestAutoGenRuleBase {}; * Add(Multiply(Add(Relu()))) */ TEST_F(TestAutoInline, SingleChain) { - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); Initialize(target); std::vector<std::string> input_names = { "bias", "conv_output", "bn_scale", "bn_offset"}; @@ -343,7 +343,7 @@ TEST_F(TestAutoInline, SingleChain) { * z = Multiply(Exp()) */ TEST_F(TestAutoInline, InlineToMultiConsumers) { - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); Initialize(target); std::vector<std::string> input_names = {"x"}; std::vector<std::string> output_names = {"var_2", "var_1", "var_0"}; @@ -404,7 +404,7 @@ TEST_F(TestAutoInline, InlineToMultiConsumers) { * z1 = Subtract(Gather(), Add(Gather())) */ TEST_F(TestAutoInline, OnlySpatialOp) { - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); Initialize(target); std::vector<std::string> input_names = {"x", "y"}; std::vector<std::string> output_names = {"var_6", @@
-472,7 +472,7 @@ TEST_F(TestAutoInline, OnlySpatialOp) { * y = Add(fill_constant()) */ TEST_F(TestAutoInline, NoReadBufferOp) { - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); Initialize(target); std::vector input_names = {"x"}; std::vector output_names = {"var_0", "fill_constant"}; diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_unroll.h b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_unroll.h index f11ff7227a70ec..d5521e15c53482 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_unroll.h +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_unroll.h @@ -31,7 +31,8 @@ namespace auto_schedule { // based on actual situation. class AutoUnroll : public AutoGenRule { public: - explicit AutoUnroll(const common::Target& target) : AutoGenRule(target) {} + explicit AutoUnroll(const cinn::common::Target& target) + : AutoGenRule(target) {} ~AutoUnroll() = default; RuleApplyType Init(ir::IRSchedule* init_schedule) override; diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_unroll_test.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_unroll_test.cc index e4b0597cfeed75..0118846ab7a2e8 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_unroll_test.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_unroll_test.cc @@ -35,9 +35,9 @@ TEST(AutoUnroll, Init) { {M, N}, [&](Var i, Var j) { return A(i, j) * B(i, j); }, "C"); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif ast_gen_ius::TensorGroup tensor_group({C}); auto funcs = @@ -65,9 +65,9 @@ TEST(AutoUnroll, UnrollableApply) { "C"); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif auto stages = CreateStages({C}); auto funcs = cinn::lang::LowerVec( diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/mix_rules_test.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/mix_rules_test.cc index d0e2dde7bdad67..caa130fb6bdbff 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/mix_rules_test.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/mix_rules_test.cc @@ -36,7 +36,7 @@ class TestMixRules : public TestAutoGenRuleBase { TEST_F(TestMixRules, 2DMatmulOnMultiTilingRelated) { frontend::Program matmul_op = tests::OpBuilder("matmul").Build({{"X", {32, 32}}, {"Y", {32, 32}}}); - Initialize(common::DefaultNVGPUTarget()); + Initialize(cinn::common::DefaultNVGPUTarget()); ir::IRSchedule ir_schedule = MakeIRSchedule(matmul_op); std::vector func_bodys = ir_schedule.GetModule().GetExprs(); ASSERT_EQ(func_bodys.size(), 1UL); diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.cc index 9cc02eefac7e5f..8b99fd6e61e221 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.cc @@ -38,7 +38,7 @@ namespace cinn { namespace auto_schedule { -MultiLevelTiling::MultiLevelTiling(const common::Target& target, +MultiLevelTiling::MultiLevelTiling(const 
cinn::common::Target& target, const Config& config) : AutoGenRule(target), config_(config) { for (int i = 0; i < config_.tile_struct.size(); ++i) { @@ -434,9 +434,9 @@ void MultiLevelTiling::ApplyCacheWrite(ir::IRSchedule* ir_schedule, } } -const std::unordered_map<common::Target::Arch, MultiLevelTiling::Config> +const std::unordered_map<cinn::common::Target::Arch, MultiLevelTiling::Config> MultiLevelTiling::kConfigs{ - {common::Target::Arch::NVGPU, + {cinn::common::Target::Arch::NVGPU, MultiLevelTiling::Config{ /*bind_axis*/ std::vector<std::string>{"blockIdx.x", "threadIdx.x"}, @@ -446,7 +446,7 @@ const std::unordered_map /*write_cache_memory_type*/ std::string("local"), /*write_cache_levels*/ std::vector<int>{3}, }}, - {common::Target::Arch::X86, + {cinn::common::Target::Arch::X86, MultiLevelTiling::Config{ /*bind_axis*/ std::vector<std::string>{}, /*tile_struct*/ std::string("SSRSRS"), diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h index 3bcf22a812ae0d..617cc24998bbb5 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h @@ -53,9 +53,9 @@ class MultiLevelTiling : public AutoGenRule { std::vector<int> write_cache_levels; }; - static const std::unordered_map<common::Target::Arch, Config> kConfigs; + static const std::unordered_map<cinn::common::Target::Arch, Config> kConfigs; - MultiLevelTiling(const common::Target& target, const Config& config); + MultiLevelTiling(const cinn::common::Target& target, const Config& config); ~MultiLevelTiling() = default; // initialize the AutoGenRule, it must be called before further actions. diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling_test.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling_test.cc index 62f1bb74f4ac0e..bf7d8fb0b7d560 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling_test.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling_test.cc @@ -44,9 +44,9 @@ TEST(MultiLevelTile, SampleSplitTwo) { srand(0); Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif MultiLevelTiling multi_level_tiling( @@ -66,9 +66,9 @@ TEST(MultiLevelTile, SampleTileSplit) { srand(0); Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif MultiLevelTiling multi_level_tiling( @@ -93,9 +93,9 @@ TEST(MultiLevelTile, SimpleLoops) { srand(0); Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif Expr M(32); @@ -148,9 +148,9 @@ TEST(MulitLevelTile, MatrixMultiply) { srand(0); Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif Expr M(32); @@ -214,7 +214,7 @@ TEST_F(TestMultiLevelTiling, Matmul) { std::vector<int32_t> X_shape = {32, 32}; std::vector<int32_t> Y_shape = {32, 32}; std::vector<int32_t> out_shape = {32, 32}; -
Initialize(common::DefaultNVGPUTarget()); + Initialize(cinn::common::DefaultNVGPUTarget()); frontend::Program matmul_op = tests::OpBuilder("matmul").Build({{"X", X_shape}, {"Y", Y_shape}}); ir::IRSchedule ir_schedule = MakeIRSchedule(matmul_op, fixed_rand_seed); @@ -365,7 +365,7 @@ TEST_F(TestMultiLevelTiling, ReduceSum) { std::vector out_shape = {1, 16, 1}; std::vector reduce_dim = {2}; - Initialize(common::DefaultNVGPUTarget()); + Initialize(cinn::common::DefaultNVGPUTarget()); frontend::Program reduce_sum_op = tests::OpBuilder("reduce_sum") .Build({{"X", X_shape}}, {{"dim", reduce_dim}, {"keep_dim", false}}); @@ -408,7 +408,7 @@ TEST_F(TestMultiLevelTiling, Pool2d) { {"adaptive", adaptive}, {"padding_algorithm", padding_algorithm}}); - Initialize(common::DefaultNVGPUTarget()); + Initialize(cinn::common::DefaultNVGPUTarget()); ir::IRSchedule ir_schedule = MakeIRSchedule(pool2d_program, fixed_rand_seed); SearchState state(ir_schedule); VLOG(6) << "Original state:\n" << state->DebugString(); diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring.cc index c8b8fdeb0f554d..85bc207c84fc7d 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring.cc @@ -189,7 +189,7 @@ void ReductionFactoring::Apply(const std::string& block_name, ir_schedule->GetBlock(block_name + "_rf__reduce_init"); ir_schedule->SimpleComputeAt(rf_init_block, rb_loops.back()); - if (*target_ == common::DefaultNVGPUTarget()) { + if (*target_ == cinn::common::DefaultNVGPUTarget()) { rb_loops = ir_schedule->GetLoops(block_name); rf_block = ir_schedule->GetBlock(block_name + "_rf"); ir_schedule->Bind(rb_loops.back(), "threadIdx.x"); diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring.h b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring.h index 889e3e94292d2d..90963e831075c5 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring.h +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring.h @@ -26,7 +26,7 @@ namespace auto_schedule { class ReductionFactoring : public AutoGenRule { public: - explicit ReductionFactoring(const common::Target& target) + explicit ReductionFactoring(const cinn::common::Target& target) : AutoGenRule(target) {} ~ReductionFactoring() = default; diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring_test.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring_test.cc index 6848fba586944e..6f475d09de9316 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring_test.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/reduction_factoring_test.cc @@ -39,7 +39,7 @@ class TestReductionFactoring : public TestAutoGenRuleBase { const std::vector& reduce_dim, const std::string& block_name, const std::string& expected_ir) { - Initialize(common::DefaultNVGPUTarget()); + Initialize(cinn::common::DefaultNVGPUTarget()); // In order to forcibly use the most basic Compute of reduction FLAGS_cinn_new_group_scheduler = 1; auto test_program = tests::ReduceBuilder().Build( @@ -71,7 +71,7 @@ class TestReductionFactoring : public TestAutoGenRuleBase { TEST_F(TestReductionFactoring, AnalyseApplyType) { Context::Global().ResetNameId(); - Initialize(common::DefaultNVGPUTarget()); + 
Initialize(cinn::common::DefaultNVGPUTarget()); auto test_program = tests::OpBuilder("elementwise_add").Build({{"X", {4, 5}}, {"Y", {4, 5}}}); ir::IRSchedule ir_schedule = MakeIRSchedule(test_program); diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/skip_rule.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/skip_rule.cc index 7810822299c8c7..c33641c0efae24 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/skip_rule.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/skip_rule.cc @@ -24,7 +24,7 @@ namespace cinn { namespace auto_schedule { -SkipRule::SkipRule(const common::Target& target) : AutoGenRule(target) {} +SkipRule::SkipRule(const cinn::common::Target& target) : AutoGenRule(target) {} RuleApplyType SkipRule::Init(ir::IRSchedule* ir_schedule) { ir_schedule_ = ir_schedule; diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/skip_rule.h b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/skip_rule.h index b6862c23e7d2cb..a2d9e2bd5b8202 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/skip_rule.h +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/skip_rule.h @@ -25,7 +25,7 @@ namespace auto_schedule { class SkipRule : public AutoGenRule { public: - explicit SkipRule(const common::Target& target); + explicit SkipRule(const cinn::common::Target& target); ~SkipRule() = default; RuleApplyType Init(ir::IRSchedule* init_schedule) override; diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/skip_rule_test.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/skip_rule_test.cc index 5ba15a46fef188..f64bf387f59348 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/skip_rule_test.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/skip_rule_test.cc @@ -39,9 +39,9 @@ TEST(SkipRule, Basic) { srand(0); Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif Expr M(32); @@ -88,9 +88,9 @@ TEST(SkipRule, ApplyOnSpecificBlock) { srand(0); Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif Expr M(32); diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/test_helper.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/test_helper.cc index 11fabfe16df2f0..257fb1a6a935de 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/test_helper.cc +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/test_helper.cc @@ -41,7 +41,7 @@ using ::cinn::hlir::framework::Scope; using ::cinn::hlir::framework::Shape; using ::cinn::hlir::framework::Tensor; -void TestAutoGenRuleBase::Initialize(const common::Target& target) { +void TestAutoGenRuleBase::Initialize(const cinn::common::Target& target) { target_ = target; backend_compier_ = backends::Compiler::Create(target); } @@ -56,9 +56,8 @@ ir::IRSchedule TestAutoGenRuleBase::MakeIRSchedule( hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); LOG_IF(WARNING, graph->fusion_groups.size() > 1) << "Test Graph has more than 1 group"; - auto& dtype_dict = - graph->GetMutableAttrs>( - "inferdtype"); + auto& dtype_dict = graph->GetMutableAttrs< + absl::flat_hash_map>("inferdtype"); 
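// NOTE (illustrative sketch, not from the diff): every helper in this section
// fetches the same two graph attributes after pass application. The pattern,
// with template arguments spelled out on the assumption that they match the
// surrounding CINN code, is:
//   auto& dtype_dict = graph->GetMutableAttrs<
//       absl::flat_hash_map<std::string, cinn::common::Type>>("inferdtype");
//   auto& shape_dict = graph->GetMutableAttrs<
//       absl::flat_hash_map<std::string, hlir::framework::shape_t>>(
//       "infershape");
// Both maps are keyed by tensor name and feed the op lowerer created below.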
auto& shape_dict = graph->GetMutableAttrs< absl::flat_hash_map<std::string, shape_t>>("infershape"); auto op_lowerer = @@ -107,7 +106,7 @@ ir::Module TestAutoGenRuleBase::BuildIRModule(const ir::IRSchedule& schedule) { std::string TestAutoGenRuleBase::GenSourceCode(const ir::Module& ir_module) { std::unique_ptr<backends::CodeGenC> codegen; #ifdef CINN_WITH_CUDA - if (target_ == common::DefaultNVGPUTarget()) { + if (target_ == cinn::common::DefaultNVGPUTarget()) { codegen = std::make_unique<backends::CodeGenCUDA_Dev>(this->target_); } else { codegen = std::make_unique<backends::CodeGenCX86>( @@ -151,7 +150,7 @@ void MemoryCopy(const float* src, float* dst, int numel, std::string type) { } void AddDataToScope(Scope* scope, - const common::Target& target, + const cinn::common::Target& target, float* data_ptr, std::string name, const std::vector<int>& shape) { @@ -161,8 +160,9 @@ void AddDataToScope(Scope* scope, Shape cinn_shape(shape); tensor->Resize(cinn_shape); auto* tgt_data_ptr = tensor->mutable_data<float>(target); - std::string mem_cpy_type = - target == common::DefaultNVGPUTarget() ? "DeviceToHost" : "HostToHost"; + std::string mem_cpy_type = target == cinn::common::DefaultNVGPUTarget() + ? "DeviceToHost" + : "HostToHost"; MemoryCopy(data_ptr, tgt_data_ptr, cinn_shape.numel(), mem_cpy_type); } @@ -172,7 +172,7 @@ void CheckResult(raw_func_type test_func, const std::vector<std::string>& output_names, const std::vector<std::vector<int>>& input_shapes, const std::vector<std::vector<int>>& output_shapes, - const common::Target& target) { + const cinn::common::Target& target) { CHECK(input_names.size()) << "The number of inputs must be greater than 0."; CHECK(output_names.size()) << "The number of outputs must be greater than 0."; CHECK_EQ(input_names.size(), input_shapes.size()) @@ -239,7 +239,7 @@ void CheckResult(raw_func_type test_func, // data for (int i = 0; i < output_names.size(); ++i) { const float* result_ptr = scope.GetTensor(output_names[i])->data<float>(); - std::string mem_cpy_type = target == common::DefaultNVGPUTarget() ?
"DeviceToHost" : "HostToHost"; MemoryCopy( diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/test_helper.h b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/test_helper.h index 73ef166e37b416..b808c046b752cf 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/test_helper.h +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/test_helper.h @@ -45,7 +45,7 @@ class TestAutoGenRuleBase : public ::testing::Test { Context::Global().ResetNameId(); } // Initialize context for specified target - void Initialize(const common::Target& target); + void Initialize(const cinn::common::Target& target); // construct an ir::IRSchedule by lowering the specified for following // AutoGenRule test @@ -68,7 +68,7 @@ class TestAutoGenRuleBase : public ::testing::Test { raw_func_type GenExecutableKernel(const ir::Module& ir_module); protected: - common::Target target_; + cinn::common::Target target_; std::vector lowered_funcs_; std::unique_ptr backend_compier_; }; @@ -92,7 +92,7 @@ void CheckResult(raw_func_type test_func, const std::vector& output_names, const std::vector>& input_shapes, const std::vector>& output_shapes, - const common::Target& target); + const cinn::common::Target& target); } // namespace auto_schedule } // namespace cinn diff --git a/paddle/cinn/auto_schedule/search_space/rule_sampler_test.cc b/paddle/cinn/auto_schedule/search_space/rule_sampler_test.cc index 2c21477a1bc590..442e108d948cc6 100644 --- a/paddle/cinn/auto_schedule/search_space/rule_sampler_test.cc +++ b/paddle/cinn/auto_schedule/search_space/rule_sampler_test.cc @@ -23,9 +23,9 @@ namespace cinn { namespace auto_schedule { #ifdef CINN_WITH_CUDA -Target target = common::DefaultNVGPUTarget(); +Target target = cinn::common::DefaultNVGPUTarget(); #else -Target target = common::DefaultHostTarget(); +Target target = cinn::common::DefaultHostTarget(); #endif std::vector GenerateTestRules() { diff --git a/paddle/cinn/auto_schedule/search_space/search_state.cc b/paddle/cinn/auto_schedule/search_space/search_state.cc index d380691faf62b5..8aff7dac3c2110 100644 --- a/paddle/cinn/auto_schedule/search_space/search_state.cc +++ b/paddle/cinn/auto_schedule/search_space/search_state.cc @@ -32,7 +32,8 @@ namespace auto_schedule { SearchState::SearchState(ir::IRSchedule ir_sch, float cost, const std::vector& rules) - : common::Shared<_SearchState_>(common::make_shared<_SearchState_>()) { + : cinn::common::Shared<_SearchState_>( + cinn::common::make_shared<_SearchState_>()) { auto* state = get(); state->ir_schedule = std::move(ir_sch); state->applicable_rules = rules; diff --git a/paddle/cinn/auto_schedule/search_space/search_state.h b/paddle/cinn/auto_schedule/search_space/search_state.h index a5684eb6189522..6852fb1c99186b 100644 --- a/paddle/cinn/auto_schedule/search_space/search_state.h +++ b/paddle/cinn/auto_schedule/search_space/search_state.h @@ -31,7 +31,7 @@ struct _SearchState_; class AutoGenRule; //! Shared Wrapper for _SearchState_ -class SearchState : public common::Shared<_SearchState_> { +class SearchState : public cinn::common::Shared<_SearchState_> { public: SearchState() = default; // create a new SearchState @@ -49,7 +49,7 @@ class SearchState : public common::Shared<_SearchState_> { }; //! 
Class to store immediate states during search -struct _SearchState_ : public common::Object { +struct _SearchState_ : public cinn::common::Object { // IRSchedule contains ir::ModuleExpr and trace scheduling process ir::IRSchedule ir_schedule; // Cost model predicted cost diff --git a/paddle/cinn/auto_schedule/search_space/search_state_test.cc b/paddle/cinn/auto_schedule/search_space/search_state_test.cc index b0f216c4895aa1..3ab24fd1fdb106 100644 --- a/paddle/cinn/auto_schedule/search_space/search_state_test.cc +++ b/paddle/cinn/auto_schedule/search_space/search_state_test.cc @@ -25,7 +25,7 @@ namespace cinn { namespace auto_schedule { TEST(TestSearchState, SearchStateHash_Equal) { - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); ir::Expr M(32); ir::Expr N(32); diff --git a/paddle/cinn/auto_schedule/search_strategy/evolutionary_search_test.cc b/paddle/cinn/auto_schedule/search_strategy/evolutionary_search_test.cc index 539be166f28cf2..6a983d7f9aaac8 100644 --- a/paddle/cinn/auto_schedule/search_strategy/evolutionary_search_test.cc +++ b/paddle/cinn/auto_schedule/search_strategy/evolutionary_search_test.cc @@ -41,7 +41,7 @@ std::vector CreateTasks(const frontend::Program& program, TaskCreator task_creator; auto tasks = task_creator.CreateTuneTaskOpLevel(graph.get()); const auto& dtype_dict = - graph->GetAttrs>( + graph->GetAttrs>( "inferdtype"); const auto& shape_dict = graph->GetAttrs< absl::flat_hash_map>("infershape"); @@ -93,7 +93,7 @@ class MockSearchSpace : public SearchSpace { class MockCostModel : public ExprCostModel { float Predict(const ir::ModuleExpr& sample, - const common::Target& target) const override { + const cinn::common::Target& target) const override { float cost = 0.0f; std::vector exprs = sample.GetExprs(); for (const ir::Expr& expr : exprs) { @@ -108,7 +108,7 @@ class MockCostModel : public ExprCostModel { TEST(EvolutionarySearch, GetOneBest) { TuneTask mock_tune_task; mock_tune_task.serialized_key = "mock_task"; - mock_tune_task.target = common::DefaultTarget(); + mock_tune_task.target = cinn::common::DefaultTarget(); InitialTaskRegistry* task_registry = InitialTaskRegistry::Global(); task_registry->Regist(mock_tune_task.serialized_key, ir::ModuleExpr({ir::Expr(0)})); @@ -131,7 +131,7 @@ TEST(EvolutionarySearch, GetOneBest) { TEST(EvolutionarySearch, GetEpsGreedy) { TuneTask mock_tune_task; mock_tune_task.serialized_key = "mock_task"; - mock_tune_task.target = common::DefaultTarget(); + mock_tune_task.target = cinn::common::DefaultTarget(); InitialTaskRegistry* task_registry = InitialTaskRegistry::Global(); task_registry->Regist(mock_tune_task.serialized_key, ir::ModuleExpr({ir::Expr(0)})); @@ -155,7 +155,7 @@ TEST(EvolutionarySearch, GetEpsGreedy) { } TEST(EvolutionarySearch, Evolve) { - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); auto tasks = CreateTasks( tests::OpBuilder("matmul").Build({{"X", {32, 32}}, {"Y", {32, 32}}}), target); diff --git a/paddle/cinn/auto_schedule/search_strategy/mutate_rule/mutate_tile_size_test.cc b/paddle/cinn/auto_schedule/search_strategy/mutate_rule/mutate_tile_size_test.cc index 94222d748c0546..87930cf81ce632 100644 --- a/paddle/cinn/auto_schedule/search_strategy/mutate_rule/mutate_tile_size_test.cc +++ b/paddle/cinn/auto_schedule/search_strategy/mutate_rule/mutate_tile_size_test.cc @@ -28,9 +28,9 @@ TEST(MutateTileSize, Basic) { srand(0); Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = 
common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif const int kSize = 32; diff --git a/paddle/cinn/auto_schedule/task/task_creator_test.cc b/paddle/cinn/auto_schedule/task/task_creator_test.cc index 60b5ebec0e808c..2cb80727d9bc4f 100644 --- a/paddle/cinn/auto_schedule/task/task_creator_test.cc +++ b/paddle/cinn/auto_schedule/task/task_creator_test.cc @@ -50,9 +50,9 @@ Program CreateAddProgram() { TEST(TaskCreator, Basic) { #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif Program prog = CreateAddProgram(); auto graph = std::make_shared<hlir::framework::Graph>(prog, target); diff --git a/paddle/cinn/auto_schedule/task/task_optimizer.cc b/paddle/cinn/auto_schedule/task/task_optimizer.cc index d76797d9953ecd..273cba4c4060e6 100644 --- a/paddle/cinn/auto_schedule/task/task_optimizer.cc +++ b/paddle/cinn/auto_schedule/task/task_optimizer.cc @@ -49,12 +49,12 @@ using cinn::hlir::op::ExternalApiRegistry; // *** forward declarations of auxiliary functions to be used in this file only // *** update a scheduled function with several post-processors -ir::LoweredFunc FuncWithUpdatedBody(const common::Target& target, +ir::LoweredFunc FuncWithUpdatedBody(const cinn::common::Target& target, const ir::LoweredFunc& old_func, ir::Expr& body); // NOLINT // check whether a scheduled lowered function is valid bool PruneInvalid(const ir::LoweredFunc& lowered_func, - const common::Target& target); + const cinn::common::Target& target); // exclude some special tasks bool IsForbiddenToTune(const TuneTask* task); // tell whether the task has been wrapped by custom_call in @@ -441,11 +441,11 @@ bool IsGPUMemoryUsageExceedLimit(const ir::LoweredFunc& lowered_func, } bool PruneInvalid(const ir::LoweredFunc& lowered_func, - const common::Target& target) { + const cinn::common::Target& target) { static const size_t kGPUSharedMemoryLimitBytes = GetGPUSharedMemoryLimit(); static const size_t kGPULocalStackLimitBytes = GetGPULocalStackLimit(); - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { if (IsGPUMemoryUsageExceedLimit(lowered_func, ir::MemoryType::GPUShared, kGPUSharedMemoryLimitBytes)) { diff --git a/paddle/cinn/auto_schedule/task/task_registry_test.cc b/paddle/cinn/auto_schedule/task/task_registry_test.cc index 23f31028be9e93..579d9378511de9 100644 --- a/paddle/cinn/auto_schedule/task/task_registry_test.cc +++ b/paddle/cinn/auto_schedule/task/task_registry_test.cc @@ -34,13 +34,13 @@ namespace cinn { namespace auto_schedule { std::vector<TuneTask> CreateTasks(hlir::framework::Graph* graph, - const common::Target& target) { + const cinn::common::Target& target) { // create tasks TaskCreator task_creator; std::vector<TuneTask> tasks = task_creator.CreateTuneTaskOpLevel(graph); const auto& dtype_dict = - graph->GetAttrs<absl::flat_hash_map<std::string, common::Type>>( + graph->GetAttrs<absl::flat_hash_map<std::string, cinn::common::Type>>( "inferdtype"); const auto& shape_dict = graph->GetAttrs< absl::flat_hash_map<std::string, shape_t>>("infershape"); @@ -56,7 +56,7 @@ std::vector<TuneTask> CreateTasks(hlir::framework::Graph* graph, } std::shared_ptr<hlir::framework::Graph> CreateAddProgram( - const common::Target& target) { + const cinn::common::Target& target) { frontend::NetBuilder builder("test"); auto a = builder.CreateInput(Float(32), {1, 64, 112, 112}, "A"); @@ -70,9 +70,9 @@ TEST(TestTaskRegistry, basic) {
FLAGS_auto_schedule_use_cost_model = true; #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::shared_ptr<hlir::framework::Graph> graph = CreateAddProgram(target); std::vector<TuneTask> tasks = CreateTasks(graph.get(), target); diff --git a/paddle/cinn/auto_schedule/task/tune_task.cc b/paddle/cinn/auto_schedule/task/tune_task.cc index f2c2b720b6f062..30353d2db584d3 100644 --- a/paddle/cinn/auto_schedule/task/tune_task.cc +++ b/paddle/cinn/auto_schedule/task/tune_task.cc @@ -63,7 +63,8 @@ std::string TuneTask::SerializeToString( // local function to print dtype,shape of out/in variables of the specified // node auto print_node_links_fn = - [&](const std::vector<common::Shared<common::GraphEdge>>& links, + [&](const std::vector<cinn::common::Shared<cinn::common::GraphEdge>>& + links, bool is_input) { int printed_num = 0; for (auto&& edge : links) { diff --git a/paddle/cinn/auto_schedule/task/tune_task.h b/paddle/cinn/auto_schedule/task/tune_task.h index 92bf5c73ca3f3d..b69c86917602c5 100644 --- a/paddle/cinn/auto_schedule/task/tune_task.h +++ b/paddle/cinn/auto_schedule/task/tune_task.h @@ -54,7 +54,7 @@ class TuneTask { // Lower handler, Not owned hlir::framework::OpLowerer* op_lowerer; // target of this task - common::Target target; + cinn::common::Target target; // stores the initial (un-optimized) LoweredFuncs std::vector<ir::LoweredFunc> lowered_funcs; // names of the output arguments of lowered_funcs_ diff --git a/paddle/cinn/auto_schedule/task/tune_task_test.cc b/paddle/cinn/auto_schedule/task/tune_task_test.cc index 41fe147d6d60c4..733197b0a6f97d 100644 --- a/paddle/cinn/auto_schedule/task/tune_task_test.cc +++ b/paddle/cinn/auto_schedule/task/tune_task_test.cc @@ -59,9 +59,9 @@ Program CreateAddProgram() { TEST(TuneTask, GraphToUnoptLoweredFunc_NoPass) { Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif Program prog = CreateAddProgram(); auto graph = std::make_shared<hlir::framework::Graph>(prog, target); @@ -73,7 +73,7 @@ TEST(TuneTask, GraphToUnoptLoweredFunc_NoPass) { const auto& shape_dict = graph->GetAttrs< absl::flat_hash_map<std::string, shape_t>>("infershape"); const auto& dtype_dict = - graph->GetAttrs<absl::flat_hash_map<std::string, common::Type>>( + graph->GetAttrs<absl::flat_hash_map<std::string, cinn::common::Type>>( "inferdtype"); auto op_lowerer = hlir::framework::CreateOpLowerer(dtype_dict, shape_dict, target); @@ -169,9 +169,9 @@ TEST(TuneTask, GraphToUnoptLoweredFunc_NoPass) { TEST(TuneTask, GraphToUnoptLoweredFunc_ApplyPass) { Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif Program prog = CreateAddProgram(); auto graph = std::make_shared<hlir::framework::Graph>(prog, target); @@ -185,7 +185,7 @@ TEST(TuneTask, GraphToUnoptLoweredFunc_ApplyPass) { const auto& shape_dict = graph->GetAttrs< absl::flat_hash_map<std::string, shape_t>>("infershape"); const auto& dtype_dict = - graph->GetAttrs<absl::flat_hash_map<std::string, common::Type>>( + graph->GetAttrs<absl::flat_hash_map<std::string, cinn::common::Type>>( "inferdtype"); OpLowerer op_lowerer( @@ -277,9 +277,9 @@ TEST(TuneTask, GraphToUnoptLoweredFunc_ApplyPass) { TEST(TuneTask, SerializeToString) { Context::Global().ResetNameId(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); +
Target target = cinn::common::DefaultHostTarget(); #endif Program prog = CreateAddProgram(); auto graph = std::make_shared(prog, target); @@ -291,7 +291,7 @@ TEST(TuneTask, SerializeToString) { const auto& shape_dict = graph->GetAttrs< absl::flat_hash_map>("infershape"); const auto& dtype_dict = - graph->GetAttrs>( + graph->GetAttrs>( "inferdtype"); OpLowerer op_lowerer( new hlir::framework::OpLowererImpl(dtype_dict, shape_dict, target)); diff --git a/paddle/cinn/auto_schedule/tests/performance_comparison_test.cc b/paddle/cinn/auto_schedule/tests/performance_comparison_test.cc index ce7a9369f847ef..2966467b3eda67 100644 --- a/paddle/cinn/auto_schedule/tests/performance_comparison_test.cc +++ b/paddle/cinn/auto_schedule/tests/performance_comparison_test.cc @@ -138,7 +138,7 @@ class PerformanceTester : public ::testing::Test { std::unique_ptr BuildNoScheduleProgram( Graph* graph, GraphCompiler* graph_compiler) { const auto& dtype_dict = - graph->GetAttrs>( + graph->GetAttrs>( "inferdtype"); const auto& shape_dict = graph->GetAttrs< absl::flat_hash_map>( @@ -211,9 +211,9 @@ class PerformanceTester : public ::testing::Test { } #ifdef CINN_WITH_CUDA - Target target_ = common::DefaultNVGPUTarget(); + Target target_ = cinn::common::DefaultNVGPUTarget(); #else - Target target_ = common::DefaultHostTarget(); + Target target_ = cinn::common::DefaultHostTarget(); #endif Options options_; }; @@ -340,7 +340,7 @@ TEST_F(PerformanceTester, LookupTable) { Evaluate(tests::OpBuilder("lookup_table") .Build({{"table", {50001, 768}}, - {"ids", {10, 128, 1}, common::Int(64)}}, + {"ids", {10, 128, 1}, cinn::common::Int(64)}}, {{"padding_idx", padding_idx}})); } @@ -349,7 +349,7 @@ TEST_F(PerformanceTester, Gather) { Evaluate(tests::OpBuilder("gather").Build( {{"operand", {10, 12, 128, 512}}, - {"index", {1, 1, 1, 128}, common::Int(32)}}, + {"index", {1, 1, 1, 128}, cinn::common::Int(32)}}, {{"axis", axis}})); } @@ -359,8 +359,9 @@ TEST_F(PerformanceTester, ResNet50) { FLAGS_cinn_infer_model_version = 1.0; std::unordered_map> feeds = { {"inputs", {batch_size, 3, 224, 224}}}; - Evaluate(cinn::frontend::PaddleModelConvertor(common::DefaultNVGPUTarget()) - .LoadModel(FLAGS_resnet50_model_dir, true, feeds)); + Evaluate( + cinn::frontend::PaddleModelConvertor(cinn::common::DefaultNVGPUTarget()) + .LoadModel(FLAGS_resnet50_model_dir, true, feeds)); } } // namespace auto_schedule diff --git a/paddle/cinn/backends/codegen_c.cc b/paddle/cinn/backends/codegen_c.cc index c3c882f9e8f988..282a338204f26d 100644 --- a/paddle/cinn/backends/codegen_c.cc +++ b/paddle/cinn/backends/codegen_c.cc @@ -120,10 +120,11 @@ std::string CodeGenC::GetTypeName(Type type) { auto customized_name = type.customized_type(); // get name of a cuda built-in vector type, it is started with a // 'CudaVectorType::' prefix - if (utils::Startswith(customized_name, - common::customized_type::kcuda_builtin_vector_t)) { + if (utils::Startswith( + customized_name, + cinn::common::customized_type::kcuda_builtin_vector_t)) { customized_name.erase( - 0, strlen(common::customized_type::kcuda_builtin_vector_t)); + 0, strlen(cinn::common::customized_type::kcuda_builtin_vector_t)); } return customized_name; } @@ -653,7 +654,7 @@ void CodeGenC::PrintBufferCreation(const std::vector &buffers) { DoIndent(); auto buffer_ptr_type = Type() - .set_customized_type(common::customized_type::kbuffer_t) + .set_customized_type(cinn::common::customized_type::kbuffer_t) .set_cpp_handle(); Var variable = ir::_Var_::Make(buffer->name, buffer_ptr_type); auto expr = 
ir::intrinsics::BufferCreate::Make(buffer); diff --git a/paddle/cinn/backends/codegen_c_test.cc b/paddle/cinn/backends/codegen_c_test.cc index caf4950cdfe8cd..91f80c190f0f85 100644 --- a/paddle/cinn/backends/codegen_c_test.cc +++ b/paddle/cinn/backends/codegen_c_test.cc @@ -286,7 +286,7 @@ TEST(CodeGenC, matmul_tile) { // Code gen auto func = Lower("matmul", stages, {A, B, C}); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Module::Builder builder("module1", target); builder.AddFunction(func); @@ -373,7 +373,7 @@ TEST(CodeGenC, matmul_packed) { // Code gen auto func = Lower("matmul_with_packing", stages, {A, B, packedB, C}); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Module::Builder builder("module1", target); builder.AddFunction(func); @@ -445,10 +445,10 @@ TEST(CodeGenC, call_extern) { auto yexpr = Lower("yy", stages, {y}); - Module::Builder builder("module0", common::DefaultHostTarget()); + Module::Builder builder("module0", cinn::common::DefaultHostTarget()); builder.AddFunction(yexpr); - CodeGenC codegen(common::DefaultHostTarget()); + CodeGenC codegen(cinn::common::DefaultHostTarget()); codegen.SetInlineBuiltinCodes(false); auto out = codegen.Compile(builder.Build(), CodeGenC::OutputKind::CImpl); std::cout << "codegen C:" << std::endl << out << std::endl; diff --git a/paddle/cinn/backends/codegen_cuda_dev.cc b/paddle/cinn/backends/codegen_cuda_dev.cc index 5a1ddbc450a091..4e7fa79d2d0b30 100644 --- a/paddle/cinn/backends/codegen_cuda_dev.cc +++ b/paddle/cinn/backends/codegen_cuda_dev.cc @@ -339,8 +339,9 @@ void CodeGenCUDA_Dev::Visit(const ir::Let *op) { // identify vectorized tensors by checking their dtypes are customized_type // with customized_type::kcuda_builtin_vector_t prefix, and save their names if (op->type().is_customized() && - utils::Startswith(op->type().customized_type(), - common::customized_type::kcuda_builtin_vector_t)) { + utils::Startswith( + op->type().customized_type(), + cinn::common::customized_type::kcuda_builtin_vector_t)) { str_ += GetTypeRepr(op->type()); if (op->type().is_cpp_handle()) { str_ += " "; diff --git a/paddle/cinn/backends/codegen_cuda_util.h b/paddle/cinn/backends/codegen_cuda_util.h index 71be91a855c8ed..5a7f1f5882bf9b 100644 --- a/paddle/cinn/backends/codegen_cuda_util.h +++ b/paddle/cinn/backends/codegen_cuda_util.h @@ -47,9 +47,10 @@ namespace detail { struct CollectHostFunctionVisitor : public ir::IRMutator<> { explicit CollectHostFunctionVisitor(const std::string& module_name) - : host_module_builder(module_name + "_host", common::DefaultHostTarget()), + : host_module_builder(module_name + "_host", + cinn::common::DefaultHostTarget()), device_module_builder(module_name + "_gpu_device", - common::DefaultNVGPUTarget()) {} + cinn::common::DefaultNVGPUTarget()) {} std::tuple operator()(Expr* expr) { ir::IRMutator<>::Visit(expr, expr); diff --git a/paddle/cinn/backends/codegen_debug_test.cc b/paddle/cinn/backends/codegen_debug_test.cc index a156f5475b3db7..6ed5e37685b703 100644 --- a/paddle/cinn/backends/codegen_debug_test.cc +++ b/paddle/cinn/backends/codegen_debug_test.cc @@ -61,7 +61,7 @@ CUdeviceptr CreateCudaMemory(const std::vector& shape, const T* data) { } TEST(CodeGenDebug, RunCudaSourceCode) { - common::Context::Global().ResetNameId(); + cinn::common::Context::Global().ResetNameId(); std::string source_code = R"ROC( extern "C" { diff --git a/paddle/cinn/backends/compiler_test.cc b/paddle/cinn/backends/compiler_test.cc index 
84abedd91e5b61..1c14fcd4ffa64b 100644 --- a/paddle/cinn/backends/compiler_test.cc +++ b/paddle/cinn/backends/compiler_test.cc @@ -50,26 +50,30 @@ TEST(Compiler, x86) { auto fn = Lower("fn", stages, {A, B, C}); - ir::Module::Builder builder("some_module", common::DefaultHostTarget()); + ir::Module::Builder builder("some_module", + cinn::common::DefaultHostTarget()); builder.AddFunction(fn); - auto compiler = Compiler::Create(common::DefaultHostTarget()); + auto compiler = Compiler::Create(cinn::common::DefaultHostTarget()); compiler->Build(builder.Build()); auto* fnp = compiler->Lookup("fn"); ASSERT_TRUE(fnp); - auto* Ab = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_random() - .Build(); - auto* Bb = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_random() - .Build(); - auto* Cb = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_zero() - .Build(); - - auto args = common::ArgsBuilder().Add(Ab).Add(Bb).Add(Cb).Build(); + auto* Ab = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_random() + .Build(); + auto* Bb = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_random() + .Build(); + auto* Cb = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_zero() + .Build(); + + auto args = cinn::common::ArgsBuilder().Add(Ab).Add(Bb).Add(Cb).Build(); reinterpret_cast(fnp)(args.data(), args.size()); // test result @@ -107,24 +111,28 @@ TEST(Compiler, cuda) { auto fn = Lower("fn", stages, {A, B, C}); - ir::Module::Builder builder("some_module", common::DefaultHostTarget()); + ir::Module::Builder builder("some_module", + cinn::common::DefaultHostTarget()); builder.AddFunction(fn); - auto compiler = Compiler::Create(common::DefaultNVGPUTarget()); + auto compiler = Compiler::Create(cinn::common::DefaultNVGPUTarget()); compiler->Build(builder.Build()); auto* fnp = compiler->Lookup("fn"); ASSERT_TRUE(fnp); - auto* Ab = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_random() - .Build(); - auto* Bb = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_random() - .Build(); - auto* Cb = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_zero() - .Build(); + auto* Ab = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_random() + .Build(); + auto* Bb = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_random() + .Build(); + auto* Cb = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_zero() + .Build(); // allocate CUDA buffer void *Ag, *Bg, *Cg; @@ -144,7 +152,8 @@ TEST(Compiler, cuda) { cinn_buffer_t Cbb; Cbb.memory = reinterpret_cast(Cg); - auto args = common::ArgsBuilder().Add(&Abb).Add(&Bbb).Add(&Cbb).Build(); + auto args = + cinn::common::ArgsBuilder().Add(&Abb).Add(&Bbb).Add(&Cbb).Build(); utils::Timer timer; timer.Start(); @@ -204,10 +213,10 @@ TEST(Compiler, sqrt) { auto fn = Lower("fn", stages, {input, mean, scale, bias, variance, A, B[0], BB}); - Module::Builder builder("some", common::DefaultHostTarget()); + Module::Builder builder("some", cinn::common::DefaultHostTarget()); builder.AddFunction(fn); - auto compiler = Compiler::Create(common::DefaultHostTarget()); + auto compiler = Compiler::Create(cinn::common::DefaultHostTarget()); compiler->Build(builder.Build()); } diff --git a/paddle/cinn/backends/ir_schedule_test.cc b/paddle/cinn/backends/ir_schedule_test.cc index cdcc709d9d45bf..1d6b92933b9f16 100644 --- 
a/paddle/cinn/backends/ir_schedule_test.cc +++ b/paddle/cinn/backends/ir_schedule_test.cc @@ -41,7 +41,7 @@ TEST(IrSchedule, split_and_fuse1) { Expr N(32); Expr P(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -100,7 +100,7 @@ TEST(IrSchedule, split_and_fuse2) { Expr N(32); Expr P(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -163,7 +163,7 @@ void TestSplitThrow() { Expr N(32); Expr P(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -205,7 +205,7 @@ TEST(IrSchedule, reorder1) { Expr N(32); Expr P(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, P}); auto B = Compute( @@ -272,7 +272,7 @@ TEST(IrSchedule, reorder2) { Expr N(32); Expr P(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, P}); auto B = Compute( @@ -338,7 +338,7 @@ TEST(IrSchedule, reorder3) { Expr N(32); Expr P(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, P}); auto B = Compute( @@ -408,7 +408,7 @@ TEST(IrSchedule, reorder4) { Expr N(32); Expr P(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, P}); auto B = Compute( @@ -482,7 +482,7 @@ TEST(IrSchedule, parallel) { Expr M(32); Expr N(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -548,7 +548,7 @@ TEST(IrSchedule, vectorize) { Expr M(32); Expr N(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -622,7 +622,7 @@ TEST(IrSchedule, unroll) { Expr M(32); Expr N(2); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -696,7 +696,7 @@ TEST(IrSchedule, bind) { Expr M(32); Expr N(2); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -741,7 +741,7 @@ TEST(IrSchedule, simple_compute_at) { Expr M(128); Expr N(10); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -814,7 +814,7 @@ TEST(IrSchedule, compute_at0) { Expr M(128); Expr N(10); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -888,7 +888,7 @@ TEST(IrSchedule, compute_at1) { Expr N(32); Expr P(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, P}); auto B = Compute( @@ -960,7 +960,7 @@ TEST(IrSchedule, compute_at2) { Expr M(64); Expr N(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, M}); auto B = Compute( @@ -1032,7 +1032,7 @@ TEST(IrSchedule, compute_at3) { Expr M(64); Expr N(32); - Target target = common::DefaultHostTarget(); + Target target = 
cinn::common::DefaultHostTarget(); Placeholder A("A", {M, M}); auto B = Compute( @@ -1112,7 +1112,7 @@ TEST(IrSchedule, compute_at4) { Expr N(32); Expr P(32); - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); Placeholder A("A", {M, N, P}); auto B = Compute( @@ -1174,7 +1174,7 @@ TEST(IrSchedule, compute_at5) { Expr M(64); Expr N(32); - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); Placeholder A("A", {M, M}); auto B = Compute( @@ -1237,7 +1237,7 @@ TEST(IrSchedule, compute_at6) { Expr M(64); Expr N(32); - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); Placeholder A("A", {M, M}); auto B = Compute( @@ -1303,7 +1303,7 @@ TEST(IrSchedule, cache_read1) { Expr N(32); Expr P(16); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, M}); auto B = Compute( @@ -1388,7 +1388,7 @@ TEST(IrSchedule, cache_read2) { Expr M(64); Expr N(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -1456,7 +1456,7 @@ TEST(IrSchedule, cache_write1) { Expr M(64); Expr N(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -1542,7 +1542,7 @@ TEST(IrSchedule, cache_write2) { Expr M(64); Expr N(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -1610,7 +1610,7 @@ TEST(IrSchedule, cache_read3) { Expr N(32); Expr P(16); - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); Placeholder A("A", {M, M}); auto B = Compute( @@ -1691,7 +1691,7 @@ TEST(IrSchedule, cache_write3) { Expr M(64); Expr N(32); - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -1773,7 +1773,7 @@ TEST(IrSchedule, sync_threads) { Expr M(64); Expr N(32); - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -1854,7 +1854,7 @@ TEST(IrSchedule, cache_write4) { Expr M(64); Expr N(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, N}); Var k(32, "k0"); @@ -1930,7 +1930,7 @@ TEST(IrSchedule, rfactor) { Expr N(2); Expr K(16); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, K}); Var j(2, "j0"); @@ -2057,7 +2057,7 @@ TEST(IrSchedule, rfactor1) { Expr N(2); Expr K(16); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, K}); Var j(2, "j0"); @@ -2185,7 +2185,7 @@ TEST(IrSchedule, rfactor2) { Expr N(2); Expr K(16); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, K}); Placeholder B("B", {K, N}); @@ -2318,7 +2318,7 @@ TEST(IrSchedule, factorize_reduction) { Expr N(4); Expr K(5); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, K}); Var j(4, "j0"); @@ -2407,7 +2407,7 @@ TEST(IrSchedule, factorize_reduction1) { Expr N(4); Expr K(5); 
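// NOTE (illustrative sketch, not from the diff): the recurring edit in these
// tests, common:: to cinn::common::, is presumably needed because a top-level
// ::common namespace now coexists with cinn::common. A minimal, hypothetical
// reproduction of the lookup problem that full qualification avoids; none of
// these declarations are CINN code:
//
//   namespace common { struct Target; }                     // top-level ::common
//   namespace cinn { namespace common { struct Target; } }  // cinn::common
//
//   using namespace cinn;
//   // common::Target* t;               // error: 'common' is ambiguous here
//   cinn::common::Target* t = nullptr;  // OK: the fully qualified spelling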
- Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, K}); Var j(4, "j0"); @@ -2496,7 +2496,7 @@ TEST(IrSchedule, factorize_reduction2) { Expr N(4); Expr K(5); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N * K}); Var j(4 * 5, "j0"); @@ -2582,7 +2582,7 @@ TEST(IrSchedule, compute_inline1) { Expr N(32); Expr P(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, P}); auto B = Compute( @@ -2653,7 +2653,7 @@ TEST(IrSchedule, compute_inline2) { Expr N(32); Expr P(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, P}); auto B = Compute( @@ -2728,7 +2728,7 @@ TEST(IrSchedule, compute_inline3) { Expr N(32); Expr P(32); - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); Placeholder A("A", {M, N, P}); auto B = Compute( @@ -2790,7 +2790,7 @@ TEST(IrSchedule, compute_inline4) { Expr N(32); Expr P(32); - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); Placeholder A("A", {M, N, P}); auto B = Compute( @@ -2852,7 +2852,7 @@ TEST(IrSchedule, reverse_compute_inline1) { Expr M(32); Expr N(64); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N}); auto B = Compute( @@ -2915,7 +2915,7 @@ TEST(IrSchedule, reverse_compute_inline2) { Expr N(32); Expr P(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, P}); auto B = Compute( @@ -2984,7 +2984,7 @@ TEST(IrSchedule, copytransform1) { Expr N(32); Expr P(32); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, P}); auto B = Compute( @@ -3075,7 +3075,7 @@ TEST(IrSchedule, copytransform2) { Expr N(64); Expr P(128); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Placeholder A("A", {M, N, P}); auto B = Compute( @@ -3171,7 +3171,7 @@ TEST(IrSchedule, Annotate) { {}, {}, nullptr, - common::DefaultHostTarget(), + cinn::common::DefaultHostTarget(), true); ir::IRSchedule ir_sch(ir::ModuleExpr({funcs[0]->body})); auto fused = ir_sch.Fuse("B", {0, 1}); @@ -3215,7 +3215,7 @@ TEST(IrSchedule, Unannotate) { {}, {}, nullptr, - common::DefaultHostTarget(), + cinn::common::DefaultHostTarget(), true); ir::IRSchedule ir_sch(ir::ModuleExpr({funcs[0]->body})); auto fused = ir_sch.Fuse("B", {0, 1}); @@ -3253,7 +3253,7 @@ TEST(IrSchedule, Unannotate) { } TEST(IrSchedule, ComplexIndices) { - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); ir::Expr M(32); ir::Expr K(64); @@ -3375,7 +3375,7 @@ TEST(IrSchedule, SamplePerfectTile) { {}, {}, nullptr, - common::DefaultHostTarget(), + cinn::common::DefaultHostTarget(), true); ir::IRSchedule ir_sch(ir::ModuleExpr({funcs[0]->body})); @@ -3400,7 +3400,7 @@ TEST(IrSchedule, GetChildBlocks) { {}, {}, nullptr, - common::DefaultHostTarget(), + cinn::common::DefaultHostTarget(), true); ir::IRSchedule ir_sch(ir::ModuleExpr({funcs[0]->body})); @@ -3440,7 +3440,7 @@ TEST(IrSchedule, SampleCategorical) { {}, {}, nullptr, - common::DefaultHostTarget(), + cinn::common::DefaultHostTarget(), true); ir::IRSchedule 
ir_sch(ir::ModuleExpr({funcs[0]->body})); diff --git a/paddle/cinn/backends/llvm/codegen_llvm.cc b/paddle/cinn/backends/llvm/codegen_llvm.cc index ff90a14fcfd204..a79e67fd6c4839 100644 --- a/paddle/cinn/backends/llvm/codegen_llvm.cc +++ b/paddle/cinn/backends/llvm/codegen_llvm.cc @@ -55,8 +55,8 @@ namespace cinn { namespace backends { using BinaryInstruction = llvm::Instruction::BinaryOps; -using common::bfloat16; -using common::float16; +using cinn::common::bfloat16; +using cinn::common::float16; namespace { @@ -69,9 +69,11 @@ auto NodeToExpr(const T *node) { return oss.str(); } -bool is_integral_type(common::Type t) { return t.is_int() || t.is_uint(); } +bool is_integral_type(cinn::common::Type t) { + return t.is_int() || t.is_uint(); +} -bool is_floating_type(common::Type t) { return t.is_float(); } +bool is_floating_type(cinn::common::Type t) { return t.is_float(); } llvm::Value *EmitComparison(llvm::CmpInst::Predicate predicate, llvm::Value *lhs, @@ -405,7 +407,8 @@ llvm::Value *CodeGenLLVM::Visit(const ir::Cast *op) { // pod_value_t cast to a value. if (op->v().type().is_customized_type() && op->v().type().customized_type() == - common::customized_type::kpod_value_t) { // pod_value_t operator + cinn::common::customized_type::kpod_value_t) { // pod_value_t + // operator llvm::Function *callee{}; if (op->type().is_bool()) { callee = m_->getFunction(runtime::intrinsic::pod_value_to_bool); @@ -970,7 +973,7 @@ llvm::Value *CodeGenLLVM::Visit(const ir::Store *op) { // fit the total_lanes in native_lanes(split into multiple native steps) for (int offset = 0; offset < total_lanes; offset += total_lanes) { int lanes = total_lanes; - Expr base = common::AutoSimplify(ramp->base + offset); + Expr base = cinn::common::AutoSimplify(ramp->base + offset); optim::VarModSimplify(&base); auto *ptr = CreateBufferPtr(op->type().ElementOf(), buffer, Visit(&base)); @@ -1283,7 +1286,7 @@ llvm::Value *CodeGenLLVM::DenseVectorLoad(const ir::Load *op) { for (int i = 0; i < load_lanes; i += load_lanes) { int slice_lanes = load_lanes; - auto slice_base = common::AutoSimplify(ramp->base + i); + auto slice_base = cinn::common::AutoSimplify(ramp->base + i); optim::VarModSimplify(&slice_base); auto slide_stride = Expr(1); auto slide_index = slice_base; diff --git a/paddle/cinn/backends/llvm/codegen_llvm.h b/paddle/cinn/backends/llvm/codegen_llvm.h index ff885db2c8e594..3428e213b014f8 100644 --- a/paddle/cinn/backends/llvm/codegen_llvm.h +++ b/paddle/cinn/backends/llvm/codegen_llvm.h @@ -118,7 +118,7 @@ class CodeGenLLVM : public LLVMIRVisitor, public IrBuilderMixin { llvm::Module *m, llvm::IRBuilder<> *b, const std::shared_ptr &symbol_table = nullptr, - const Target &target = common::DefaultHostTarget()); + const Target &target = cinn::common::DefaultHostTarget()); // Common llvm types // @{ diff --git a/paddle/cinn/backends/llvm/codegen_llvm_test.cc b/paddle/cinn/backends/llvm/codegen_llvm_test.cc index aa6ca91af1b26b..930e70f22e8692 100644 --- a/paddle/cinn/backends/llvm/codegen_llvm_test.cc +++ b/paddle/cinn/backends/llvm/codegen_llvm_test.cc @@ -59,7 +59,7 @@ auto CreateTensor() { auto c = lang::Compute( {M, N}, [&](auto i, auto j) { return a(i, j) + b(i, j); }, "c"); - lang::Buffer c_buf(common::Float(32)); + lang::Buffer c_buf(cinn::common::Float(32)); return std::make_tuple( std::move(a), std::move(b), std::move(c), std::move(c_buf)); @@ -82,7 +82,7 @@ template -auto CreateBinaryOp(common::Type t, T1 x, T2 y) { +auto CreateBinaryOp(cinn::common::Type t, T1 x, T2 y) { auto px = std::make_unique(t, x); auto py 
= std::make_unique(t, y); @@ -92,7 +92,7 @@ auto CreateBinaryOp(common::Type t, T1 x, T2 y) { return std::make_unique(std::move(ex), std::move(ey)); } -auto CreateIrBuffer(common::Type t, +auto CreateIrBuffer(cinn::common::Type t, std::string name, std::vector shape, int data_alignment = 0) { @@ -104,7 +104,7 @@ auto CreateIrBuffer(common::Type t, } for (auto i : shape) { - auto pi = std::make_unique(common::Int(32), i); + auto pi = std::make_unique(cinn::common::Int(32), i); buffer->shape.emplace_back(pi.release()); } @@ -114,7 +114,7 @@ auto CreateIrBuffer(common::Type t, auto CreateIrTensor(std::string name, std::vector shape) { std::vector shape_expr; for (auto i : shape) { - auto pi = std::make_unique(common::Int(32), i); + auto pi = std::make_unique(cinn::common::Int(32), i); shape_expr.emplace_back(pi.release()); } @@ -146,28 +146,28 @@ TEST(CodeGenLLVM, Imm) { llvm::Value *value = nullptr; - ir::IntImm i32_imm(common::Int(32), 10); + ir::IntImm i32_imm(cinn::common::Int(32), 10); value = emitter->Visit(&i32_imm); ASSERT_EQ(value->getType(), i32); ASSERT_EQ(value, llvm::ConstantInt::get(i32, i32_imm.value, true)); // value->print(llvm::outs(), false); - ir::UIntImm u32_imm(common::UInt(32), 5); + ir::UIntImm u32_imm(cinn::common::UInt(32), 5); value = emitter->Visit(&u32_imm); ASSERT_EQ(value->getType(), u32); ASSERT_EQ(value, llvm::ConstantInt::get(u32, u32_imm.value, false)); - ir::FloatImm float32_imm(common::Float(32), 2.5); + ir::FloatImm float32_imm(cinn::common::Float(32), 2.5); value = emitter->Visit(&float32_imm); ASSERT_EQ(value->getType(), f32); ASSERT_EQ(value, llvm::ConstantFP::get(f32, float32_imm.value)); - ir::FloatImm float16_imm(common::Float16(), 2.5); + ir::FloatImm float16_imm(cinn::common::Float16(), 2.5); value = emitter->Visit(&float16_imm); ASSERT_EQ(value->getType(), f16); ASSERT_EQ(value, llvm::ConstantFP::get(f16, float16_imm.value)); - ir::FloatImm bfloat16_imm(common::BFloat16(), 2.5); + ir::FloatImm bfloat16_imm(cinn::common::BFloat16(), 2.5); value = emitter->Visit(&bfloat16_imm); ASSERT_EQ(value->getType(), bf16); ASSERT_EQ(value, llvm::ConstantFP::get(bf16, bfloat16_imm.value)); @@ -198,7 +198,8 @@ TEST(CodeGenLLVM, Expr) { do { int x = 2; int y = 3; - auto op = CreateBinaryOp(common::Int(32), x, y); + auto op = + CreateBinaryOp(cinn::common::Int(32), x, y); expect_value = llvm::ConstantInt::get(i32, x + y); value = emitter->Visit(op.get()); @@ -213,8 +214,8 @@ TEST(CodeGenLLVM, Expr) { do { float x = 2.5; float y = 3.5; - auto op = - CreateBinaryOp(common::Float(32), x, y); + auto op = CreateBinaryOp( + cinn::common::Float(32), x, y); expect_value = llvm::ConstantFP::get(f32, x - y); value = emitter->Visit(op.get()); @@ -226,8 +227,8 @@ TEST(CodeGenLLVM, Expr) { do { float16 x{2.5}; float16 y{3.5}; - auto op = - CreateBinaryOp(common::Float16(), x, y); + auto op = CreateBinaryOp( + cinn::common::Float16(), x, y); expect_value = llvm::ConstantFP::get(f16, x - y); value = emitter->Visit(op.get()); @@ -240,7 +241,7 @@ TEST(CodeGenLLVM, Expr) { bfloat16 x{2.5}; bfloat16 y{3.5}; auto op = CreateBinaryOp( - common::BFloat16(), x, y); + cinn::common::BFloat16(), x, y); expect_value = llvm::ConstantFP::get(bf16, x - y); value = emitter->Visit(op.get()); @@ -252,7 +253,8 @@ TEST(CodeGenLLVM, Expr) { do { int x = 5; int y = 3; - auto op = CreateBinaryOp(common::Int(64), x, y); + auto op = + CreateBinaryOp(cinn::common::Int(64), x, y); expect_value = llvm::ConstantInt::get(i64, x * y); value = emitter->Visit(op.get()); ASSERT_EQ(value->getType(), i64); @@ 
-263,8 +265,8 @@ TEST(CodeGenLLVM, Expr) { do { float x = 6; float y = 4; - auto op = - CreateBinaryOp(common::Float(32), x, y); + auto op = CreateBinaryOp( + cinn::common::Float(32), x, y); expect_value = llvm::ConstantFP::get(f32, x / y); value = emitter->Visit(op.get()); ASSERT_EQ(value->getType(), f32); @@ -275,8 +277,8 @@ TEST(CodeGenLLVM, Expr) { do { float16 x{6}; float16 y{4}; - auto op = - CreateBinaryOp(common::Float16(), x, y); + auto op = CreateBinaryOp( + cinn::common::Float16(), x, y); expect_value = llvm::ConstantFP::get(f16, x / y); value = emitter->Visit(op.get()); ASSERT_EQ(value->getType(), f16); @@ -288,7 +290,7 @@ TEST(CodeGenLLVM, Expr) { bfloat16 x{6}; bfloat16 y{4}; auto op = CreateBinaryOp( - common::BFloat16(), x, y); + cinn::common::BFloat16(), x, y); expect_value = llvm::ConstantFP::get(bf16, x / y); value = emitter->Visit(op.get()); ASSERT_EQ(value->getType(), bf16); @@ -299,7 +301,8 @@ TEST(CodeGenLLVM, Expr) { do { int x = 25; int y = 7; - auto op = CreateBinaryOp(common::Int(32), x, y); + auto op = + CreateBinaryOp(cinn::common::Int(32), x, y); expect_value = llvm::ConstantInt::get(i32, x % y); value = emitter->Visit(op.get()); ASSERT_EQ(value->getType(), i32); @@ -310,7 +313,8 @@ TEST(CodeGenLLVM, Expr) { do { int x = 3; int y = 3; - auto op = CreateBinaryOp(common::Int(32), x, y); + auto op = + CreateBinaryOp(cinn::common::Int(32), x, y); expect_value = llvm::ConstantInt::get(i1, 1); value = emitter->Visit(op.get()); ASSERT_EQ(value->getType(), i1); @@ -322,8 +326,8 @@ TEST(CodeGenLLVM, Expr) { float x = 3; float y = 3; - auto op = - CreateBinaryOp(common::Float(32), x, y); + auto op = CreateBinaryOp( + cinn::common::Float(32), x, y); expect_value = llvm::ConstantInt::get(i1, 0); value = emitter->Visit(op.get()); ASSERT_EQ(value->getType(), i1); @@ -334,7 +338,8 @@ TEST(CodeGenLLVM, Expr) { do { int x = 6; int y = 6; - auto op = CreateBinaryOp(common::Int(32), x, y); + auto op = + CreateBinaryOp(cinn::common::Int(32), x, y); value = emitter->Visit(op.get()); expect_value = llvm::ConstantInt::get(i1, 0); ASSERT_EQ(value->getType(), i1); @@ -345,7 +350,8 @@ TEST(CodeGenLLVM, Expr) { do { int x = 6; int y = 6; - auto op = CreateBinaryOp(common::Int(32), x, y); + auto op = + CreateBinaryOp(cinn::common::Int(32), x, y); value = emitter->Visit(op.get()); expect_value = llvm::ConstantInt::get(i1, 1); ASSERT_EQ(value->getType(), i1); @@ -356,7 +362,8 @@ TEST(CodeGenLLVM, Expr) { do { int x = 6; int y = 6; - auto op = CreateBinaryOp(common::Int(32), x, y); + auto op = + CreateBinaryOp(cinn::common::Int(32), x, y); value = emitter->Visit(op.get()); expect_value = llvm::ConstantInt::get(i1, 0); ASSERT_EQ(value->getType(), i1); @@ -367,7 +374,8 @@ TEST(CodeGenLLVM, Expr) { do { int x = 6; int y = 6; - auto op = CreateBinaryOp(common::Int(32), x, y); + auto op = + CreateBinaryOp(cinn::common::Int(32), x, y); value = emitter->Visit(op.get()); expect_value = llvm::ConstantInt::get(i1, 1); ASSERT_EQ(value->getType(), i1); @@ -382,7 +390,8 @@ TEST(CodeGenLLVM, Expr) { do { int x = 2; int y = 3; - auto op = CreateBinaryOp(common::Int(32), x, y); + auto op = + CreateBinaryOp(cinn::common::Int(32), x, y); value = emitter->Visit(op.get()); expect_value = llvm::ConstantInt::get(i32, std::min(x, y)); ASSERT_EQ(value->getType(), i32); @@ -393,8 +402,8 @@ TEST(CodeGenLLVM, Expr) { do { float x = 2; float y = 3; - auto op = - CreateBinaryOp(common::Float(32), x, y); + auto op = CreateBinaryOp( + cinn::common::Float(32), x, y); value = emitter->Visit(op.get()); expect_value = 
llvm::ConstantFP::get(f32, std::max(x, y)); ASSERT_EQ(value->getType(), f32); @@ -412,9 +421,9 @@ TEST(CodeGenLLVM, Expr) { // i32 -> f32 LOG(INFO) << "test i32 -> f32"; int v2 = 2; - auto x2 = std::make_unique(common::Int(32), v2); + auto x2 = std::make_unique(cinn::common::Int(32), v2); auto ex2 = ir::Expr(x2.release()); - auto op2 = ir::Cast::Make(common::Float(32), std::move(ex2)); + auto op2 = ir::Cast::Make(cinn::common::Float(32), std::move(ex2)); value = emitter->Visit(&op2); expect_value = llvm::ConstantFP::get(f32, v2); ASSERT_EQ(value->getType(), f32); @@ -423,9 +432,9 @@ TEST(CodeGenLLVM, Expr) { // f32 -> i32 LOG(INFO) << "test f32 -> i32"; float v3 = 3; - auto x3 = std::make_unique(common::Float(32), v3); + auto x3 = std::make_unique(cinn::common::Float(32), v3); auto ex3 = ir::Expr(x3.release()); - auto op3 = ir::Cast::Make(common::Int(32), std::move(ex3)); + auto op3 = ir::Cast::Make(cinn::common::Int(32), std::move(ex3)); value = emitter->Visit(&op3); expect_value = llvm::ConstantInt::get(i32, v3); ASSERT_EQ(value->getType(), i32); @@ -434,9 +443,9 @@ TEST(CodeGenLLVM, Expr) { // i32 -> f16 LOG(INFO) << "test i32 -> f16"; int v4 = 4; - auto x4 = std::make_unique(common::Int(32), v4); + auto x4 = std::make_unique(cinn::common::Int(32), v4); auto ex4 = ir::Expr(x4.release()); - auto op4 = ir::Cast::Make(common::Float16(), std::move(ex4)); + auto op4 = ir::Cast::Make(cinn::common::Float16(), std::move(ex4)); value = emitter->Visit(&op4); expect_value = llvm::ConstantFP::get(f16, v4); ASSERT_EQ(value->getType(), f16); @@ -445,9 +454,9 @@ TEST(CodeGenLLVM, Expr) { // f16 -> f32 LOG(INFO) << "test f16 -> f32"; float16 v5{5}; - auto x5 = std::make_unique(common::Float16(), v5); + auto x5 = std::make_unique(cinn::common::Float16(), v5); auto ex5 = ir::Expr(x5.release()); - auto op5 = ir::Cast::Make(common::Float(32), std::move(ex5)); + auto op5 = ir::Cast::Make(cinn::common::Float(32), std::move(ex5)); value = emitter->Visit(&op5); expect_value = llvm::ConstantFP::get(f32, v5); ASSERT_EQ(value->getType(), f32); @@ -456,9 +465,9 @@ TEST(CodeGenLLVM, Expr) { // i32 -> bf16 LOG(INFO) << "test i32 -> bf16"; int v6 = 4; - auto x6 = std::make_unique(common::Int(32), v6); + auto x6 = std::make_unique(cinn::common::Int(32), v6); auto ex6 = ir::Expr(x6.release()); - auto op6 = ir::Cast::Make(common::BFloat16(), std::move(ex6)); + auto op6 = ir::Cast::Make(cinn::common::BFloat16(), std::move(ex6)); value = emitter->Visit(&op6); expect_value = llvm::ConstantFP::get(bf16, v6); ASSERT_EQ(value->getType(), bf16); @@ -467,9 +476,9 @@ TEST(CodeGenLLVM, Expr) { // bf16 -> f32 LOG(INFO) << "test bf16 -> f32"; bfloat16 v7{5}; - auto x7 = std::make_unique(common::BFloat16(), v7); + auto x7 = std::make_unique(cinn::common::BFloat16(), v7); auto ex7 = ir::Expr(x7.release()); - auto op7 = ir::Cast::Make(common::Float(32), std::move(ex7)); + auto op7 = ir::Cast::Make(cinn::common::Float(32), std::move(ex7)); value = emitter->Visit(&op7); expect_value = llvm::ConstantFP::get(f32, v7); ASSERT_EQ(value->getType(), f32); @@ -515,7 +524,7 @@ TEST(CodeGenLLVM, Statement) { // ir::Tensor auto tensor_op = CreateIrTensor("x", {2, 3}); - tensor_op->buffer = CreateIrBuffer(common::Int(32), "", {2, 3}); + tensor_op->buffer = CreateIrBuffer(cinn::common::Int(32), "", {2, 3}); // ir::Alloc auto alloc_op = std::make_unique(); @@ -525,17 +534,19 @@ TEST(CodeGenLLVM, Statement) { auto store_op = std::make_unique(); store_op->tensor = ir::Expr(tensor_op); for (int i : {1, 1}) { - auto pi = 
std::make_unique(common::Int(32), std::move(i)); + auto pi = + std::make_unique(cinn::common::Int(32), std::move(i)); store_op->indices.emplace_back(pi.release()); } - auto store_value = std::make_unique(common::Int(32), 5); + auto store_value = std::make_unique(cinn::common::Int(32), 5); store_op->value = ir::Expr(store_value.release()); // ir::Load auto load_op = std::make_unique(); load_op->tensor = ir::Expr(tensor_op); for (int i : {1, 1}) { - auto pi = std::make_unique(common::Int(32), std::move(i)); + auto pi = + std::make_unique(cinn::common::Int(32), std::move(i)); load_op->indices.emplace_back(pi.release()); } @@ -544,7 +555,7 @@ TEST(CodeGenLLVM, Statement) { free_op->destination = ir::Expr(tensor_op->buffer); // ir::Call - auto call_op = std::make_unique(common::Int(32)); + auto call_op = std::make_unique(cinn::common::Int(32)); call_op->name = "codegen_llvm_test.Alloc_Store_Load_Free"; // Emit llvm ir diff --git a/paddle/cinn/backends/llvm/codegen_x86.cc b/paddle/cinn/backends/llvm/codegen_x86.cc index 9de0603e2c9e26..cfd796162241c0 100644 --- a/paddle/cinn/backends/llvm/codegen_x86.cc +++ b/paddle/cinn/backends/llvm/codegen_x86.cc @@ -128,8 +128,8 @@ void CodeGenX86::CreateParallelLaunch(Expr body, int num_task) { symbol_table_->PushScope(); UnpackVars(vars, data); ParallelEnv par_env; - auto task_id_name = common::UniqName("task_id"); - auto num_task_name = common::UniqName("num_task"); + auto task_id_name = cinn::common::UniqName("task_id"); + auto num_task_name = cinn::common::UniqName("num_task"); par_env.task_id = ir::Var(task_id_name, Int(32)); par_env.num_task = ir::Var(num_task_name, Int(32)); SetVar(task_id_name, task_id); diff --git a/paddle/cinn/backends/llvm/codegen_x86_test.cc b/paddle/cinn/backends/llvm/codegen_x86_test.cc index 42cd0f171435da..16c698fd88bd6e 100644 --- a/paddle/cinn/backends/llvm/codegen_x86_test.cc +++ b/paddle/cinn/backends/llvm/codegen_x86_test.cc @@ -39,7 +39,7 @@ TEST(Vectorize, basic) { LOG(INFO) << "fn: " << fn; - Module::Builder builder("module", common::DefaultHostTarget()); + Module::Builder builder("module", cinn::common::DefaultHostTarget()); builder.AddFunction(fn); auto module = builder.Build(); @@ -53,18 +53,21 @@ TEST(Vectorize, basic) { auto* fn_ptr = reinterpret_cast(fn_); - auto* A_buf = common::BufferBuilder(Float(32), {1024}) + auto* A_buf = cinn::common::BufferBuilder(Float(32), {1024}) .set_random() .set_align(64) .Build(); - auto* B_buf = common::BufferBuilder(Float(32), {1024}) + auto* B_buf = cinn::common::BufferBuilder(Float(32), {1024}) .set_random() .set_align(64) .Build(); - auto* C_buf = - common::BufferBuilder(Float(32), {1024}).set_zero().set_align(64).Build(); + auto* C_buf = cinn::common::BufferBuilder(Float(32), {1024}) + .set_zero() + .set_align(64) + .Build(); - auto args = common::ArgsBuilder().Add(A_buf).Add(B_buf).Add(C_buf).Build(); + auto args = + cinn::common::ArgsBuilder().Add(A_buf).Add(B_buf).Add(C_buf).Build(); fn_ptr(reinterpret_cast(args.data()), args.size()); diff --git a/paddle/cinn/backends/llvm/execution_engine_test.cc b/paddle/cinn/backends/llvm/execution_engine_test.cc index 7adca52f34ca78..a66b63248a50dd 100644 --- a/paddle/cinn/backends/llvm/execution_engine_test.cc +++ b/paddle/cinn/backends/llvm/execution_engine_test.cc @@ -107,10 +107,10 @@ auto CreateTestCinnModule() { {M, N}, [&](Var i, Var j) { return A(i, j) + B(i, j); }, "C"); C->Bind(C_buf); - common::Target target; - target.arch = common::Target::Arch::X86; - target.bits = common::Target::Bit::k32; - target.os = 
common::Target::OS::Linux; + cinn::common::Target target; + target.arch = cinn::common::Target::Arch::X86; + target.bits = cinn::common::Target::Bit::k32; + target.os = cinn::common::Target::OS::Linux; ir::Module::Builder builder("module1", target); auto stages = CreateStages({C}); @@ -154,7 +154,7 @@ TEST(llvm_test01, elementwise_add) { } TEST(llvm, module_call_lowered_func) { - ir::Module::Builder builder("some_module", common::DefaultHostTarget()); + ir::Module::Builder builder("some_module", cinn::common::DefaultHostTarget()); ir::Expr M(kM); ir::Expr N(kN); { // define fn @@ -184,7 +184,7 @@ TEST(llvm, module_call_lowered_func) { auto main_fn = lang::Lower("main", stages, {a, b, c}, {}); builder.AddFunction(main_fn); - CodeGenC codegen(common::DefaultHostTarget()); + CodeGenC codegen(cinn::common::DefaultHostTarget()); codegen.SetInlineBuiltinCodes(false); LOG(INFO) << "module:\n" << codegen.Compile(builder.Build(), CodeGenC::OutputKind::CImpl); @@ -318,7 +318,7 @@ TEST(ExecutionEngine, call_extern) { stages[add_out]->ComputeInline(); auto func = Lower("comp", stages, {x, y, res}); - Module::Builder builder("module0", common::DefaultHostTarget()); + Module::Builder builder("module0", cinn::common::DefaultHostTarget()); builder.AddFunction(func); auto engine = backends::ExecutionEngine::Create({1}); diff --git a/paddle/cinn/backends/llvm/llvm_intrin_rule.h b/paddle/cinn/backends/llvm/llvm_intrin_rule.h index 77d22349ed2580..903c056196f4e4 100644 --- a/paddle/cinn/backends/llvm/llvm_intrin_rule.h +++ b/paddle/cinn/backends/llvm/llvm_intrin_rule.h @@ -104,7 +104,7 @@ void RegisterCpuIntrinRule() { Expr arg = node->read_args[0]; Type type = arg->type(); if (type.is_int() || type.is_uint()) { - *rv = common::make_bool(false, type.lanes()); + *rv = cinn::common::make_bool(false, type.lanes()); } else if (type.is_float()) { *rv = ir::EQ::Make(lang::Abs(arg), lang::Infinity(type)) && !(lang::IsNan(arg)); diff --git a/paddle/cinn/backends/llvm/llvm_util.cc b/paddle/cinn/backends/llvm/llvm_util.cc index 32256ecc5c9ca0..f1c35d7f58e68d 100644 --- a/paddle/cinn/backends/llvm/llvm_util.cc +++ b/paddle/cinn/backends/llvm/llvm_util.cc @@ -26,7 +26,7 @@ namespace backends { using cinn::common::bfloat16; using cinn::common::float16; -llvm::Type *CinnTypeToLLVMType(common::Type type, +llvm::Type *CinnTypeToLLVMType(cinn::common::Type type, llvm::Module *m, bool is_vec) { llvm::Type *ir_type = nullptr; @@ -118,10 +118,10 @@ llvm::Type *CinnTypeToLLVMType(common::Type type, return ir_type; } -#define __(ty__) \ - template <> \ - llvm::Type *llvm_type_of(llvm::Module * m) { \ - return CinnTypeToLLVMType(common::type_of(), m); \ +#define __(ty__) \ + template <> \ + llvm::Type *llvm_type_of(llvm::Module * m) { \ + return CinnTypeToLLVMType(cinn::common::type_of(), m); \ } __(int8_t) diff --git a/paddle/cinn/backends/llvm/llvm_util.h b/paddle/cinn/backends/llvm/llvm_util.h index dd1a79768ab027..de9227c3d94525 100644 --- a/paddle/cinn/backends/llvm/llvm_util.h +++ b/paddle/cinn/backends/llvm/llvm_util.h @@ -48,7 +48,7 @@ inline llvm::StringRef AsStringRef(absl::string_view str) { return llvm::StringRef(str.data(), str.size()); } -llvm::Type *CinnTypeToLLVMType(common::Type t, +llvm::Type *CinnTypeToLLVMType(cinn::common::Type t, llvm::Module *m, bool is_vec = false); diff --git a/paddle/cinn/backends/nvrtc/nvrtc_util.cc b/paddle/cinn/backends/nvrtc/nvrtc_util.cc index b5ef59f6bdae2e..4ad94e506c1986 100644 --- a/paddle/cinn/backends/nvrtc/nvrtc_util.cc +++ b/paddle/cinn/backends/nvrtc/nvrtc_util.cc @@ -176,7 
+176,7 @@ std::string Compiler::CompileWithNvcc(const std::string& cuda_c) { } // get unique prefix name - prefix_name_ = dir + "/" + common::UniqName("rtc_tmp"); + prefix_name_ = dir + "/" + cinn::common::UniqName("rtc_tmp"); auto cuda_c_file = prefix_name_ + ".cu"; std::ofstream ofs(cuda_c_file, std::ios::out); @@ -194,7 +194,7 @@ std::string Compiler::CompileWithNvcc(const std::string& cuda_c) { // std::ios::in); } void Compiler::CompileToPtx() { - auto include_dir = common::Context::Global().runtime_include_dir(); + auto include_dir = cinn::common::Context::Global().runtime_include_dir(); std::string include_dir_str = ""; for (auto dir : include_dir) { if (include_dir_str.empty()) { diff --git a/paddle/cinn/common/arithmatic.cc b/paddle/cinn/common/arithmatic.cc index 1caad647bba69f..5cabe56dff2db5 100644 --- a/paddle/cinn/common/arithmatic.cc +++ b/paddle/cinn/common/arithmatic.cc @@ -125,7 +125,7 @@ GiNaC::ex ExprToGinacConverter::BuildHelper(ir::Expr expr) { } GiNaC::ex ExprToGinacConverter::operator()(Expr expr) { - // TODO(Superjomn) Replace this with common::IsPureMath( + // TODO(Superjomn) Replace this with cinn::common::IsPureMath( auto complex_nodes = ir::ir_utils::CollectIRNodes(expr, [](const Expr* n) { return n->As() || // n->As() || // diff --git a/paddle/cinn/common/axis.cc b/paddle/cinn/common/axis.cc index 0e13c6bd0781a6..9913a38b5ed5cc 100644 --- a/paddle/cinn/common/axis.cc +++ b/paddle/cinn/common/axis.cc @@ -68,7 +68,7 @@ std::string axis_name(int level) { std::vector GenDefaultAxis(int naxis) { std::vector axis; for (int i = 0; i < naxis; i++) { - axis.emplace_back(common::axis_name(i)); + axis.emplace_back(cinn::common::axis_name(i)); CHECK(axis.back()->type().valid()); } return axis; } diff --git a/paddle/cinn/common/cas.cc b/paddle/cinn/common/cas.cc index 9a4d5bc3ed2794..a24549896de506 100644 --- a/paddle/cinn/common/cas.cc +++ b/paddle/cinn/common/cas.cc @@ -2309,14 +2309,14 @@ Expr SolveInequality(Expr inequality, Var val) { #undef __ Expr all = AutoSimplify(a - b); - // if (common::IsPureMath(a) && common::IsPureMath(b)) { + // if (cinn::common::IsPureMath(a) && cinn::common::IsPureMath(b)) { if (true) { - auto _res_positive_ = common::Solve(a, b, val); // NOLINT + auto _res_positive_ = cinn::common::Solve(a, b, val); // NOLINT auto& res = std::get<0>(_res_positive_); auto& positive = std::get<1>(_res_positive_); // Simplify it with CAS to avoid random result from GiNac.
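The hunks above are representative of the whole patch: the only functional change is spelling the relative common:: out as the fully qualified cinn::common::. A minimal sketch of the name clash this guards against, assuming (per this patch's goal of moving DDim etc. into a top-level common namespace) that a second, global ::common is now visible; every name below is illustrative, not a real declaration:

    // Illustrative only: why an unqualified common:: can stop compiling once
    // a top-level ::common namespace (the new home of DDim etc.) exists.
    namespace common {               // hypothetical stand-in for the new
    struct DDim {};                  // top-level common namespace
    }  // namespace common

    namespace cinn {
    namespace common {
    struct Type {};
    }  // namespace common
    }  // namespace cinn

    using namespace cinn;  // a very common idiom in CINN .cc and test files

    int main() {
      // common::Type t{};           // error: reference to 'common' is
      //                             // ambiguous (::common vs cinn::common)
      cinn::common::Type t{};        // the fully qualified form this patch uses
      (void)t;
      return 0;
    }

Inside namespace cinn itself the nearer cinn::common would still win unqualified lookup, so the rewrite mainly hardens files and tests that open the namespace with a using-directive, as many of the test files touched here do.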
res = AutoSimplify(res); - res = common::cast(res, val->type()); + res = cinn::common::cast(res, val->type()); if (le_n) { if (positive) return ir::LE::Make(val, res); diff --git a/paddle/cinn/common/cas_test.cc b/paddle/cinn/common/cas_test.cc index e260d91844d763..d37bd87c23685f 100644 --- a/paddle/cinn/common/cas_test.cc +++ b/paddle/cinn/common/cas_test.cc @@ -26,7 +26,7 @@ namespace cinn { namespace common { -using common::make_const; +using cinn::common::make_const; using utils::GetStreamCnt; using utils::Join; using utils::Trim; @@ -181,8 +181,8 @@ TEST(CAS, FracOp) { auto u4 = AutoSimplify(Expr(32768) * (((Expr(32) * x) + y) / 32)); EXPECT_EQ(GetStreamCnt(u4), "((32768 * (y / 32)) + (32768 * x))"); - common::cas_intervals_t var_intervals; - var_intervals.emplace("y", common::CasInterval(0, 31)); + cinn::common::cas_intervals_t var_intervals; + var_intervals.emplace("y", cinn::common::CasInterval(0, 31)); auto u = AutoSimplify((Expr(x) * 32 + y) / 32, var_intervals); EXPECT_EQ(GetStreamCnt(u), "x"); diff --git a/paddle/cinn/common/cinn_value.h b/paddle/cinn/common/cinn_value.h index 587a79ec71c6ff..3cfb4214d76b9a 100755 --- a/paddle/cinn/common/cinn_value.h +++ b/paddle/cinn/common/cinn_value.h @@ -50,7 +50,7 @@ class CINNValuePack; /** * A _CINNValuePack_ is a shared Array of multiple CINNValue. */ -struct _CINNValuePack_ : public common::Object { +struct _CINNValuePack_ : public cinn::common::Object { /** * Create a new CINNValuePack instance. * @param array The list of CINNValues. diff --git a/paddle/cinn/common/cinn_value_test.cc b/paddle/cinn/common/cinn_value_test.cc index 3419ba7849c09b..2e3e30183f61de 100644 --- a/paddle/cinn/common/cinn_value_test.cc +++ b/paddle/cinn/common/cinn_value_test.cc @@ -51,7 +51,7 @@ TEST(CINNValue, Expr) { { CINNValue copied = CINNValue(a); - ASSERT_TRUE(copied == common::make_const(1)); + ASSERT_TRUE(copied == cinn::common::make_const(1)); } } diff --git a/paddle/cinn/common/common.h b/paddle/cinn/common/common.h index e54d8aad4b31d7..34623d904515b3 100644 --- a/paddle/cinn/common/common.h +++ b/paddle/cinn/common/common.h @@ -28,25 +28,25 @@ namespace cinn { // export some general concepts. -using common::Context; -using common::make_shared; -using common::Object; -using common::ref_count; -using common::Shared; -using common::UniqName; +using cinn::common::Context; +using cinn::common::make_shared; +using cinn::common::Object; +using cinn::common::ref_count; +using cinn::common::Shared; +using cinn::common::UniqName; // Type related. 
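The common.h hunk above keeps CINN's existing trick of re-exporting nested names at the project root, only with fully qualified right-hand sides. A rough sketch of that re-export pattern, with abbreviated stand-in names rather than the real header:

    namespace cinn {
    namespace common {
    struct Target {};
    inline Target DefaultHostTarget() { return Target{}; }
    }  // namespace common

    // Re-export so user code can write cinn::Target / cinn::DefaultHostTarget.
    // Qualifying the right-hand side keeps these declarations unambiguous even
    // when a different top-level ::common namespace is also in scope.
    using cinn::common::DefaultHostTarget;
    using cinn::common::Target;
    }  // namespace cinn

    int main() {
      cinn::Target t = cinn::DefaultHostTarget();  // resolved via the re-export
      (void)t;
      return 0;
    }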
-using common::Bool; -using common::Float; -using common::Int; -using common::UInt; -using common::Void; +using cinn::common::Bool; +using cinn::common::Float; +using cinn::common::Int; +using cinn::common::UInt; +using cinn::common::Void; -using common::type_of; +using cinn::common::type_of; -using common::Target; -using common::Type; -using common::UnkTarget; +using cinn::common::Target; +using cinn::common::Type; +using cinn::common::UnkTarget; template T& Reference(const T* x) { @@ -63,7 +63,7 @@ static void CheckVarNameValid(const absl::string_view name) { name.find('\n') == std::string::npos && // name.find('\r') == std::string::npos) << "Some invalid character found"; - CHECK(!common::IsAxisNameReserved(std::string(name))) + CHECK(!cinn::common::IsAxisNameReserved(std::string(name))) << "The name [" << name << "] is reserved for internal axis"; } diff --git a/paddle/cinn/common/equation_graph_topo_walker.h b/paddle/cinn/common/equation_graph_topo_walker.h index b7fba86f5a0b2e..6098a54100d606 100644 --- a/paddle/cinn/common/equation_graph_topo_walker.h +++ b/paddle/cinn/common/equation_graph_topo_walker.h @@ -110,7 +110,7 @@ class EquationGraphTopoWalker final { for (VarIterT iter = begin; iter != end; ++iter) { VisitNextFunctions(*iter, [&](FT f) { starts.emplace_back(f); }); } - common::BfsWalker bfs_walker{BfsVisitNextFunction}; + cinn::common::BfsWalker bfs_walker{BfsVisitNextFunction}; bfs_walker(starts.begin(), starts.end(), FunctionVisitor); } diff --git a/paddle/cinn/common/graph_utils.cc b/paddle/cinn/common/graph_utils.cc index a2b6861b899b49..d0341b8c5f64b9 100755 --- a/paddle/cinn/common/graph_utils.cc +++ b/paddle/cinn/common/graph_utils.cc @@ -211,7 +211,7 @@ bool GraphEdgeCompare::operator()(const Shared &a, } std::set Graph::CollectNodes( - std::function &&teller) { + std::function &&teller) { std::set res; for (auto *node : nodes()) { if (teller(node)) res.insert(node); diff --git a/paddle/cinn/common/graph_utils.h b/paddle/cinn/common/graph_utils.h index cb144e1c901c76..00c2a93f85e0fb 100644 --- a/paddle/cinn/common/graph_utils.h +++ b/paddle/cinn/common/graph_utils.h @@ -66,8 +66,8 @@ class GraphEdge : public Object { }; struct GraphEdgeCompare { - bool operator()(const common::Shared& a, - const common::Shared& b) const; + bool operator()(const cinn::common::Shared& a, + const cinn::common::Shared& b) const; }; /** @@ -224,11 +224,11 @@ class GraphNode : public Object { //! The input links of the node. //! \note We record the raw pointer rather than the shared pointer to avoid //! cycle reference. - std::set, GraphEdgeCompare> inlinks_; + std::set, GraphEdgeCompare> inlinks_; //! The output links of the node. //! \note We record the raw pointer rather than the shared pointer to avoid //! cycle reference. - std::set, GraphEdgeCompare> outlinks_; + std::set, GraphEdgeCompare> outlinks_; mutable int visited_time_{}; //! used to mark the index of node's input/output tensors @@ -276,7 +276,7 @@ class Graph { //! Collect the nodes match the condition defined by \p teller in the graph. 
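The graph_utils.h hunk above stores in/out links as std::set instances ordered by GraphEdgeCompare rather than by raw pointer value; one usual reason for such a comparator is deterministic, reproducible edge ordering. A self-contained sketch of the idea with hypothetical source_id/sink_id fields (the real comparator is defined in graph_utils.cc):

    #include <memory>
    #include <set>
    #include <tuple>

    struct Edge {
      int source_id;  // hypothetical endpoint ids, for illustration only
      int sink_id;
    };

    // Orders shared edges by their endpoints, so iterating the set is stable
    // across runs instead of following heap addresses.
    struct EdgeCompare {
      bool operator()(const std::shared_ptr<Edge>& a,
                      const std::shared_ptr<Edge>& b) const {
        return std::tie(a->source_id, a->sink_id) <
               std::tie(b->source_id, b->sink_id);
      }
    };

    int main() {
      std::set<std::shared_ptr<Edge>, EdgeCompare> outlinks;
      outlinks.insert(std::make_shared<Edge>(Edge{0, 1}));
      outlinks.insert(std::make_shared<Edge>(Edge{0, 1}));  // equivalent key
      return outlinks.size() == 1 ? 0 : 1;  // the duplicate is rejected
    }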
std::set CollectNodes( - std::function&& teller); + std::function&& teller); void DropNode(GraphNode* n) { auto it = std::find_if( @@ -291,7 +291,7 @@ class Graph { void ClearUnlinkedNodes( absl::flat_hash_map>* shape_dict, - absl::flat_hash_map* type_dict, + absl::flat_hash_map* type_dict, absl::flat_hash_map* layout_dict); size_t num_nodes() const { return nodes_.size(); } diff --git a/paddle/cinn/common/ir_util.cc b/paddle/cinn/common/ir_util.cc index d88289a8722b8a..774d7514e6fb23 100644 --- a/paddle/cinn/common/ir_util.cc +++ b/paddle/cinn/common/ir_util.cc @@ -69,8 +69,8 @@ Expr RampRelatedAdd(ir::Ramp *ramp, ir::Ramp *other) { CHECK(ramp); CHECK(other); if (ramp->lanes == other->lanes) { - Expr base_add = common::AutoSimplify(ramp->base + other->base); - Expr stride_add = common::AutoSimplify(ramp->stride + other->stride); + Expr base_add = cinn::common::AutoSimplify(ramp->base + other->base); + Expr stride_add = cinn::common::AutoSimplify(ramp->stride + other->stride); VLOG(2) << base_add; VLOG(2) << stride_add; return ir::Ramp::Make(base_add, stride_add, ramp->lanes); @@ -156,7 +156,7 @@ Expr IndiceToAbsOffset(const std::vector &shape, res = indice_prod; } } - return common::AutoSimplify(res); + return cinn::common::AutoSimplify(res); } Expr IndiceToAbsOffset(const std::vector &shape, diff --git a/paddle/cinn/common/ir_util.h b/paddle/cinn/common/ir_util.h index 179c5dfd0d1245..3f5831e9b858cb 100644 --- a/paddle/cinn/common/ir_util.h +++ b/paddle/cinn/common/ir_util.h @@ -69,10 +69,10 @@ inline Expr make_one() { return make_const(static_cast(1)); } inline Expr make_bool(bool x) { - return common::make_shared(Bool(), x); + return cinn::common::make_shared(Bool(), x); } inline Expr make_bool(bool x, int lanes) { - return common::make_shared(Bool(lanes), x); + return cinn::common::make_shared(Bool(lanes), x); } // @} diff --git a/paddle/cinn/common/make_subgraph_walker.h b/paddle/cinn/common/make_subgraph_walker.h index 0f90c3e6b25cda..d712deb7dda1a4 100644 --- a/paddle/cinn/common/make_subgraph_walker.h +++ b/paddle/cinn/common/make_subgraph_walker.h @@ -26,19 +26,19 @@ namespace cinn::common { template -common::TopoWalker MakeSubgraphWalker( - const common::TopoWalker& walker, +cinn::common::TopoWalker MakeSubgraphWalker( + const cinn::common::TopoWalker& walker, IterT src_begin, IterT src_end, IterT sink_begin, IterT sink_end) { - common::TopoWalker reversed_walker(walker.VisitNextNodes, - walker.VisitPrevNodes); + cinn::common::TopoWalker reversed_walker(walker.VisitNextNodes, + walker.VisitPrevNodes); auto ReachableToOneSrc = - common::MakeIsReachableFromSrcPredicator( + cinn::common::MakeIsReachableFromSrcPredicator( walker, src_begin, src_end); auto ReachableToOneSink = - common::MakeIsReachableFromSrcPredicator( + cinn::common::MakeIsReachableFromSrcPredicator( reversed_walker, sink_begin, sink_end); auto VisitPrevNodes = [ReachableToOneSrc, ReachableToOneSink, walker]( @@ -61,7 +61,7 @@ common::TopoWalker MakeSubgraphWalker( }); }; - return common::TopoWalker(VisitPrevNodes, VisitNextNodes); + return cinn::common::TopoWalker(VisitPrevNodes, VisitNextNodes); } } // namespace cinn::common diff --git a/paddle/cinn/common/union_find.h b/paddle/cinn/common/union_find.h index c42a14683ae3d5..18a2ee2bf69ae8 100644 --- a/paddle/cinn/common/union_find.h +++ b/paddle/cinn/common/union_find.h @@ -94,7 +94,7 @@ struct UnionFind { return res; } - std::vector> nodes; + std::vector> nodes; }; } // namespace common diff --git a/paddle/cinn/frontend/computation_test.cc 
b/paddle/cinn/frontend/computation_test.cc index b3b01af03aba98..e4666fb46e21d7 100644 --- a/paddle/cinn/frontend/computation_test.cc +++ b/paddle/cinn/frontend/computation_test.cc @@ -81,7 +81,7 @@ TEST(cinn_computation, basic_cpu) { auto c = builder.Add(a, b); auto d = builder.Add(a, c); - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); auto comp = CinnComputation::BuildAndCompile(target, builder); std::vector hostA(M * N); std::vector hostB(M * N); @@ -119,7 +119,7 @@ TEST(cinn_computation, basic_gpu) { auto c = builder.Add(a, b); auto d = builder.Add(a, c); - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); auto comp = CinnComputation::BuildAndCompile(target, builder); std::vector hostA(M * N); std::vector hostB(M * N); @@ -149,7 +149,7 @@ TEST(cinn_computation, basic_gpu) { TEST(cinn_computation, net_builder_cpu) { auto program = CreateTestProgram(); - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); auto compute = CinnComputation::Compile(target, program); auto inputs = compute->GetInputTensors(); ASSERT_EQ(inputs.size(), 2); @@ -183,7 +183,7 @@ TEST(cinn_computation, net_builder_cpu) { #ifdef CINN_WITH_CUDA TEST(cinn_computation, net_builder_gpu) { auto program = CreateTestProgram(); - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); auto compute = CinnComputation::Compile(target, program); auto inputs = compute->GetInputTensors(); ASSERT_EQ(inputs.size(), 2); @@ -223,7 +223,7 @@ TEST(cinn_computation, net_builder_gpu) { #endif TEST(cinn_computation, fc_execute_cpu) { - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); ASSERT_NE(FLAGS_model_dir, ""); auto compute = CinnComputation::CompilePaddleModel( target, FLAGS_model_dir, {"A"}, {{1, 30}}, false); @@ -240,7 +240,7 @@ TEST(cinn_computation, fc_execute_cpu) { #ifdef CINN_WITH_CUDA TEST(cinn_computation, fc_execute_gpu) { - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); ASSERT_NE(FLAGS_model_dir, ""); auto compute = CinnComputation::CompilePaddleModel( target, FLAGS_model_dir, {"A"}, {{1, 30}}, false); @@ -274,7 +274,7 @@ TEST(cinn_computation, decomposer_cpu) { // without decomposer { auto prog = CreateAddProgram(); - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); auto options = CinnComputation::DefaultCompileOptions(); options.use_decomposer = false; auto compute = CinnComputation::Compile(target, prog, options); @@ -284,7 +284,7 @@ TEST(cinn_computation, decomposer_cpu) { // with decomposer { auto prog = CreateAddProgram(); - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); auto options = CinnComputation::DefaultCompileOptions(); options.use_decomposer = true; auto compute = CinnComputation::Compile(target, prog, options); @@ -295,7 +295,7 @@ TEST(cinn_computation, decomposer_cpu) { #ifdef CINN_WITH_CUDA TEST(cinn_computation, gpu_stream) { // this test only shows the API usage - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); auto prog = CreateAddProgram(); auto options = CinnComputation::DefaultCompileOptions(); @@ -309,7 +309,7 @@ TEST(cinn_computation, gpu_stream) { TEST(cinn_computation, without_instantiate_variables) { // this test only shows the API usage - auto target = common::DefaultHostTarget(); + auto 
target = cinn::common::DefaultHostTarget(); auto prog = CreateAddProgram(); auto options = CinnComputation::DefaultCompileOptions(); options.with_instantiate_variables = false; diff --git a/paddle/cinn/frontend/decomposer/activation.cc b/paddle/cinn/frontend/decomposer/activation.cc index 040d1af9b1b986..bde48429c6d35e 100644 --- a/paddle/cinn/frontend/decomposer/activation.cc +++ b/paddle/cinn/frontend/decomposer/activation.cc @@ -28,8 +28,10 @@ void relu(const Instruction& instr, const DecomposerContext& context) { auto output = instr->outputs[0]; auto* builder = context.builder(); - auto bcast_zero = builder->FillConstant( - x->shape, 0.0f, common::UniqName("zero"), common::Type2Str(x->type)); + auto bcast_zero = builder->FillConstant(x->shape, + 0.0f, + cinn::common::UniqName("zero"), + cinn::common::Type2Str(x->type)); auto out = builder->Max(x, bcast_zero); // map the output of decomposed operator to the original. @@ -46,8 +48,10 @@ void relu_grad(const Instruction& instr, const DecomposerContext& context) { auto dx = instr->outputs[0]; auto* builder = context.builder(); - auto bcast_zero = builder->FillConstant( - out->shape, 0.0f, common::UniqName("zero"), common::Type2Str(out->type)); + auto bcast_zero = builder->FillConstant(out->shape, + 0.0f, + cinn::common::UniqName("zero"), + cinn::common::Type2Str(out->type)); auto condition = builder->GreaterThan(out, bcast_zero); auto res = builder->Select(condition, dout, bcast_zero); @@ -65,12 +69,14 @@ void gelu(const Instruction& instr, const DecomposerContext& context) { auto* builder = context.builder(); // x * (0.5 + 0.5 * erf(sqrtf(0.5) * x)) - auto p_5 = builder->FillConstant( - x->shape, 0.5f, common::UniqName("p_5"), common::Type2Str(x->type)); + auto p_5 = builder->FillConstant(x->shape, + 0.5f, + cinn::common::UniqName("p_5"), + cinn::common::Type2Str(x->type)); auto p_7 = builder->FillConstant(x->shape, std::sqrt(0.5), - common::UniqName("p_7"), - common::Type2Str(x->type)); + cinn::common::UniqName("p_7"), + cinn::common::Type2Str(x->type)); auto erf = builder->Erf(builder->Multiply(x, p_7)); auto cdf = builder->Add(p_5, builder->Multiply(p_5, erf)); auto out = builder->Multiply(x, cdf); diff --git a/paddle/cinn/frontend/decomposer/activation_test.cc b/paddle/cinn/frontend/decomposer/activation_test.cc index a024aa47537548..de8e6047011d27 100644 --- a/paddle/cinn/frontend/decomposer/activation_test.cc +++ b/paddle/cinn/frontend/decomposer/activation_test.cc @@ -77,7 +77,7 @@ TEST(Decomposer, softmax_decomposer) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = diff --git a/paddle/cinn/frontend/decomposer/batch_norm.cc b/paddle/cinn/frontend/decomposer/batch_norm.cc index 19c53bd506b8f9..b2d59053e43dee 100644 --- a/paddle/cinn/frontend/decomposer/batch_norm.cc +++ b/paddle/cinn/frontend/decomposer/batch_norm.cc @@ -80,8 +80,8 @@ struct BatchNormHelper { auto element_count_1d = builder->FillConstant(sum->shape, element_count, - common::UniqName("element_count"), - common::Type2Str(sum->type)); + cinn::common::UniqName("element_count"), + cinn::common::Type2Str(sum->type)); auto mean = builder->Divide(sum, element_count_1d); return mean; } @@ -93,8 +93,8 @@ struct BatchNormHelper { auto element_count_1d = builder->FillConstant(x_square_sum->shape, element_count, - common::UniqName("element_count"), - common::Type2Str(x_square_sum->type)); + cinn::common::UniqName("element_count"), +
cinn::common::Type2Str(x_square_sum->type)); auto x_square_mean = builder->Divide(x_square_sum, element_count_1d); auto variance = builder->Subtract( x_square_mean, builder->Multiply(mean, builder->Identity(mean))); @@ -103,10 +103,11 @@ struct BatchNormHelper { // std_variance_inv = rsqrt(variance + epsilon) Variable StdVarianceInv1d(Variable variance, float epsilon) { - auto epsilon_1d = builder->FillConstant(variance->shape, - epsilon, - common::UniqName("epsilon"), - common::Type2Str(variance->type)); + auto epsilon_1d = + builder->FillConstant(variance->shape, + epsilon, + cinn::common::UniqName("epsilon"), + cinn::common::Type2Str(variance->type)); auto std_variance_inv = builder->Rsqrt(builder->Add(variance, epsilon_1d)); return std_variance_inv; } @@ -117,8 +118,8 @@ struct BatchNormHelper { auto epsilon_4d = builder->FillConstant(variance_4d->shape, epsilon, - common::UniqName("epsilon"), - common::Type2Str(variance_4d->type)); + cinn::common::UniqName("epsilon"), + cinn::common::Type2Str(variance_4d->type)); auto std_variance_inv_4d = builder->Rsqrt(builder->Add(variance_4d, epsilon_4d)); return std_variance_inv_4d; @@ -129,14 +130,16 @@ struct BatchNormHelper { Variable UpdateMeanVariance(Variable moving_value, Variable saved_value, float momentum) { - auto factor_0 = builder->FillConstant(moving_value->shape, - momentum, - common::UniqName("factor_0"), - common::Type2Str(moving_value->type)); - auto factor_1 = builder->FillConstant(saved_value->shape, - 1.0f - momentum, - common::UniqName("factor_1"), - common::Type2Str(saved_value->type)); + auto factor_0 = + builder->FillConstant(moving_value->shape, + momentum, + cinn::common::UniqName("factor_0"), + cinn::common::Type2Str(moving_value->type)); + auto factor_1 = + builder->FillConstant(saved_value->shape, + 1.0f - momentum, + cinn::common::UniqName("factor_1"), + cinn::common::Type2Str(saved_value->type)); auto new_moving_value = builder->Add(builder->Multiply(moving_value, factor_0), builder->Multiply(saved_value, factor_1)); @@ -253,11 +256,11 @@ void batch_norm_grad(const Instruction& instr, // => x_grad = tmp0 * (tmp1 - tmp2 - tmp3) auto scaled_std_variance_inv = builder->Multiply(scale, helper.StdVarianceInv1d(save_variance, epsilon)); - auto element_count_1d = - builder->FillConstant(scaled_std_variance_inv->shape, - helper.element_count, - common::UniqName("element_count_1d"), - common::Type2Str(scaled_std_variance_inv->type)); + auto element_count_1d = builder->FillConstant( + scaled_std_variance_inv->shape, + helper.element_count, + cinn::common::UniqName("element_count_1d"), + cinn::common::Type2Str(scaled_std_variance_inv->type)); auto tmp0 = builder->BroadcastTo( builder->Divide(scaled_std_variance_inv, element_count_1d), x->shape, @@ -266,8 +269,8 @@ void batch_norm_grad(const Instruction& instr, auto element_count_4d = builder->FillConstant(y_grad->shape, helper.element_count, - common::UniqName("element_count_4d"), - common::Type2Str(y_grad->type)); + cinn::common::UniqName("element_count_4d"), + cinn::common::Type2Str(y_grad->type)); auto tmp1 = builder->Multiply(y_grad, element_count_4d); auto tmp2 = builder->BroadcastTo(bias_grad, x->shape, {helper.channel_dim}); @@ -283,8 +286,8 @@ void batch_norm_grad(const Instruction& instr, auto epsilon_1d = builder->FillConstant(save_variance->shape, epsilon, - common::UniqName("epsilon"), - common::Type2Str(save_variance->type)); + cinn::common::UniqName("epsilon"), + cinn::common::Type2Str(save_variance->type)); auto variance_add_eps = builder->Add(save_variance, 
epsilon_1d); auto variance_add_eps_4d = builder->BroadcastTo(variance_add_eps, x->shape, {helper.channel_dim}); diff --git a/paddle/cinn/frontend/decomposer/batch_norm_test.cc b/paddle/cinn/frontend/decomposer/batch_norm_test.cc index 87c6cccd0dea7d..e395ab58b720c0 100644 --- a/paddle/cinn/frontend/decomposer/batch_norm_test.cc +++ b/paddle/cinn/frontend/decomposer/batch_norm_test.cc @@ -189,7 +189,7 @@ TEST(Decomposer, BatchNormTrain) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target, cinn::frontend::DefaultTrainingOptimizeOptions().program_passes, @@ -389,7 +389,7 @@ TEST(Decomposer, BatchNormGrad) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target, cinn::frontend::DefaultTrainingOptimizeOptions().program_passes, diff --git a/paddle/cinn/frontend/decomposer/test_helper.h b/paddle/cinn/frontend/decomposer/test_helper.h index 526cee8182ea31..4a7bb9b2f80918 100644 --- a/paddle/cinn/frontend/decomposer/test_helper.h +++ b/paddle/cinn/frontend/decomposer/test_helper.h @@ -85,7 +85,7 @@ void CopyFromVector(const std::vector& vec, size_t numel = tensor->shape().numel(); CHECK_EQ(vec.size(), numel); - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { #ifdef CINN_WITH_CUDA cudaMemcpy(data, vec.data(), numel * sizeof(T), cudaMemcpyHostToDevice); #else @@ -204,7 +204,7 @@ void RunAndCheckShape(NetBuilder* builder, T high = 1, const std::vector& passes = {"Decomposer"}) { auto prog = builder->Build(); - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); RunDecomposer(&prog, target, passes, output_names); auto graph = std::make_shared(prog, target); hlir::framework::ApplyPasses(graph.get(), DefaultOpFusionPasses()); diff --git a/paddle/cinn/frontend/decomposer/top_k_test.cc b/paddle/cinn/frontend/decomposer/top_k_test.cc index a01cbcec0f6934..5dc70e36921d4a 100644 --- a/paddle/cinn/frontend/decomposer/top_k_test.cc +++ b/paddle/cinn/frontend/decomposer/top_k_test.cc @@ -29,7 +29,7 @@ TEST(Decomposer, top_k_decomposer) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = diff --git a/paddle/cinn/frontend/decomposer_registry.h b/paddle/cinn/frontend/decomposer_registry.h index 258c81e3350083..3dc142468a9e5e 100644 --- a/paddle/cinn/frontend/decomposer_registry.h +++ b/paddle/cinn/frontend/decomposer_registry.h @@ -67,7 +67,7 @@ class InstrDecomposerRegistry : public Registry { } inline const Decomposer* Get(const std::string& op_name, - const common::Target& target) { + const cinn::common::Target& target) { const Decomposer* decomposer = Find(op_name, target); CHECK(decomposer) << "Decomposer for [" << op_name << ", " << target << "] is not registered"; @@ -75,7 +75,7 @@ class InstrDecomposerRegistry : public Registry { } inline const Decomposer* Find(const std::string& name, - const common::Target& target) { + const cinn::common::Target& target) { return Registry::Find(name + "_" + target.arch_str()); } diff --git a/paddle/cinn/frontend/decomposer_registry_test.cc b/paddle/cinn/frontend/decomposer_registry_test.cc index ad3828706b1a6f..125b6cce97c646 100644 --- a/paddle/cinn/frontend/decomposer_registry_test.cc +++ b/paddle/cinn/frontend/decomposer_registry_test.cc @@ -21,7 +21,7 @@ 
namespace cinn::frontend { TEST(InstrDecomposerRegistry, basic) { - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); ASSERT_EQ(InstrDecomposerRegistry::Global()->Find("conv", target), nullptr); ASSERT_NE(InstrDecomposerRegistry::Global()->Find("relu", target), nullptr); } diff --git a/paddle/cinn/frontend/interpreter_test.cc b/paddle/cinn/frontend/interpreter_test.cc old mode 100755 new mode 100644 index ab3f211120b94b..240a82a646081c --- a/paddle/cinn/frontend/interpreter_test.cc +++ b/paddle/cinn/frontend/interpreter_test.cc @@ -24,7 +24,8 @@ namespace cinn::frontend { TEST(Interpreter, basic) { Interpreter executor({"A"}, {{1, 30}}); - executor.LoadPaddleModel(FLAGS_model_dir, common::DefaultTarget(), true); + executor.LoadPaddleModel( + FLAGS_model_dir, cinn::common::DefaultTarget(), true); executor.Run(); // fc_0.tmp_2 is eliminated by OpFusion, so here // change to get tensor of the out variable diff --git a/paddle/cinn/frontend/net_builder.cc b/paddle/cinn/frontend/net_builder.cc index c7abf882e413d1..b9f6135bdd5b5e 100644 --- a/paddle/cinn/frontend/net_builder.cc +++ b/paddle/cinn/frontend/net_builder.cc @@ -28,8 +28,8 @@ namespace cinn { namespace frontend { -using common::Context; -using common::Type; +using cinn::common::Context; +using cinn::common::Type; using hlir::framework::Operator; using utils::AttributeMap; using utils::ShapeType; @@ -275,7 +275,7 @@ Variable NetBuilder::FillConstant(const std::vector& shape, const std::string& name, const std::string& dtype, bool force_cpu) { - const auto& type = common::Str2Type(dtype); + const auto& type = cinn::common::Str2Type(dtype); utils::Attribute value; if (type.is_float()) { @@ -533,7 +533,7 @@ Variable NetBuilder::Cast(const Variable& operand, const std::string& dtype) { Variable NetBuilder::BitcastConvert(const Variable& operand, const std::string& dtype) { - std::string input_data_type = common::Type2Str(operand->type); + std::string input_data_type = cinn::common::Type2Str(operand->type); return CustomInstr("bitcast_convert", {operand}, {{"dtype", dtype}, {"input_data_type", input_data_type}}) @@ -1125,7 +1125,8 @@ Variable NetBuilder::Cholesky(const Variable& x, bool upper) { : LessEqual(index_row, index_col); auto mask_mat = Reshape(mask, {m, m}); auto mask_full = BroadcastTo(mask_mat, x->shape); - auto zeros = FillConstant(x->shape, 0.0f, "zeros", common::Type2Str(x->type)); + auto zeros = + FillConstant(x->shape, 0.0f, "zeros", cinn::common::Type2Str(x->type)); auto out = Select(mask_full, cholesky_out, zeros); return out; } diff --git a/paddle/cinn/frontend/net_builder.h b/paddle/cinn/frontend/net_builder.h index 17f33e8c2ac092..dde45e2d42fdaf 100644 --- a/paddle/cinn/frontend/net_builder.h +++ b/paddle/cinn/frontend/net_builder.h @@ -396,7 +396,7 @@ class NetBuilder { * @param id_hint The input variable's name. Default is None. * @return The new input. */ - Placeholder CreateInput(const common::Type& type, + Placeholder CreateInput(const cinn::common::Type& type, const cinn::utils::ShapeType& shape, const std::string& id_hint = ""); @@ -411,8 +411,9 @@ class NetBuilder { const T& value, const std::string& name = "", const std::string& dtype = "") { - auto true_dtype = - dtype.empty() ? common::Type2Str(common::type_of()) : dtype; + auto true_dtype = dtype.empty() + ?
cinn::common::Type2Str(cinn::common::type_of()) + : dtype; auto out = CustomInstr( "const_scalar", {}, {{"value", value}, {"dtype", true_dtype}}) @@ -441,7 +442,8 @@ class NetBuilder { using TYPE = typename decltype(all_datas)::value_type; auto true_dtype = - dtype.empty() ? common::Type2Str(common::type_of()) : dtype; + dtype.empty() ? cinn::common::Type2Str(cinn::common::type_of()) + : dtype; const auto& real_shape = GetVectorShape(value); @@ -526,8 +528,11 @@ class NetBuilder { T value, const std::string& name = "", bool force_cpu = false) { - return FillConstant( - shape, value, name, common::Type2Str(common::type_of()), force_cpu); + return FillConstant(shape, + value, + name, + cinn::common::Type2Str(cinn::common::type_of()), + force_cpu); } /** diff --git a/paddle/cinn/frontend/net_builder_test.cc b/paddle/cinn/frontend/net_builder_test.cc index bede6049dbe38e..46839aa890ec5c 100644 --- a/paddle/cinn/frontend/net_builder_test.cc +++ b/paddle/cinn/frontend/net_builder_test.cc @@ -90,9 +90,9 @@ TEST(net_build, TestTransValidVarName) { TEST(net_build, program_execute_multi_elementwise_add) { auto program = CreateAddProgram(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; @@ -131,9 +131,9 @@ TEST(net_build, program_execute_fc) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; @@ -176,9 +176,9 @@ TEST(net_build, program_execute_multi_elementwise_add_bf16) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; @@ -218,9 +218,9 @@ TEST(net_build, program_execute_fc_bf16) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; @@ -282,9 +282,9 @@ TEST(net_build, program_execute_pool2d) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; @@ -314,9 +314,9 @@ TEST(net_build, program_execute_reverse) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; @@ -348,9 +348,9 @@ TEST(net_build, program_execute_gather) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = 
cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -408,9 +408,9 @@ TEST(net_build, program_execute_gather_nd) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; @@ -470,9 +470,9 @@ TEST(net_build, program_execute_cast) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -525,9 +525,9 @@ TEST(net_build, program_execute_squeeze_case0) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -585,9 +585,9 @@ TEST(net_build, program_execute_squeeze_case1) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -645,9 +645,9 @@ TEST(net_build, program_execute_squeeze_case2) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -704,9 +704,9 @@ TEST(net_build, program_execute_squeeze_case3) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -763,9 +763,9 @@ TEST(net_build, program_execute_squeeze_case4) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -820,9 +820,9 @@ TEST(net_build, program_execute_argsort) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -882,9 +882,9 @@ TEST(net_build, program_execute_sort) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = 
common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -943,9 +943,9 @@ TEST(net_build, program_execute_arange_float) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -985,9 +985,9 @@ TEST(net_build, program_execute_arange_int) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -1029,9 +1029,9 @@ TEST(net_build, program_argmax_case1) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -1107,7 +1107,7 @@ TEST(net_build, program_argmax_case2) { Variable output = builder.Argmax(input, 1, false); auto program = builder.Build(); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -1182,9 +1182,9 @@ TEST(net_build, program_argmin_case1) { Variable output = builder.Argmin(input, 1, true); auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; @@ -1261,9 +1261,9 @@ TEST(net_build, program_argmin_case2) { Variable output = builder.Argmin(input, 1, false); auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -1339,9 +1339,9 @@ TEST(net_build, program_execute_repeat_axis_0) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); @@ -1395,9 +1395,9 @@ TEST(net_build, program_execute_repeat_axis_1) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, 
fetch_ids, target); @@ -1457,9 +1457,9 @@ TEST(net_build, program_execute_one_hot) { auto program = builder.Build(); #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = Optimize(&program, fetch_ids, target); diff --git a/paddle/cinn/frontend/op_mapper_registry.h b/paddle/cinn/frontend/op_mapper_registry.h index 9351e60e8ff70f..396936bf63bdf6 100644 --- a/paddle/cinn/frontend/op_mapper_registry.h +++ b/paddle/cinn/frontend/op_mapper_registry.h @@ -59,7 +59,7 @@ class OpMapperContext { public: OpMapperContext( const hlir::framework::Scope& scope, - const common::Target& target, + const cinn::common::Target& target, NetBuilder* builder, std::unordered_map* var_map, std::unordered_map* var_model_to_program_map, @@ -99,7 +99,7 @@ class OpMapperContext { struct FeedInfo { std::vector shape; - common::Type type; + cinn::common::Type type; }; void AddFeedInfo(const std::string& name, const FeedInfo& info); @@ -108,7 +108,7 @@ class OpMapperContext { private: const hlir::framework::Scope& scope_; - const common::Target& target_; + const cinn::common::Target& target_; NetBuilder* builder_{nullptr}; std::unordered_map* var_map_{nullptr}; diff --git a/paddle/cinn/frontend/op_mappers/common_utils.h b/paddle/cinn/frontend/op_mappers/common_utils.h index 387a2c1fe7a8c9..61e9dc2cda93f4 100644 --- a/paddle/cinn/frontend/op_mappers/common_utils.h +++ b/paddle/cinn/frontend/op_mappers/common_utils.h @@ -181,7 +181,7 @@ inline std::string GetPaddleDtype(const paddle::cpp::OpDesc& op_desc, return ""; } - return common::Type2Str(dtype_cinn); + return cinn::common::Type2Str(dtype_cinn); } } // namespace utils diff --git a/paddle/cinn/frontend/op_mappers/paddle/clip.cc b/paddle/cinn/frontend/op_mappers/paddle/clip.cc index 1dc659b7410f45..f060ec4175fc99 100644 --- a/paddle/cinn/frontend/op_mappers/paddle/clip.cc +++ b/paddle/cinn/frontend/op_mappers/paddle/clip.cc @@ -37,7 +37,8 @@ void ClipOpMapper(const paddle::cpp::OpDesc& op_desc, << "The [Min] tensor shape of clip op should be [1], but here [" << cinn::utils::Join(min_val_tensor->shape, ", ") << "]"; if (x->type != min_val_tensor->type) { - min_val_tensor = builder->Cast(min_val_tensor, common::Type2Str(x->type)); + min_val_tensor = + builder->Cast(min_val_tensor, cinn::common::Type2Str(x->type)); } min_val_tensor = builder->BroadcastTo(min_val_tensor, x->shape); x = builder->Max(x, min_val_tensor); @@ -48,8 +49,8 @@ void ClipOpMapper(const paddle::cpp::OpDesc& op_desc, auto min_val_tensor = builder->FillConstant(x->shape, min_value, - common::UniqName(x->id + "_min"), - common::Type2Str(x->type)); + cinn::common::UniqName(x->id + "_min"), + cinn::common::Type2Str(x->type)); x = builder->Max(x, min_val_tensor); } @@ -62,7 +63,8 @@ void ClipOpMapper(const paddle::cpp::OpDesc& op_desc, << "The [Max] tensor shape of clip op should be [1], but here [" << cinn::utils::Join(max_val_tensor->shape, ", ") << "]"; if (x->type != max_val_tensor->type) { - max_val_tensor = builder->Cast(max_val_tensor, common::Type2Str(x->type)); + max_val_tensor = + builder->Cast(max_val_tensor, cinn::common::Type2Str(x->type)); } max_val_tensor = builder->BroadcastTo(max_val_tensor, x->shape); x = builder->Min(x, max_val_tensor); @@ -70,10 +72,11 @@ void ClipOpMapper(const paddle::cpp::OpDesc& op_desc, CHECK(op_desc.HasAttr("max")) << "The clip op should has 
[max] attribute or [Max] tensor input."; auto max_value = op_desc.GetAttr("max"); - auto max_val_tensor = builder->FillConstant(x->shape, - max_value, - common::UniqName("constant"), - common::Type2Str(x->type)); + auto max_val_tensor = + builder->FillConstant(x->shape, + max_value, + cinn::common::UniqName("constant"), + cinn::common::Type2Str(x->type)); x = builder->Min(x, max_val_tensor); } diff --git a/paddle/cinn/frontend/op_mappers/paddle/constant.cc b/paddle/cinn/frontend/op_mappers/paddle/constant.cc index 8f38bb4ee90340..ca11b9f5780831 100644 --- a/paddle/cinn/frontend/op_mappers/paddle/constant.cc +++ b/paddle/cinn/frontend/op_mappers/paddle/constant.cc @@ -91,7 +91,7 @@ void FillConstantOpMapper(const paddle::cpp::OpDesc& op_desc, CHECK(value_tensor->shape == cinn::utils::ShapeType{1}) << "The shape of [ValueTensor] should be [1], but here [" << cinn::utils::Join(value_tensor->shape, ", ") << "]"; - if (common::Type2Str(value_tensor->type) != dtype) { + if (cinn::common::Type2Str(value_tensor->type) != dtype) { value_tensor = ctx.Builder()->Cast(value_tensor, dtype); } out = ctx.Builder()->BroadcastTo(value_tensor, shape); @@ -131,7 +131,7 @@ void FillAnyLikeOpMapper(const paddle::cpp::OpDesc& op_desc, auto dtype = utils::GetPaddleDtype( op_desc, "dtype", paddle::cpp::VarDescAPI::Type::FP32); if (dtype.empty()) { - dtype = common::Type2Str(x->type); + dtype = cinn::common::Type2Str(x->type); } VLOG(4) << "FillAnyLikeOp: fill constant (" << value << ") with shape (" diff --git a/paddle/cinn/frontend/op_mappers/paddle/cumsum.cc b/paddle/cinn/frontend/op_mappers/paddle/cumsum.cc index 080d53302dc172..3482bacded2216 100644 --- a/paddle/cinn/frontend/op_mappers/paddle/cumsum.cc +++ b/paddle/cinn/frontend/op_mappers/paddle/cumsum.cc @@ -80,7 +80,7 @@ void CumsumOpMapper(const paddle::cpp::OpDesc& op_desc, mask = ctx.Builder()->BroadcastTo(mask, broadcast_shape); x = ctx.Builder()->BroadcastTo(x, broadcast_shape); auto false_value = ctx.Builder()->FillConstant( - x->shape, 0, UniqName("false_value"), common::Type2Str(x->type)); + x->shape, 0, UniqName("false_value"), cinn::common::Type2Str(x->type)); // Select elements with mask auto selected_x = ctx.Builder()->Select(mask, x, false_value); // Do reduce sum diff --git a/paddle/cinn/frontend/op_mappers/paddle/elementwise.cc b/paddle/cinn/frontend/op_mappers/paddle/elementwise.cc index 777b6a68d27af4..792ae1e922904d 100644 --- a/paddle/cinn/frontend/op_mappers/paddle/elementwise.cc +++ b/paddle/cinn/frontend/op_mappers/paddle/elementwise.cc @@ -223,7 +223,7 @@ void PowOpMapper(const paddle::cpp::OpDesc& op_desc, y = ctx.Builder()->FillConstant(x->shape, factor, cinn::UniqName(x_name + "_factor"), - common::Type2Str(x->type)); + cinn::common::Type2Str(x->type)); } else { LOG(FATAL) << "Cannot found [FactorTensor] input or [factor] attribute in " "paddle.pow! 
Please check."; diff --git a/paddle/cinn/frontend/op_mappers/paddle/layer_norm.cc b/paddle/cinn/frontend/op_mappers/paddle/layer_norm.cc index c9a138c3dbc4cd..3931145a5a05e4 100644 --- a/paddle/cinn/frontend/op_mappers/paddle/layer_norm.cc +++ b/paddle/cinn/frontend/op_mappers/paddle/layer_norm.cc @@ -94,10 +94,11 @@ void LayerNormOpMapper(const paddle::cpp::OpDesc& op_desc, std::vector shape{left, right}; auto x_reshape = builder->Reshape(x, shape); auto x_reduce = builder->ReduceSum(x_reshape, {1}); - auto ele_num = builder->FillConstant({left}, - static_cast(right), - common::UniqName("layer_norm_ele_num"), - common::Type2Str(x->type)); + auto ele_num = + builder->FillConstant({left}, + static_cast(right), + cinn::common::UniqName("layer_norm_ele_num"), + cinn::common::Type2Str(x->type)); auto x_mean = builder->Divide(x_reduce, ele_num); // use `E[|x|^2] - |E[x]|^2` instead of `E[|x - E[x]|^2])` to compute variance @@ -107,8 +108,8 @@ void LayerNormOpMapper(const paddle::cpp::OpDesc& op_desc, auto x_mean2 = builder->Multiply(x_mean, builder->Identity(x_mean)); auto zero = builder->FillConstant({left}, 0.f, - common::UniqName("layer_norm_zero"), - common::Type2Str(x->type)); + cinn::common::UniqName("layer_norm_zero"), + cinn::common::Type2Str(x->type)); auto x_var = builder->Max(builder->Subtract(x2_mean, x_mean2), zero); // compute x norm @@ -117,8 +118,8 @@ void LayerNormOpMapper(const paddle::cpp::OpDesc& op_desc, auto epsilon_var = builder->FillConstant({left}, epsilon, - common::UniqName("layer_norm_epsilon"), - common::Type2Str(x->type)); + cinn::common::UniqName("layer_norm_epsilon"), + cinn::common::Type2Str(x->type)); auto x_var_eps = builder->Add(x_var, epsilon_var); auto x_var_sqrt = builder->Sqrt(x_var_eps); auto y_out = diff --git a/paddle/cinn/frontend/op_mappers/paddle/norm.cc b/paddle/cinn/frontend/op_mappers/paddle/norm.cc index 8e45ead8bf185f..48731adcae060c 100644 --- a/paddle/cinn/frontend/op_mappers/paddle/norm.cc +++ b/paddle/cinn/frontend/op_mappers/paddle/norm.cc @@ -41,10 +41,11 @@ struct NormHelper { // std_square_sum = sqrt(square_sum + epsilon) Variable StdSquareSum(Variable square_sum, float epsilon) { - auto epsilon_1d = builder->FillConstant(square_sum->shape, - epsilon, - common::UniqName("norm_epsilon"), - common::Type2Str(square_sum->type)); + auto epsilon_1d = + builder->FillConstant(square_sum->shape, + epsilon, + cinn::common::UniqName("norm_epsilon"), + cinn::common::Type2Str(square_sum->type)); auto std_square_sum = builder->Sqrt(builder->Add(square_sum, epsilon_1d)); return std_square_sum; } @@ -99,14 +100,14 @@ void NormOpMapper(const paddle::cpp::OpDesc& op_desc, auto square_sum = helper.SquareSum(x); auto std_square_sum = helper.StdSquareSum(square_sum, epsilon); auto normalized = ctx.Builder()->Divide(x, std_square_sum); - auto y = ctx.Builder()->Cast(normalized, common::Type2Str(in_type)); + auto y = ctx.Builder()->Cast(normalized, cinn::common::Type2Str(in_type)); ctx.AddVar(out_name, y); ctx.AddVarModelToProgram(out_name, y->id); if (!norm_name.empty()) { auto norm_grad = - ctx.Builder()->Cast(std_square_sum, common::Type2Str(in_type)); + ctx.Builder()->Cast(std_square_sum, cinn::common::Type2Str(in_type)); ctx.AddVar(norm_name, norm_grad); ctx.AddVarModelToProgram(norm_name, norm_grad->id); } diff --git a/paddle/cinn/frontend/op_mappers/paddle/reduce.cc b/paddle/cinn/frontend/op_mappers/paddle/reduce.cc index 0d52d7ec6d7cd4..9162a1158edfaf 100644 --- a/paddle/cinn/frontend/op_mappers/paddle/reduce.cc +++ 
b/paddle/cinn/frontend/op_mappers/paddle/reduce.cc @@ -85,7 +85,7 @@ void ReduceOpMapper(const paddle::cpp::OpDesc& op_desc, auto dtype = utils::GetPaddleDtype( op_desc, "out_dtype", static_cast(-1)); - if (!dtype.empty() && common::Type2Str(out.value()->type) != dtype) { + if (!dtype.empty() && cinn::common::Type2Str(out.value()->type) != dtype) { out = ctx.Builder()->Cast(out.value(), dtype); } diff --git a/paddle/cinn/frontend/op_mappers/paddle/scale.cc b/paddle/cinn/frontend/op_mappers/paddle/scale.cc index b8c8b8c5498850..639af845edefed 100644 --- a/paddle/cinn/frontend/op_mappers/paddle/scale.cc +++ b/paddle/cinn/frontend/op_mappers/paddle/scale.cc @@ -49,12 +49,13 @@ void ScaleOpMapper(const paddle::cpp::OpDesc& op_desc, CHECK(scale_tensor->shape == cinn::utils::ShapeType{1}) << "The shape of [ScaleTensor] should be [1], but here [" << cinn::utils::Join(scale_tensor->shape, ", ") << "]"; - scale_tensor = ctx.Builder()->Cast(scale_tensor, common::Type2Str(x->type)); + scale_tensor = + ctx.Builder()->Cast(scale_tensor, cinn::common::Type2Str(x->type)); scale_tensor = ctx.Builder()->BroadcastTo(scale_tensor, x->shape); if (bias != 0.0f) { auto bias_tensor = ctx.Builder()->FillConstant( - x->shape, bias, x->id + "_bias", common::Type2Str(x->type)); + x->shape, bias, x->id + "_bias", cinn::common::Type2Str(x->type)); if (bias_after_scale) { out = ctx.Builder()->Add(bias_tensor, ctx.Builder()->Multiply(x, scale_tensor)); diff --git a/paddle/cinn/frontend/op_mappers/paddle/scatter.cc b/paddle/cinn/frontend/op_mappers/paddle/scatter.cc index ca7f15ab254c7e..8be9b563b4cd1e 100644 --- a/paddle/cinn/frontend/op_mappers/paddle/scatter.cc +++ b/paddle/cinn/frontend/op_mappers/paddle/scatter.cc @@ -40,10 +40,12 @@ void ScatterOpMapper(const paddle::cpp::OpDesc& op_desc, const auto& updates = ctx.GetVar(updates_name); CHECK(input->type == updates->type) << "checks whether the type of the input and the updates are the same."; - CHECK(indices->type == common::Int(32) || indices->type == common::Int(64)) + CHECK(indices->type == cinn::common::Int(32) || + indices->type == cinn::common::Int(64)) << "checks whether the data type of the indices is either int32 or int64"; - if (indices->type == common::Int(64)) { - indices = ctx.Builder()->Cast(indices, common::Type2Str(common::Int(32))); + if (indices->type == cinn::common::Int(64)) { + indices = ctx.Builder()->Cast( + indices, cinn::common::Type2Str(cinn::common::Int(32))); } CHECK_LE(indices->shape.size(), 2) << "Ids should be 0, 1 or 2 in scatter_op"; if (indices->shape.size() == 0) { @@ -61,8 +63,8 @@ void ScatterOpMapper(const paddle::cpp::OpDesc& op_desc, const auto& zeros = ctx.Builder()->FillConstant(updates->shape, 0, - common::UniqName("scatter_zeros"), - common::Type2Str(updates->type)); + cinn::common::UniqName("scatter_zeros"), + cinn::common::Type2Str(updates->type)); out = ctx.Builder()->ScatterAssign(input, zeros, indices); out = ctx.Builder()->ScatterAdd(out, updates, indices); } diff --git a/paddle/cinn/frontend/op_mappers/science/broadcast.cc b/paddle/cinn/frontend/op_mappers/science/broadcast.cc index f5b3f9cd20f90c..a4cdf22391ec22 100644 --- a/paddle/cinn/frontend/op_mappers/science/broadcast.cc +++ b/paddle/cinn/frontend/op_mappers/science/broadcast.cc @@ -33,7 +33,7 @@ void FillConstantOpMapper(const paddle::cpp::OpDesc& op_desc, op_desc, "dtype", static_cast(paddle::cpp::VarDescAPI::Type::FP32)); auto dtype_pd = static_cast(dtype_id); auto dtype_cinn = utils::CppVarType2CommonType(dtype_pd); - auto dtype = 
common::Type2Str(dtype_cinn); + auto dtype = cinn::common::Type2Str(dtype_cinn); VLOG(4) << "fill constant (" << value << ") with shape (" << cinn::utils::Join(shape, ",") << ") and dtype [" << dtype << "]"; diff --git a/paddle/cinn/frontend/op_mappers/science/transform.cc b/paddle/cinn/frontend/op_mappers/science/transform.cc index 45faa1961790dc..a58f93fd275979 100644 --- a/paddle/cinn/frontend/op_mappers/science/transform.cc +++ b/paddle/cinn/frontend/op_mappers/science/transform.cc @@ -413,7 +413,7 @@ void CastOpMapper(const paddle::cpp::OpDesc& op_desc, op_desc, "dtype", static_cast(paddle::cpp::VarDescAPI::Type::FP32)); auto dtype_pd = static_cast(dtype_id); auto dtype_cinn = utils::CppVarType2CommonType(dtype_pd); - auto dtype = common::Type2Str(dtype_cinn); + auto dtype = cinn::common::Type2Str(dtype_cinn); VLOG(4) << out_name << " = cast(" << x_name << ", dtype=" << dtype << ")"; diff --git a/paddle/cinn/frontend/optimize.cc b/paddle/cinn/frontend/optimize.cc index 1f4572a87d47ed..d88952cabefba6 100644 --- a/paddle/cinn/frontend/optimize.cc +++ b/paddle/cinn/frontend/optimize.cc @@ -134,7 +134,7 @@ std::vector DefaultOpFusionPasses() { std::shared_ptr Optimize( frontend::Program* program, const std::unordered_set& fetch_ids, - common::Target target, + cinn::common::Target target, const OptimizeOptions& options) { cinn::hlir::framework::PassPrinter::GetInstance()->Begin(fetch_ids); // Apply program passes @@ -154,7 +154,7 @@ std::shared_ptr Optimize( std::shared_ptr Optimize( frontend::Program* program, const std::unordered_set& fetch_ids, - common::Target target, + cinn::common::Target target, const std::vector& passes) { OptimizeOptions options; diff --git a/paddle/cinn/frontend/optimize.h b/paddle/cinn/frontend/optimize.h index 543c027308d7b1..b382d0c5a6fb4c 100755 --- a/paddle/cinn/frontend/optimize.h +++ b/paddle/cinn/frontend/optimize.h @@ -38,13 +38,13 @@ std::vector DefaultOpFusionPasses(); std::shared_ptr Optimize( frontend::Program* program, const std::unordered_set& fetch_ids, - common::Target target, + cinn::common::Target target, const OptimizeOptions& options = DefaultTrainingOptimizeOptions()); std::shared_ptr Optimize( frontend::Program* program, const std::unordered_set& fetch_ids, - common::Target target, + cinn::common::Target target, const std::vector& passes); } // namespace frontend diff --git a/paddle/cinn/frontend/paddle/model_parser.cc b/paddle/cinn/frontend/paddle/model_parser.cc index 5c2bf8eb37363b..dce003b225fe74 100644 --- a/paddle/cinn/frontend/paddle/model_parser.cc +++ b/paddle/cinn/frontend/paddle/model_parser.cc @@ -49,7 +49,7 @@ int SizeOfType(framework_proto::VarType::Type type) { void TensorFromStream(std::istream &is, hlir::framework::_Tensor_ *tensor, - const common::Target &target) { + const cinn::common::Target &target) { using Type = framework_proto::VarType::Type; uint32_t version; is.read(reinterpret_cast(&version), sizeof(version)); @@ -117,7 +117,7 @@ void TensorFromStream(std::istream &is, void LoadLoDTensor(std::istream &is, hlir::framework::Variable *var, - const common::Target &target) { + const cinn::common::Target &target) { auto &tensor = absl::get(*var); uint32_t version{}; is.read(reinterpret_cast(&version), sizeof(version)); @@ -170,7 +170,7 @@ void LoadParams(const std::string &path) {} // Load directly to CPU, and latter transfer to other devices. 
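Note: the `Optimize` overloads above are the entry point that the test hunks earlier in this patch drive. A condensed sketch of that call pattern, using the fully qualified names this patch introduces (the glue itself is hypothetical; `program` stands for a previously built `frontend::Program`):

    // Hypothetical glue condensing the test-side pattern repeated above; the
    // CINN symbols (Target, DefaultNVGPUTarget, DefaultHostTarget, Optimize)
    // are the ones shown in these hunks.
    #ifdef CINN_WITH_CUDA
      cinn::common::Target target = cinn::common::DefaultNVGPUTarget();
    #else
      cinn::common::Target target = cinn::common::DefaultHostTarget();
    #endif
      std::unordered_set<std::string> fetch_ids;  // empty: fetch nothing extra
      auto graph = cinn::frontend::Optimize(&program, fetch_ids, target);
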
void LoadParam(const std::string &path, hlir::framework::Variable *out, - const common::Target &target) { + const cinn::common::Target &target) { std::ifstream fin(path, std::ios::binary); CHECK(fin.is_open()) << "failed to open file " << path; LoadLoDTensor(fin, out, target); @@ -190,7 +190,7 @@ void LoadCombinedParamsPb(const std::string &path, hlir::framework::Scope *scope, const cpp::ProgramDesc &cpp_prog, bool params_from_memory, - const common::Target &target) { + const cinn::common::Target &target) { CHECK(scope); auto prog = cpp_prog; auto &main_block_desc = *prog.GetBlock(0); @@ -236,7 +236,7 @@ void LoadModelPb(const std::string &model_dir, cpp::ProgramDesc *cpp_prog, bool combined, bool model_from_memory, - const common::Target &target) { + const cinn::common::Target &target) { CHECK(cpp_prog); CHECK(scope); cpp_prog->ClearBlocks(); diff --git a/paddle/cinn/frontend/paddle/model_parser.h b/paddle/cinn/frontend/paddle/model_parser.h index 8bc10108b79de7..03834a7f525c2e 100644 --- a/paddle/cinn/frontend/paddle/model_parser.h +++ b/paddle/cinn/frontend/paddle/model_parser.h @@ -30,14 +30,15 @@ namespace cinn::frontend::paddle { namespace framework_proto = ::cinn::frontend::paddle::proto; // Read a model and files of parameters in pb format. -void LoadModelPb(const std::string& model_dir, - const std::string& model_file, - const std::string& param_file, - hlir::framework::Scope* scope, - cpp::ProgramDesc* cpp_prog, - bool combined = true, - bool model_from_memory = false, - const common::Target& target = common::DefaultHostTarget()); +void LoadModelPb( + const std::string& model_dir, + const std::string& model_file, + const std::string& param_file, + hlir::framework::Scope* scope, + cpp::ProgramDesc* cpp_prog, + bool combined = true, + bool model_from_memory = false, + const cinn::common::Target& target = cinn::common::DefaultHostTarget()); // Read a __model__ file. std::unique_ptr LoadProgram( @@ -45,7 +46,7 @@ std::unique_ptr LoadProgram( void LoadLoDTensor(std::istream& is, hlir::framework::Variable* var, - const common::Target& target); + const cinn::common::Target& target); // Read a single file containing all the parameters. void LoadParams(const std::string& path); @@ -53,21 +54,21 @@ void LoadParams(const std::string& path); // Load a single parameter to an output tensor. 
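Note: the op-mapper hunks earlier in this patch repeat one construction many times: build a constant tensor that matches an existing variable's shape and dtype. A hypothetical wrapper isolating that pattern (`FillConstant`, `UniqName`, and `Type2Str` are the real calls from those hunks; `MakeScalarLike` itself is illustrative only):

    // Illustrative only: mirrors the constant-building pattern in the
    // clip.cc / layer_norm.cc / norm.cc hunks above.
    Variable MakeScalarLike(NetBuilder* builder, const Variable& x, float value) {
      return builder->FillConstant(x->shape,
                                   value,
                                   cinn::common::UniqName(x->id + "_const"),
                                   cinn::common::Type2Str(x->type));
    }
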
void LoadParam(const std::string& path, hlir::framework::Variable* out, - const common::Target& target); + const cinn::common::Target& target); void LoadCombinedParamsPb( const std::string& path, hlir::framework::Scope* scope, const pb::ProgramDesc& prog, bool params_from_memory = false, - const common::Target& target = common::DefaultHostTarget()); + const cinn::common::Target& target = cinn::common::DefaultHostTarget()); // LoDTensor to ostream void TensorToStream(std::ostream& os, const hlir::framework::_Tensor_& tensor); void TensorFromStream( std::istream& is, hlir::framework::_Tensor_* tensor, - const common::Target& target = common::DefaultHostTarget()); + const cinn::common::Target& target = cinn::common::DefaultHostTarget()); void ReadBinaryFile(const std::string& filename, std::string* contents); } // namespace cinn::frontend::paddle diff --git a/paddle/cinn/frontend/paddle_model_convertor.cc b/paddle/cinn/frontend/paddle_model_convertor.cc index 1e4aa1eb46b221..a70818f38af219 100644 --- a/paddle/cinn/frontend/paddle_model_convertor.cc +++ b/paddle/cinn/frontend/paddle_model_convertor.cc @@ -35,10 +35,10 @@ namespace frontend { using cinn::utils::Attribute; PaddleModelConvertor::PaddleModelConvertor() - : PaddleModelConvertor(common::DefaultTarget(), nullptr, nullptr) {} + : PaddleModelConvertor(cinn::common::DefaultTarget(), nullptr, nullptr) {} PaddleModelConvertor::PaddleModelConvertor( - const common::Target& target, + const cinn::common::Target& target, std::shared_ptr builder, std::shared_ptr scope) : target_(target), builder_(builder), scope_(scope) { @@ -241,7 +241,7 @@ Program PaddleModelConvertor::operator()() { return builder_->Build(); } void PaddleModelConvertor::CreateInput(const std::string& dtype, const cinn::utils::ShapeType& shape, const std::string& name) { - OpMapperContext::FeedInfo feed_info = {shape, common::Str2Type(dtype)}; + OpMapperContext::FeedInfo feed_info = {shape, cinn::common::Str2Type(dtype)}; ctx_->AddFeedInfo(name, feed_info); RunOp("feed", {}, {{"Out", {name}}}, {}); diff --git a/paddle/cinn/frontend/paddle_model_convertor.h b/paddle/cinn/frontend/paddle_model_convertor.h index ee83223d8c965f..a6243ac369bae8 100644 --- a/paddle/cinn/frontend/paddle_model_convertor.h +++ b/paddle/cinn/frontend/paddle_model_convertor.h @@ -41,7 +41,7 @@ class PaddleModelConvertor { public: PaddleModelConvertor(); - PaddleModelConvertor(const common::Target& target, + PaddleModelConvertor(const cinn::common::Target& target, std::shared_ptr builder = nullptr, std::shared_ptr scope = nullptr); @@ -103,7 +103,7 @@ class PaddleModelConvertor { std::unique_ptr ctx_; std::shared_ptr builder_; - const common::Target& target_; + const cinn::common::Target& target_; std::shared_ptr scope_; }; diff --git a/paddle/cinn/frontend/paddle_model_convertor_test.cc b/paddle/cinn/frontend/paddle_model_convertor_test.cc index 953a3919601320..30364c05e417e7 100644 --- a/paddle/cinn/frontend/paddle_model_convertor_test.cc +++ b/paddle/cinn/frontend/paddle_model_convertor_test.cc @@ -92,7 +92,7 @@ void RunProgram(const Target& target, Program* prog) { } TEST(PaddleModelConvertor, basic) { - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); PaddleModelConvertor model_transform(target); model_transform.LoadModel(FLAGS_model_dir); diff --git a/paddle/cinn/frontend/paddle_model_to_program.cc b/paddle/cinn/frontend/paddle_model_to_program.cc index e17ca8863b5f57..52c91216dd9011 100644 --- a/paddle/cinn/frontend/paddle_model_to_program.cc +++ 
b/paddle/cinn/frontend/paddle_model_to_program.cc @@ -98,7 +98,8 @@ void PaddleModelToProgram::AddOpMapper_scale() { CHECK(scale_tensor_var) << "No scale tensor found in the scope"; auto& scale_tensor = absl::get(*scale_tensor_var); - scale = scale_tensor->mutable_data(common::DefaultHostTarget())[0]; + scale = scale_tensor->mutable_data( + cinn::common::DefaultHostTarget())[0]; } if (op_desc.HasAttr("bias")) { // the old model format bias = op_desc.GetAttr("bias"); diff --git a/paddle/cinn/frontend/paddle_model_to_program.h b/paddle/cinn/frontend/paddle_model_to_program.h index ab520e608de377..2fe376a6086191 100644 --- a/paddle/cinn/frontend/paddle_model_to_program.h +++ b/paddle/cinn/frontend/paddle_model_to_program.h @@ -43,7 +43,7 @@ class PaddleModelToProgram { explicit PaddleModelToProgram( hlir::framework::Scope* scope, std::unordered_map> input_shape_map, - const common::Target& target) + const cinn::common::Target& target) : scope_(scope), input_shape_map_(input_shape_map), target_(target), @@ -145,7 +145,7 @@ class PaddleModelToProgram { // map from var in Paddle model to var name in program. absl::flat_hash_map var_model_to_program_map_; hlir::framework::Scope* scope_{}; - common::Target target_; + cinn::common::Target target_; }; } // namespace frontend diff --git a/paddle/cinn/frontend/pass/auto_broadcast.cc b/paddle/cinn/frontend/pass/auto_broadcast.cc index 558105a44ad25e..785ceb9cad4a15 100644 --- a/paddle/cinn/frontend/pass/auto_broadcast.cc +++ b/paddle/cinn/frontend/pass/auto_broadcast.cc @@ -118,7 +118,7 @@ class AutoBroadcastPass : public ProgramPass { protected: void ApplyImpl(Program* program, const std::unordered_set& fetch_ids, - const common::Target& target) override { + const cinn::common::Target& target) override { NetBuilder builder("auto_broadcast_builder"); for (auto& var : program->GetInputs()) { builder.CreateInput(var); diff --git a/paddle/cinn/frontend/pass/auto_cast.cc b/paddle/cinn/frontend/pass/auto_cast.cc index 838ff8b06f1ddf..ebe129af680e46 100644 --- a/paddle/cinn/frontend/pass/auto_cast.cc +++ b/paddle/cinn/frontend/pass/auto_cast.cc @@ -39,8 +39,9 @@ Instruction CreateNewCastInstruction(const Variable& input, const Variable& output) { Instruction new_cast_instr("cast", {input}); new_cast_instr->outputs = {output}; - new_cast_instr->attrs = {{"dtype", common::Type2Str(output->type)}}; - new_cast_instr->attrs_ordered = {{"dtype", common::Type2Str(output->type)}}; + new_cast_instr->attrs = {{"dtype", cinn::common::Type2Str(output->type)}}; + new_cast_instr->attrs_ordered = { + {"dtype", cinn::common::Type2Str(output->type)}}; return new_cast_instr; } @@ -256,7 +257,7 @@ class AutoCastPass : public ProgramPass { protected: void ApplyImpl(Program* program, const std::unordered_set& fetch_ids, - const common::Target& target) override { + const cinn::common::Target& target) override { NetBuilder builder("auto_cast_builder"); for (auto& var : program->GetInputs()) { builder.CreateInput(var); diff --git a/paddle/cinn/frontend/pass/auto_cast_test.cc b/paddle/cinn/frontend/pass/auto_cast_test.cc index 4b570a2755cdbc..80532a39a3bba0 100644 --- a/paddle/cinn/frontend/pass/auto_cast_test.cc +++ b/paddle/cinn/frontend/pass/auto_cast_test.cc @@ -33,11 +33,11 @@ namespace cinn::frontend { TEST(AutoCast, Exp) { NetBuilder builder("net_builder"); - auto x = builder.CreateInput(common::Float16(), {4, 5, 3}, "X"); + auto x = builder.CreateInput(cinn::common::Float16(), {4, 5, 3}, "X"); auto out = builder.Exp(x); auto program = builder.Build(); - common::Target target 
= common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::pair, std::vector> passes{ {}, {"AutoCast", "Decomposer"}}; CompareProgramPassResult(&program, target, {out->id}, -2, passes); @@ -45,11 +45,11 @@ TEST(AutoCast, Exp) { TEST(AutoCast, Exp_bf16) { NetBuilder builder("net_builder"); - auto x = builder.CreateInput(common::BFloat16(), {4, 5, 3}, "X"); + auto x = builder.CreateInput(cinn::common::BFloat16(), {4, 5, 3}, "X"); auto out = builder.Exp(x); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::pair, std::vector> passes{ {}, {"AutoCast", "Decomposer"}}; CompareProgramPassResult(&program, target, {out->id}, -2, passes); @@ -57,7 +57,8 @@ TEST(AutoCast, Exp_bf16) { TEST(AutoCast, BatchNorm) { NetBuilder builder("net_builder"); - auto x = builder.CreateInput(common::Float16(), {128, 64, 112, 112}, "X"); + auto x = + builder.CreateInput(cinn::common::Float16(), {128, 64, 112, 112}, "X"); auto scale = builder.FillConstant({64}, 1.0f, "scale", "float32"); auto bias = builder.FillConstant({64}, 0.0f, "bias", "float32"); auto mean = builder.FillConstant({64}, 0.0f, "mean", "float32"); @@ -66,7 +67,7 @@ TEST(AutoCast, BatchNorm) { x, scale, bias, mean, variance, 1e-5f, 0.9f, "NCHW", false); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::pair, std::vector> passes{ {}, {"AutoCast", "Decomposer"}}; CompareProgramPassResult(&program, target, {out[0]->id}, -2, passes); @@ -74,7 +75,8 @@ TEST(AutoCast, BatchNorm) { TEST(AutoCast, BatchNorm_bf16) { NetBuilder builder("net_builder"); - auto x = builder.CreateInput(common::BFloat16(), {128, 64, 112, 112}, "X"); + auto x = + builder.CreateInput(cinn::common::BFloat16(), {128, 64, 112, 112}, "X"); auto scale = builder.FillConstant({64}, 1.0f, "scale", "float32"); auto bias = builder.FillConstant({64}, 0.0f, "bias", "float32"); auto mean = builder.FillConstant({64}, 0.0f, "mean", "float32"); @@ -83,7 +85,7 @@ TEST(AutoCast, BatchNorm_bf16) { x, scale, bias, mean, variance, 1e-5f, 0.9f, "NCHW", false); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::pair, std::vector> passes{ {}, {"AutoCast", "Decomposer"}}; CompareProgramPassResult(&program, target, {out[0]->id}, -2, passes); diff --git a/paddle/cinn/frontend/pass/cast_collapsing.cc b/paddle/cinn/frontend/pass/cast_collapsing.cc index 5fc40d407029f5..7667336f544702 100644 --- a/paddle/cinn/frontend/pass/cast_collapsing.cc +++ b/paddle/cinn/frontend/pass/cast_collapsing.cc @@ -67,7 +67,7 @@ class CastCollapsingPass : public ProgramPass { void ApplyImpl(Program* program, const std::unordered_set& fetch_ids, - const common::Target& target) const override { + const cinn::common::Target& target) const override { // `out2instr` is used to represent the mapping of Output to Instruction. OutputToOpMap out2instr; // `in2instr` is used to represent the mapping of Input to Instruction. 
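Note: the reason every hunk in this patch spells out `cinn::common::` in full: once DDim and related types move into a new top-level `::common` namespace, the shorthand `common::` written inside `namespace cinn` still resolves to `cinn::common`, so it cannot reach the relocated types (and becomes ambiguous wherever a using-directive makes both namespaces visible). A minimal, self-contained reproduction with stand-in types:

    // Stand-in types only, not the real CINN/Paddle declarations.
    namespace common { struct DDim {}; }                    // new top-level namespace
    namespace cinn { namespace common { struct Target {}; } }

    namespace cinn {
    inline void Demo() {
      common::Target t;         // OK: `common` here names cinn::common
      // common::DDim d;        // error: cinn::common shadows ::common, so the
      //                        // relocated DDim is unreachable via shorthand
      ::common::DDim d;         // explicit global qualification works
      cinn::common::Target t2;  // the convention this patch adopts
      (void)t; (void)d; (void)t2;
    }
    }  // namespace cinn
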
diff --git a/paddle/cinn/frontend/pass/cast_collapsing_test.cc b/paddle/cinn/frontend/pass/cast_collapsing_test.cc index 8384002c872cfb..42415da8ecdaa1 100644 --- a/paddle/cinn/frontend/pass/cast_collapsing_test.cc +++ b/paddle/cinn/frontend/pass/cast_collapsing_test.cc @@ -42,7 +42,7 @@ TEST(CastCollapsing, FuseTwoCast) { auto out = builder.Cast(x_t, "float32"); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id()}, std::back_inserter(input_ids), @@ -63,7 +63,7 @@ TEST(CastCollapsing, FuseThreeCast) { auto out = builder.Cast(x_2t, "float32"); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id()}, std::back_inserter(input_ids), @@ -82,7 +82,7 @@ TEST(CastCollapsing, ReplaceUselessCastWithIndentity) { auto out = builder.Cast(x, "float32"); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id()}, std::back_inserter(input_ids), @@ -104,7 +104,7 @@ TEST(CastCollapsing, FuseCastToUseless) { auto out = builder.Add(x_3t, x_3t); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id()}, std::back_inserter(input_ids), @@ -128,7 +128,7 @@ TEST(TransposeCollapsing, FuseTransposeWithMultiOutput) { auto out3 = builder.Transpose(x_3t, {0, 2, 1}); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id()}, std::back_inserter(input_ids), @@ -159,7 +159,7 @@ TEST(TransposeCollapsing, FuseTwoSecTranspose) { auto out2 = builder.Transpose(x_2t, {0, 2, 1}); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id()}, std::back_inserter(input_ids), @@ -181,7 +181,7 @@ TEST(TransposeCollapsing, FuseTwoHorizontalTranspose) { auto out = builder.Add(y_t1, y_t2); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id()}, std::back_inserter(input_ids), @@ -203,7 +203,7 @@ TEST(TransposeCollapsing, FuseVerAndHorTranspose) { auto out = builder.Add(y_t2, y_t3); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id()}, std::back_inserter(input_ids), diff --git a/paddle/cinn/frontend/pass/dead_code_eliminate.cc b/paddle/cinn/frontend/pass/dead_code_eliminate.cc index 0c093cf75fd024..2776135281dc0a 100644 --- a/paddle/cinn/frontend/pass/dead_code_eliminate.cc +++ b/paddle/cinn/frontend/pass/dead_code_eliminate.cc @@ -35,7 +35,7 @@ class DeadCodeEliminatePass : public ProgramPass { void ApplyImpl(Program* program, const std::unordered_set& fetch_ids, - const 
common::Target& target) override { + const cinn::common::Target& target) override { if (!CheckFetchIds(*program, fetch_ids)) { return; } diff --git a/paddle/cinn/frontend/pass/dead_code_eliminate_test.cc b/paddle/cinn/frontend/pass/dead_code_eliminate_test.cc index 7e418f394dae35..d2eb4faa70723d 100644 --- a/paddle/cinn/frontend/pass/dead_code_eliminate_test.cc +++ b/paddle/cinn/frontend/pass/dead_code_eliminate_test.cc @@ -46,7 +46,7 @@ TEST(DeadCodeEliminate, remove_single) { std::vector input_names = {x.id().data()}; std::vector output_names = {identity_1->id, reduce_sum_2->id}; - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::pair, std::vector> passes{ {"Decomposer"}, {"DeadCodeEliminate"}}; CompareResult( @@ -75,7 +75,7 @@ TEST(DeadCodeEliminate, remove_multiple) { std::vector input_names = {x.id().data()}; std::vector output_names = {reduce_sum_1->id}; - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::pair, std::vector> passes{ {"Decomposer"}, {"DeadCodeEliminate"}}; CompareResult( diff --git a/paddle/cinn/frontend/pass/decomposer.cc b/paddle/cinn/frontend/pass/decomposer.cc index b18ac57be73f31..06c685b418817c 100755 --- a/paddle/cinn/frontend/pass/decomposer.cc +++ b/paddle/cinn/frontend/pass/decomposer.cc @@ -30,7 +30,7 @@ class DecomposerPass : public ProgramPass { void ApplyImpl(Program* prog, const std::unordered_set& fetch_ids, - const common::Target& target) const override { + const cinn::common::Target& target) const override { // step 1: set the inputs of the origin program to the new program NetBuilder builder("decomposer_builder"); for (auto& var : prog->GetInputs()) { diff --git a/paddle/cinn/frontend/pass/decomposer_test.cc b/paddle/cinn/frontend/pass/decomposer_test.cc index 811d38ea693ddb..f38f86de4f07cb 100644 --- a/paddle/cinn/frontend/pass/decomposer_test.cc +++ b/paddle/cinn/frontend/pass/decomposer_test.cc @@ -60,9 +60,9 @@ TEST(DecomposePass, basic) { } #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif ProgramPass::Apply(&prog, {}, target, {"Decomposer"}); diff --git a/paddle/cinn/frontend/pass/expand_zero_dim_pass.cc b/paddle/cinn/frontend/pass/expand_zero_dim_pass.cc index 9732478c75b237..cd058e0a2ecdeb 100644 --- a/paddle/cinn/frontend/pass/expand_zero_dim_pass.cc +++ b/paddle/cinn/frontend/pass/expand_zero_dim_pass.cc @@ -32,7 +32,7 @@ class ExpandZeroDimPass : public ProgramPass { protected: void ApplyImpl(Program* program, const std::unordered_set& fetch_ids, - const common::Target& target) override { + const cinn::common::Target& target) override { NetBuilder builder("expand_zero_dim_builder"); for (int i = 0; i < program->size(); ++i) { auto& instr = (*program)[i]; diff --git a/paddle/cinn/frontend/pass/expand_zero_dim_pass_test.cc b/paddle/cinn/frontend/pass/expand_zero_dim_pass_test.cc index 9f65ec5d0ea33a..6420dede273c6d 100644 --- a/paddle/cinn/frontend/pass/expand_zero_dim_pass_test.cc +++ b/paddle/cinn/frontend/pass/expand_zero_dim_pass_test.cc @@ -85,7 +85,7 @@ TEST(ExpandZeroDimPass, expand_zero_dim_1) { auto y = builder.CreateInput(Float(32), {}, "y"); auto out = builder.Add(x, y); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); size_t 
origin_size = program.size(); VLOG(1) << "Program Before ExpandZeroDimPass:\n" << program; @@ -125,7 +125,7 @@ TEST(ExpandZeroDimPass, expand_zero_dim_2) { auto y = builder.CreateInput(Float(32), {}, "y"); auto out = builder.Add(x, y); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); size_t origin_size = program.size(); VLOG(1) << "Program Before ExpandZeroDimPass:\n" << program; diff --git a/paddle/cinn/frontend/pass/fill_constant_folding.cc b/paddle/cinn/frontend/pass/fill_constant_folding.cc index c6ee33bc6c79cc..b731c9ab3cf6a2 100644 --- a/paddle/cinn/frontend/pass/fill_constant_folding.cc +++ b/paddle/cinn/frontend/pass/fill_constant_folding.cc @@ -94,7 +94,7 @@ class FillConstantFoldingPass : public ProgramPass { void ApplyImpl(Program* program, const std::unordered_set& fetch_ids, - const common::Target& target) const override { + const cinn::common::Target& target) const override { auto in2instr = GetInputToOpMap(program); // `fill_constant_map` is used to represent the first fill_constant and its diff --git a/paddle/cinn/frontend/pass/fill_constant_folding_test.cc b/paddle/cinn/frontend/pass/fill_constant_folding_test.cc index 2300c16b3e3d00..e8385d972f733d 100644 --- a/paddle/cinn/frontend/pass/fill_constant_folding_test.cc +++ b/paddle/cinn/frontend/pass/fill_constant_folding_test.cc @@ -56,7 +56,7 @@ TEST(TransposeFolding, FoldTwoFillConstant) { auto transpose_y = builder.Transpose(y, {1, 0}); auto out = builder.Add(transpose_x, transpose_y); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); size_t origin_size = program.size(); VLOG(1) << "Program Before FillConstantFolding:\n" << program; @@ -97,7 +97,7 @@ TEST(TransposeFolding, FoldTwoFillConstantWithSameOuput) { auto transpose_x = builder.Transpose(x, {1, 0}); auto out = builder.Add(y, y); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); size_t origin_size = program.size(); VLOG(1) << "Program Before FillConstantFolding:\n" << program; @@ -136,7 +136,7 @@ TEST(TransposeFolding, FoldThreeFillConstant) { auto transpose_x = builder.Transpose(x, {1, 0}); auto out = builder.Add(y, z); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); size_t origin_size = program.size(); VLOG(1) << "Program Before FillConstantFolding:\n" << program; // Program { @@ -175,7 +175,7 @@ TEST(TransposeFolding, FoldThreeFillConstantWithOneDiff) { auto transpose_x = builder.Transpose(x, {1, 0}); auto out = builder.Add(y, z); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); auto scope = hlir::framework::BuildScope(target, graph); diff --git a/paddle/cinn/frontend/pass/fill_constant_rewriter.cc b/paddle/cinn/frontend/pass/fill_constant_rewriter.cc index 569d1ba77f859f..2ede43f4ae9b6d 100644 --- a/paddle/cinn/frontend/pass/fill_constant_rewriter.cc +++ b/paddle/cinn/frontend/pass/fill_constant_rewriter.cc @@ -154,7 +154,7 @@ class FillConstantRewriterPass : public ProgramPass { void ApplyImpl(Program* program, const std::unordered_set& fetch_ids, - const common::Target& target) override { + const cinn::common::Target& target) override { auto input2instr = GetInput2Instr(program); std::unordered_set remove_instr; diff --git 
a/paddle/cinn/frontend/pass/gemm_rewriter.cc b/paddle/cinn/frontend/pass/gemm_rewriter.cc index 9a43ea4ade125d..fe178c0b88137b 100644 --- a/paddle/cinn/frontend/pass/gemm_rewriter.cc +++ b/paddle/cinn/frontend/pass/gemm_rewriter.cc @@ -39,7 +39,7 @@ class GemmRewriterPass : public ProgramPass { void ApplyImpl(Program* prog, const std::unordered_set& fetch_ids, - const common::Target& target) override { + const cinn::common::Target& target) override { if (target.arch != Target::Arch::NVGPU || !prog->size()) { return; } diff --git a/paddle/cinn/frontend/pass/gemm_rewriter_test.cc b/paddle/cinn/frontend/pass/gemm_rewriter_test.cc index 88a4f7482f48ec..22f81a6b9a22fe 100755 --- a/paddle/cinn/frontend/pass/gemm_rewriter_test.cc +++ b/paddle/cinn/frontend/pass/gemm_rewriter_test.cc @@ -43,7 +43,7 @@ TEST(GemmRwriter, BatchedTransLeft) { auto out = builder.Add(d, e); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), e.id()}, std::back_inserter(input_ids), @@ -67,7 +67,7 @@ TEST(GemmRwriter, BatchedTransRight) { auto out = builder.Add(e, f); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), b.id(), f.id()}, std::back_inserter(input_ids), @@ -92,7 +92,7 @@ TEST(GemmRwriter, BatchedTransTwo) { auto out = builder.Add(e, f); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), f.id()}, std::back_inserter(input_ids), @@ -115,7 +115,7 @@ TEST(GemmRwriter, BatchedNoTrans) { auto out = builder.Add(e, f); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), b.id(), f.id()}, std::back_inserter(input_ids), @@ -139,7 +139,7 @@ TEST(GemmRwriter, TransLeft) { auto out = builder.Add(d, e); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), e.id()}, std::back_inserter(input_ids), @@ -163,7 +163,7 @@ TEST(GemmRwriter, TransRight) { auto out = builder.Add(e, f); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), b.id(), f.id()}, std::back_inserter(input_ids), @@ -188,7 +188,7 @@ TEST(GemmRwriter, TransTwo) { auto out = builder.Add(e, f); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), f.id()}, std::back_inserter(input_ids), @@ -211,7 +211,7 @@ TEST(GemmRwriter, NoTrans) { auto out = builder.Add(e, f); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), b.id(), f.id()}, 
std::back_inserter(input_ids), @@ -245,7 +245,7 @@ TEST(GemmRwriter, BatchedComplex) { auto out = builder.Add(p, q); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{d.id(), z.id()}, std::back_inserter(input_ids), @@ -276,7 +276,7 @@ TEST(GemmRwriter, Complex) { auto out = builder.Add(p, q); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{c.id(), z.id()}, std::back_inserter(input_ids), diff --git a/paddle/cinn/frontend/pass/pass_test_helper.h b/paddle/cinn/frontend/pass/pass_test_helper.h index 0aeebef4022fec..89e98cf5cc5a7e 100644 --- a/paddle/cinn/frontend/pass/pass_test_helper.h +++ b/paddle/cinn/frontend/pass/pass_test_helper.h @@ -73,7 +73,7 @@ inline void PrintMatrix(const std::vector& mat, int bs, int m, int n) { } inline void RunGraph(std::shared_ptr graph, - const common::Target& target, + const cinn::common::Target& target, const std::shared_ptr& scope, const std::vector& output_ids, const std::vector& graph_passes) { @@ -91,7 +91,7 @@ inline void RunGraph(std::shared_ptr graph, inline std::vector RunProgram( const Program& program, - const common::Target& target, + const cinn::common::Target& target, const std::vector& input_ids, const std::vector& output_ids, const std::vector& graph_passes, @@ -177,7 +177,7 @@ struct OptimizeConfig { }; inline void CompareResult(Program* program, - const common::Target& target, + const cinn::common::Target& target, const std::vector& input_ids, const std::vector& output_ids, size_t size_diff, @@ -222,7 +222,7 @@ inline void CompareResult(Program* program, inline bool CompareProgramPassResult( Program* program, - const common::Target& target, + const cinn::common::Target& target, const std::unordered_set& fetch_ids, const size_t size_diff, const OptimizeConfig& passes) { diff --git a/paddle/cinn/frontend/pass/program_topoerror_test.cc b/paddle/cinn/frontend/pass/program_topoerror_test.cc index bf1355e16387f7..95f84729241691 100644 --- a/paddle/cinn/frontend/pass/program_topoerror_test.cc +++ b/paddle/cinn/frontend/pass/program_topoerror_test.cc @@ -60,7 +60,7 @@ TEST(TransposeFoldingInput, TransposeWithMultiMamtul) { auto out = builder.Add(dot1, dot2); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), diff --git a/paddle/cinn/frontend/pass/remove_identity.cc b/paddle/cinn/frontend/pass/remove_identity.cc index bd80a45701dedd..81719b75677bfb 100644 --- a/paddle/cinn/frontend/pass/remove_identity.cc +++ b/paddle/cinn/frontend/pass/remove_identity.cc @@ -144,7 +144,7 @@ class RemoveIdentityPass : public ProgramPass { protected: void ApplyImpl(Program* program, const std::unordered_set& fetch_ids, - const common::Target& target) override { + const cinn::common::Target& target) override { CollectInfo(*program, fetch_ids); VLOG(3) << "Total remove " << remove_idxs_.size() << " instructions."; diff --git a/paddle/cinn/frontend/pass/test_helper.h b/paddle/cinn/frontend/pass/test_helper.h index fb636a6d27c676..ea3ed61f1165b3 100644 --- a/paddle/cinn/frontend/pass/test_helper.h +++ b/paddle/cinn/frontend/pass/test_helper.h @@ -74,7 
+74,7 @@ std::vector CopyToVector(const hlir::framework::Tensor tensor) { class PassTest { public: - PassTest() { target_ = common::DefaultTarget(); } + PassTest() { target_ = cinn::common::DefaultTarget(); } int RunAndCheck(NetBuilder* builder, const std::vector& program_passes, diff --git a/paddle/cinn/frontend/pass/transpose_collapsing.cc b/paddle/cinn/frontend/pass/transpose_collapsing.cc index ecf71ae55a0aac..8f6530853acefe 100644 --- a/paddle/cinn/frontend/pass/transpose_collapsing.cc +++ b/paddle/cinn/frontend/pass/transpose_collapsing.cc @@ -78,7 +78,7 @@ class TransposeCollapsingPass : public ProgramPass { void ApplyImpl(Program* program, const std::unordered_set& fetch_ids, - const common::Target& target) const override { + const cinn::common::Target& target) const override { // `out2instr` is used to represent the mapping of Output to Instruction. OutputToOpMap out2instr; // `in2instr` is used to represent the mapping of Input to Instruction. diff --git a/paddle/cinn/frontend/pass/transpose_collapsing_test.cc b/paddle/cinn/frontend/pass/transpose_collapsing_test.cc index c82a4d3b3b24ce..b1cdf7f09e7c96 100644 --- a/paddle/cinn/frontend/pass/transpose_collapsing_test.cc +++ b/paddle/cinn/frontend/pass/transpose_collapsing_test.cc @@ -38,7 +38,7 @@ void SetInputData(const hlir::framework::Tensor& tensor, Target target) { host_memory[i] = static_cast(i); } #ifdef CINN_WITH_CUDA - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { cudaMemcpy(data, host_memory.data(), tensor->shape().numel() * sizeof(float), @@ -46,7 +46,7 @@ void SetInputData(const hlir::framework::Tensor& tensor, Target target) { return; } #endif - CHECK(target == common::DefaultHostTarget()); + CHECK(target == cinn::common::DefaultHostTarget()); std::copy(host_memory.begin(), host_memory.end(), data); } std::vector> RunWithProgram( @@ -88,7 +88,7 @@ TEST(TransposeCollapsing, FuseTwoTranspose) { auto x_t = builder.Transpose(x, {0, 2, 1}); auto out = builder.Transpose(x_t, {2, 1, 0}); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); std::initializer_list fetch_list = {out->id}; @@ -127,7 +127,7 @@ TEST(TransposeCollapsing, FuseThreeTranspose) { auto x_2t = builder.Transpose(x_1t, {2, 1, 0}); auto out = builder.Transpose(x_2t, {1, 2, 0}); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); std::initializer_list fetch_list = {out->id}; @@ -166,7 +166,7 @@ TEST(TransposeCollapsing, RemoveUselessTranspose) { auto x_t = builder.Transpose(x, {0, 1, 2}); auto out = builder.Add(x, x_t); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); std::initializer_list fetch_list = {out->id}; @@ -201,7 +201,7 @@ TEST(TransposeCollapsing, ReplaceUselessTransposeWithIndentity) { auto x = builder.CreateInput(Float(32), {4, 5, 3}, "X"); auto out = builder.Transpose(x, {0, 1, 2}); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); std::initializer_list fetch_list = {out->id}; @@ -241,7 +241,7 @@ TEST(TransposeCollapsing, FuseTransposeToUseless) { auto x_3t = builder.Transpose(x_2t, {0, 2, 1}); auto out = builder.Add(x_3t, x_3t); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); std::initializer_list fetch_list = {out->id}; @@ -286,7 +286,7 @@ 
TEST(TransposeCollapsing, FuseTransposeWithMultiOutput) { auto out2 = builder.Sqrt(x_2t); auto out3 = builder.Sqrt(x_3t); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); std::initializer_list fetch_list = { out1->id, out2->id, out3->id}; @@ -338,7 +338,7 @@ TEST(TransposeCollapsing, FuseTwoSecTranspose) { auto x_4t = builder.Transpose(x_3t, {2, 1, 0}); auto out2 = builder.Sqrt(x_4t); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); std::initializer_list fetch_list = {out1->id, out2->id}; @@ -384,7 +384,7 @@ TEST(TransposeCollapsing, FuseTwoHorizontalTranspose) { auto y_t2 = builder.Transpose(x, {0, 2, 1}); auto out = builder.Add(y_t1, y_t2); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); std::initializer_list fetch_list = {out->id}; @@ -426,7 +426,7 @@ TEST(TransposeCollapsing, FuseVerAndHorTranspose) { auto y_t3 = builder.Transpose(x, {1, 2, 0}); auto out = builder.Add(y_t2, y_t3); auto program = builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); std::initializer_list fetch_list = {out->id}; diff --git a/paddle/cinn/frontend/pass/transpose_folding_base.h b/paddle/cinn/frontend/pass/transpose_folding_base.h index 4acc8e4f6d1f67..1703505b94a27d 100644 --- a/paddle/cinn/frontend/pass/transpose_folding_base.h +++ b/paddle/cinn/frontend/pass/transpose_folding_base.h @@ -52,7 +52,7 @@ class TransposeFoldingBase : public ProgramPass { void ApplyImpl(Program* program, const std::unordered_set& fetch_ids, - const common::Target& target) override { + const cinn::common::Target& target) override { set_target_instrs(); set_fold_instrs(); set_skip_instrs(); diff --git a/paddle/cinn/frontend/pass/transpose_folding_input_test.cc b/paddle/cinn/frontend/pass/transpose_folding_input_test.cc index b8befcc609c830..708572cefd5f11 100644 --- a/paddle/cinn/frontend/pass/transpose_folding_input_test.cc +++ b/paddle/cinn/frontend/pass/transpose_folding_input_test.cc @@ -56,7 +56,7 @@ TEST(TransposeFoldingInput, FoldIntoDotBatchedCase1) { auto out = builder.Matmul(transpose_x, y); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -81,7 +81,7 @@ TEST(TransposeFoldingInput, FoldIntoDotBachedCase2) { auto out = builder.Matmul(x, transpose_y); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -107,7 +107,7 @@ TEST(TransposeFoldingInput, FoldIntoDotBachedCase3) { auto out = builder.Matmul(transpose_x, transpose_y); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -132,7 +132,7 @@ TEST(TransposeFoldingInput, FoldIntoDotCase1) { auto out = builder.Matmul(x, transpose_y); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector 
input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -162,7 +162,7 @@ TEST(TransposeFoldingInput, FoldIntoDotCase2) { auto out = builder.Add(d, q); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{c.id(), z.id()}, std::back_inserter(input_ids), @@ -187,7 +187,7 @@ TEST(TransposeFoldingInput, TransposeOutInFetchIds) { auto out = builder.Matmul(x, transpose_y); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -220,7 +220,7 @@ TEST(TransposeFoldingInput, TransposeOutUsedByOtherInstrs) { auto out = builder.Add(transpose_y, dot); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -249,7 +249,7 @@ TEST(TransposeFoldingInput, TransposeTwiceWithMatmul) { auto dot2 = builder.Matmul(z, x_t_t); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id(), z.id()}, std::back_inserter(input_ids), @@ -277,7 +277,7 @@ TEST(TransposeFoldingInput, TransposeWithMultiMamtul) { auto out = builder.Add(dot1, dot2); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), diff --git a/paddle/cinn/frontend/pass/transpose_folding_output_test.cc b/paddle/cinn/frontend/pass/transpose_folding_output_test.cc index 4004acbd8d0ea4..5a5e013b37fde0 100755 --- a/paddle/cinn/frontend/pass/transpose_folding_output_test.cc +++ b/paddle/cinn/frontend/pass/transpose_folding_output_test.cc @@ -43,7 +43,7 @@ TEST(TransposeFoldingOutput, BatchedMatmulTransLeft) { auto out = builder.Subtract(e, f); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), f.id()}, std::back_inserter(input_ids), @@ -71,7 +71,7 @@ TEST(TransposeFoldingOutput, BatchedGemmTransLeft) { auto out = builder.Add(e, f); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), f.id()}, std::back_inserter(input_ids), @@ -99,7 +99,7 @@ TEST(TransposeFoldingOutput, BatchedMatmulTransRight) { auto out = builder.Subtract(e, f); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), b.id(), f.id()}, std::back_inserter(input_ids), @@ -127,7 +127,7 @@ TEST(TransposeFoldingOutput, BatchedGemmTransRight) { auto out = builder.Add(e, f); auto program = builder.Build(); - common::Target target = 
common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), b.id(), f.id()}, std::back_inserter(input_ids), @@ -156,7 +156,7 @@ TEST(TransposeFoldingOutput, BatchedMatmulTransTwo) { auto out = builder.Subtract(f, g); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), f.id()}, std::back_inserter(input_ids), @@ -185,7 +185,7 @@ TEST(TransposeFoldingOutput, BatchedGemmTransTwo) { auto out = builder.Add(f, g); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), f.id()}, std::back_inserter(input_ids), @@ -212,7 +212,7 @@ TEST(TransposeFoldingOutput, BatchedMatmulNoTrans) { auto out = builder.Subtract(f, g); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), f.id()}, std::back_inserter(input_ids), @@ -239,7 +239,7 @@ TEST(TransposeFoldingOutput, BatchedGemmNoTrans) { auto out = builder.Add(f, g); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), f.id()}, std::back_inserter(input_ids), @@ -267,7 +267,7 @@ TEST(TransposeFoldingOutput, MatmulTransLeft) { auto out = builder.Subtract(e, f); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), f.id()}, std::back_inserter(input_ids), @@ -295,7 +295,7 @@ TEST(TransposeFoldingOutput, GemmTransLeft) { auto out = builder.Add(e, f); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), f.id()}, std::back_inserter(input_ids), @@ -323,7 +323,7 @@ TEST(TransposeFoldingOutput, MatmulTransRight) { auto out = builder.Subtract(e, f); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), b.id(), f.id()}, std::back_inserter(input_ids), @@ -351,7 +351,7 @@ TEST(TransposeFoldingOutput, GemmTransRight) { auto out = builder.Add(e, f); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), b.id(), f.id()}, std::back_inserter(input_ids), @@ -380,7 +380,7 @@ TEST(TransposeFoldingOutput, MatmulTransTwo) { auto out = builder.Subtract(f, g); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), f.id()}, std::back_inserter(input_ids), @@ -409,7 +409,7 @@ 
TEST(TransposeFoldingOutput, GemmTransTwo) { auto out = builder.Add(f, g); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), f.id()}, std::back_inserter(input_ids), @@ -436,7 +436,7 @@ TEST(TransposeFoldingOutput, MatmulNoTrans) { auto out = builder.Subtract(f, g); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), f.id()}, std::back_inserter(input_ids), @@ -463,7 +463,7 @@ TEST(TransposeFoldingOutput, GemmNoTrans) { auto out = builder.Add(f, g); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), c.id(), f.id()}, std::back_inserter(input_ids), @@ -505,7 +505,7 @@ TEST(TransposeFoldingOutput, BatchedComplex) { auto out = builder.Add(i, j); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{d.id(), z.id()}, std::back_inserter(input_ids), @@ -544,7 +544,7 @@ TEST(TransposeFoldingOutput, Complex) { auto out = builder.Add(i, j); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{c.id(), z.id()}, std::back_inserter(input_ids), @@ -575,7 +575,7 @@ TEST(TransposeFoldingOutput, MultiTransCaseOne) { auto out = builder.Add(h, g); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform( std::vector{a.id(), b.id(), e.id(), h.id()}, @@ -606,7 +606,7 @@ TEST(TransposeFoldingOutput, MultiTransCaseTwo) { auto out = builder.Add(h, g); auto program = builder.Build(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); std::vector input_ids; absl::c_transform(std::vector{a.id(), b.id(), h.id()}, std::back_inserter(input_ids), diff --git a/paddle/cinn/frontend/pass/transpose_scale_folding_test.cc b/paddle/cinn/frontend/pass/transpose_scale_folding_test.cc index 296ba7fba96a89..5d18b6ff8cbe22 100644 --- a/paddle/cinn/frontend/pass/transpose_scale_folding_test.cc +++ b/paddle/cinn/frontend/pass/transpose_scale_folding_test.cc @@ -40,7 +40,7 @@ TEST(ScaleFolding, FoldIntoDotBatchedCase1) { auto out = builder.Matmul(scale_x, y); auto program = builder.Build(); - common::Target target = common::DefaultTarget(); + cinn::common::Target target = cinn::common::DefaultTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -65,7 +65,7 @@ TEST(ScaleFolding, FoldIntoDotBatchedCase2) { auto out = builder.Matmul(scale_x, y); auto program = builder.Build(); - common::Target target = common::DefaultTarget(); + cinn::common::Target target = cinn::common::DefaultTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -90,7 +90,7 @@ TEST(ScaleFolding, FoldIntoDotBatchedCase3) { auto out = 
builder.Matmul(scale_x, y); auto program = builder.Build(); - common::Target target = common::DefaultTarget(); + cinn::common::Target target = cinn::common::DefaultTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -115,7 +115,7 @@ TEST(ScaleFolding, FoldIntoDotBatchedCase4) { auto out = builder.Matmul(x, scale_y); auto program = builder.Build(); - common::Target target = common::DefaultTarget(); + cinn::common::Target target = cinn::common::DefaultTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -141,7 +141,7 @@ TEST(ScaleFolding, FoldIntoDotBatchedCase5) { auto out = builder.Matmul(scale_x, scale_y); auto program = builder.Build(); - common::Target target = common::DefaultTarget(); + cinn::common::Target target = cinn::common::DefaultTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -168,7 +168,7 @@ TEST(ScaleFolding, FoldIntoDotBatchedCase6) { auto out = builder.Scale(orig_out, 2.0f); auto program = builder.Build(); - common::Target target = common::DefaultTarget(); + cinn::common::Target target = cinn::common::DefaultTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -198,7 +198,7 @@ TEST(TransposeScaleFolding, BatchComplexCase1) { auto out = builder.Transpose(scale_out, {0, 2, 1}); auto program = builder.Build(); - common::Target target = common::DefaultTarget(); + cinn::common::Target target = cinn::common::DefaultTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -228,7 +228,7 @@ TEST(TransposeScaleFolding, BatchComplexCase2) { auto out = builder.Scale(transpose_out, 2.0f); auto program = builder.Build(); - common::Target target = common::DefaultTarget(); + cinn::common::Target target = cinn::common::DefaultTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -254,7 +254,7 @@ TEST(TransposeScaleFolding, BatchComplexCase3) { auto out = builder.Matmul(transpose_x, scale_y); auto program = builder.Build(); - common::Target target = common::DefaultTarget(); + cinn::common::Target target = cinn::common::DefaultTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -279,7 +279,7 @@ TEST(TransposeScaleFolding, BatchComplexCase4) { auto out = builder.Matmul(transpose_x, scale_x); auto program = builder.Build(); - common::Target target = common::DefaultTarget(); + cinn::common::Target target = cinn::common::DefaultTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id()}, std::back_inserter(input_ids), @@ -308,7 +308,7 @@ TEST(TransposeScaleFolding, BatchComplexCase5) { auto out = builder.Matmul(transpose_o, z); auto program = builder.Build(); - common::Target target = common::DefaultTarget(); + cinn::common::Target target = cinn::common::DefaultTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -335,7 +335,7 @@ TEST(TransposeScaleFolding, BatchComplexCase6) { auto out = builder.Transpose(out_matmul, {0, 2, 1}); auto program = builder.Build(); - common::Target target = common::DefaultTarget(); + cinn::common::Target target = cinn::common::DefaultTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id()}, std::back_inserter(input_ids), @@ -364,7 +364,7 @@ 
TEST(TransposeBroadCastFolding, BatchComplexCase1) { auto out = builder.Scale(out_trans, 2.0f); auto program = builder.Build(); - common::Target target = common::DefaultTarget(); + cinn::common::Target target = cinn::common::DefaultTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), @@ -395,7 +395,7 @@ TEST(TransposeBroadCastFolding, BatchComplexCase2) { auto out = builder.Scale(out_trans, 2.0f); auto program = builder.Build(); - common::Target target = common::DefaultTarget(); + cinn::common::Target target = cinn::common::DefaultTarget(); std::vector input_ids; absl::c_transform(std::vector{x.id(), y.id()}, std::back_inserter(input_ids), diff --git a/paddle/cinn/frontend/program_pass.cc b/paddle/cinn/frontend/program_pass.cc index 1cd0903f97a031..3e60dbddb899a2 100644 --- a/paddle/cinn/frontend/program_pass.cc +++ b/paddle/cinn/frontend/program_pass.cc @@ -23,7 +23,7 @@ namespace frontend { void ProgramPass::Apply(Program* prog, const std::unordered_set& fetch_ids, - const common::Target& target, + const cinn::common::Target& target, const std::vector& passes) { std::vector fpass; for (auto& name : passes) { diff --git a/paddle/cinn/frontend/program_pass.h b/paddle/cinn/frontend/program_pass.h index ecdb23ef2e1f07..1572b3ed5c7ca2 100755 --- a/paddle/cinn/frontend/program_pass.h +++ b/paddle/cinn/frontend/program_pass.h @@ -38,7 +38,7 @@ class ProgramPass { */ static void Apply(Program* prog, const std::unordered_set& fetch_ids, - const common::Target& target, + const cinn::common::Target& target, const std::vector& passes); const std::string& name() const { return name_; } @@ -46,10 +46,10 @@ class ProgramPass { protected: virtual void ApplyImpl(Program* prog, const std::unordered_set& fetch_ids, - const common::Target& target) {} + const cinn::common::Target& target) {} virtual void ApplyImpl(Program* prog, const std::unordered_set& fetch_ids, - const common::Target& target) const { + const cinn::common::Target& target) const { return const_cast(this)->ApplyImpl(prog, fetch_ids, target); } diff --git a/paddle/cinn/frontend/syntax.cc b/paddle/cinn/frontend/syntax.cc index 9b534d8d146c01..8f9d9a44b13008 100644 --- a/paddle/cinn/frontend/syntax.cc +++ b/paddle/cinn/frontend/syntax.cc @@ -44,7 +44,8 @@ void Instruction::PrepareOutputs() { Instruction::Instruction(absl::string_view op_type, const std::vector& inputs, Program* parent) - : common::Shared<_Instruction_>(common::make_shared<_Instruction_>()) { + : cinn::common::Shared<_Instruction_>( + cinn::common::make_shared<_Instruction_>()) { get()->op_type = std::string(op_type); get()->parent_program = parent; get()->inputs = inputs; @@ -173,7 +174,7 @@ Variable Program::fused_meta_batchnorm_inference( epsilon = absl::get(attr_store.at("epsilon")); } auto eps_var = - primitive_const_scalar(epsilon, common::UniqName("epsilon")); + primitive_const_scalar(epsilon, cinn::common::UniqName("epsilon")); CHECK(!scale->shape.empty()) << "scale's shape is empty."; auto broadcast_eps = primitive_broadcast_to(eps_var, scale->shape, {0}); auto var_add_eps = add(variance, broadcast_eps); @@ -203,7 +204,7 @@ Variable Program::fused_batchnorm_inference( epsilon = absl::get(attr_store.at("epsilon")); } auto eps_var = - primitive_const_scalar(epsilon, common::UniqName("epsilon")); + primitive_const_scalar(epsilon, cinn::common::UniqName("epsilon")); CHECK(!scale->shape.empty()) << "scale's shape is empty."; auto var_add_eps = elementwise_add(variance, eps_var); auto rsrqt_var = 
primitive_rsqrt(var_add_eps);
@@ -301,7 +302,7 @@ LoadPaddleProgram(const std::string& model_dir,
                   std::unordered_map<std::string, std::vector<int64_t>>&
                       input_shape_map,  // NOLINT
                   bool is_combined,
-                  const common::Target& target) {
+                  const cinn::common::Target& target) {
   VLOG(1) << "Loading Paddle model from " << model_dir;
   PaddleModelToProgram paddle_to_program(scope, input_shape_map, target);
   return std::make_tuple(paddle_to_program(model_dir, is_combined),
diff --git a/paddle/cinn/frontend/syntax.h b/paddle/cinn/frontend/syntax.h
index a405e22ddb565c..a2fa024be41313 100644
--- a/paddle/cinn/frontend/syntax.h
+++ b/paddle/cinn/frontend/syntax.h
@@ -41,9 +41,9 @@ namespace frontend {
 struct Program;
 struct Variable;
 
-struct _Variable_ : public common::Object {
+struct _Variable_ : public cinn::common::Object {
   std::string id;
-  common::Type type;
+  cinn::common::Type type;
   std::vector<int> shape;
   bool is_const = false;
 
@@ -54,17 +54,18 @@ struct _Variable_ : public common::Object {
 /**
  * Variable represents the variable in a computation.
  */
-struct Variable : public common::Shared<_Variable_> {
+struct Variable : public cinn::common::Shared<_Variable_> {
   /**
    * Constructor.
    * @param id_hint The identifier of the variable, if null, a random ID will be
    * assigned.
    */
   explicit Variable(const std::string& id_hint = "")
-      : common::Shared<_Variable_>(common::make_shared<_Variable_>()) {
+      : cinn::common::Shared<_Variable_>(
+            cinn::common::make_shared<_Variable_>()) {
     if (!id_hint.empty()) CheckVarNameValid(id_hint);
-    get()->id =
-        id_hint.empty() ? common::Context::Global().NewName("var") : id_hint;
+    get()->id = id_hint.empty() ? cinn::common::Context::Global().NewName("var")
+                                : id_hint;
   }
 
   void set_id(const std::string& id) { operator->()->id = id; }
@@ -85,13 +86,14 @@ class Placeholder {
    * @param shape Shape of the fed
    * @param id ID of the fed
    */
-  Placeholder(const common::Type& type,
+  Placeholder(const cinn::common::Type& type,
               const std::vector<int>& shape,
               absl::string_view id_hint = "",
               bool is_const = false) {
     if (!id_hint.empty()) CheckVarNameValid(std::string(id_hint));
-    id_ = id_hint.empty() ? common::Context::Global().NewName("placeholder")
-                          : (std::string)id_hint;
+    id_ = id_hint.empty()
+              ? cinn::common::Context::Global().NewName("placeholder")
+              : (std::string)id_hint;
     var_ = Variable(id_);
     var_->shape = shape;
     var_->type = type;
@@ -124,7 +126,7 @@ class Placeholder {
 /**
  * Data of a Instruction.
  */
-struct _Instruction_ : public common::Object {
+struct _Instruction_ : public cinn::common::Object {
   using attr_t = hlir::framework::AttrType;
 
   std::string op_type;
@@ -145,7 +147,7 @@ struct _Instruction_ : public common::Object {
  * Instruction is the basic computational unit of a Program, similar to the
  * operator concept in a DNN platform.
  */
-struct Instruction : public common::Shared<_Instruction_> {
+struct Instruction : public cinn::common::Shared<_Instruction_> {
   explicit Instruction(absl::string_view op_type,
                        const std::vector<Variable>& inputs = {},
                        Program* parent = nullptr);
@@ -528,12 +530,13 @@ std::tuple<std::unique_ptr<Program>,
            absl::flat_hash_map<std::string, Variable>,
            absl::flat_hash_map<std::string, std::string>,
            absl::flat_hash_set<std::string>>
-LoadPaddleProgram(const std::string& model_dir,
-                  hlir::framework::Scope* scope,
-                  std::unordered_map<std::string, std::vector<int64_t>>&
-                      input_shape_map,  // NOLINT
-                  bool is_combined,
-                  const common::Target& target = common::DefaultHostTarget());
+LoadPaddleProgram(
+    const std::string& model_dir,
+    hlir::framework::Scope* scope,
+    std::unordered_map<std::string, std::vector<int64_t>>&
+        input_shape_map,  // NOLINT
+    bool is_combined,
+    const cinn::common::Target& target = cinn::common::DefaultHostTarget());
 
 std::ostream& operator<<(std::ostream& os, const Variable& x);
 std::ostream& operator<<(std::ostream& os, const Instruction& instr);
diff --git a/paddle/cinn/frontend/syntax_test.cc b/paddle/cinn/frontend/syntax_test.cc
index 1cc76ef2950619..1c38076a0b7851 100644
--- a/paddle/cinn/frontend/syntax_test.cc
+++ b/paddle/cinn/frontend/syntax_test.cc
@@ -63,7 +63,7 @@ TEST(syntax, basic) {
 
 TEST(syntax, program_execute_multi_elementwise_add) {
   auto program = CreateAddProgram();
-  Target target = common::DefaultTarget();
+  Target target = cinn::common::DefaultTarget();
   std::unordered_set<std::string> fetch_ids;
   auto graph = Optimize(&program, fetch_ids, target);
   // auto graph = std::make_shared<hlir::framework::Graph>(*program, target);
@@ -84,7 +84,7 @@ TEST(syntax, program_execute_multi_elementwise_add) {
 
 TEST(syntax, program_execute_multi_elementwise_add2) {
   auto program = CreateAddProgram();
-  Target target = common::DefaultTarget();
+  Target target = cinn::common::DefaultTarget();
   std::unordered_set<std::string> fetch_ids;
   auto graph = Optimize(&program, fetch_ids, target);
   LOG(INFO) << "graph:\n" << graph->Visualize();
@@ -119,7 +119,7 @@
       std::get<2>(programTuple);
 
   LOG(INFO) << "program:\n" << *program;
 
-  Target target = common::DefaultHostTarget();
+  Target target = cinn::common::DefaultHostTarget();
   std::unordered_set<std::string> fetch_ids;
   auto graph = cinn::frontend::Optimize(program.get(), fetch_ids, target);
diff --git a/paddle/cinn/frontend/var_type_utils.h b/paddle/cinn/frontend/var_type_utils.h
index b11c222da3f801..85a70ee4f53a99 100644
--- a/paddle/cinn/frontend/var_type_utils.h
+++ b/paddle/cinn/frontend/var_type_utils.h
@@ -24,10 +24,11 @@ namespace cinn {
 namespace frontend {
 namespace utils {
 
-inline common::Type CppVarType2CommonType(paddle::cpp::VarDescAPI::Type type) {
+inline cinn::common::Type CppVarType2CommonType(
+    paddle::cpp::VarDescAPI::Type type) {
 #define SET_TYPE_CASE_ITEM(v_type, c_type)    \
   case paddle::cpp::VarDescAPI::Type::v_type: \
-    return common::c_type();                  \
+    return cinn::common::c_type();            \
     break;
 
   static std::vector<std::string> var_type_names_ = {"BOOL",  // 0
@@ -87,7 +88,7 @@ inline common::Type CppVarType2CommonType(paddle::cpp::VarDescAPI::Type type) {
         << static_cast<int>(type) << ")";
   }
 #undef SET_DATA_TYPE_CASE_ITEM
-  return common::Type();
+  return cinn::common::Type();
 }
 
 inline OpMapperContext::FeedInfo GetFeedInfoFromDesc(
diff --git a/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc b/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc
index 33c8bbe1b86240..68a09ad7a9868b 100644
--- a/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc
+++ b/paddle/cinn/hlir/dialect/operator/ir/manual_op.cc
@@ -16,9 +16,9 @@
 #include <vector>
 
 #include "glog/logging.h"
+#include "paddle/common/enforce.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
 #include "paddle/pir/core/builtin_type.h"
-#include "paddle/pir/core/enforce.h"
 #include "paddle/pir/core/op_base.h"
 #include "paddle/pir/dialect/control_flow/ir/cf_op.h"
diff --git a/paddle/cinn/hlir/dialect/operator/transforms/add_broadcast_to_elementwise_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/add_broadcast_to_elementwise_pass.cc
index f8d95617c6cb4d..e36e3a3cc156c4 100644
--- a/paddle/cinn/hlir/dialect/operator/transforms/add_broadcast_to_elementwise_pass.cc
+++ b/paddle/cinn/hlir/dialect/operator/transforms/add_broadcast_to_elementwise_pass.cc
@@ -16,11 +16,11 @@
 
 #include "paddle/cinn/hlir/dialect/operator/ir/cinn_op.h"
 #include "paddle/cinn/hlir/framework/pir/utils.h"
+#include "paddle/common/ddim.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h"
 #include "paddle/fluid/pir/dialect/operator/ir/op_type.h"
 #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h"
 #include "paddle/fluid/pir/drr/api/match_context.h"
-#include "paddle/phi/core/ddim.h"
 #include "paddle/pir/core/builtin_dialect.h"
 #include "paddle/pir/pass/pass.h"
 #include "paddle/pir/pattern_rewrite/pattern_applicator.h"
diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass.cc
index 28eb1c0da8abcb..1c28039718a745 100644
--- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass.cc
+++ b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass.cc
@@ -193,7 +193,7 @@ class GraphGroupFuseHelper final : public FuseHelper {
         Visit(node_producer);
       }
     };
-    common::IsReachablePredicator<OpGroupPtr> is_reachable(
+    cinn::common::IsReachablePredicator<OpGroupPtr> is_reachable(
         MinDepth4Node, MaxDepth4Node, VisitNextNodes);
     return is_reachable(consumer, producer, [](OpGroupPtr) {});
   }
@@ -468,12 +468,12 @@ struct HorizontalFuseUtil {
     }
 
     size_t size_ele =
-        phi::product(GetMasterNode(*ele_group).outputs()[0].shape());
+        ::common::product(GetMasterNode(*ele_group).outputs()[0].shape());
 
     bool can_fuse = false;
     reduce_group->WalkOpNodes([&](const cinn::dialect::ir::OpNode& op) {
       if (op.kind() == OpPatternKind::kReduction) {
-        size_t size_master = phi::product(op.outputs()[0].shape());
+        size_t size_master = ::common::product(op.outputs()[0].shape());
         if (size_ele == size_master) {
           can_fuse = true;
         }
@@ -1880,13 +1880,13 @@ class GeneralFusionMergePassHelper {
         continue;
       }
 
-      auto producer_output_shape = phi::vectorize(
+      auto producer_output_shape = ::common::vectorize(
           GetValueShape((*producer->output_ops.begin())->result(0)));
 
-      auto consumer_output_shape = phi::vectorize(
+      auto consumer_output_shape = ::common::vectorize(
           GetValueShape((*consumer->output_ops.begin())->result(0)));
 
-      auto consumer_master_input_shape = phi::vectorize(GetValueShape(
+      auto consumer_master_input_shape = ::common::vectorize(GetValueShape(
           (*(consumer->master_ops.begin()))->operand_source(0)));
 
       int producer_output_numel =
@@ -1933,9 +1933,9 @@ class GeneralFusionMergePassHelper {
         continue;
       }
 
-      auto shape0 = phi::vectorize(
+      auto shape0 = ::common::vectorize(
           GetValueShape((*producer->output_ops.begin())->result(0)));
-      auto shape1 = phi::vectorize(
+      auto shape1 = ::common::vectorize(
           GetValueShape((*consumer->output_ops.begin())->result(0)));
 
       if (std::accumulate(
diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass_utils.h b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass_utils.h
index
1b996676d449df..7e874ecb8e95a1 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass_utils.h +++ b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_pass_utils.h @@ -47,7 +47,7 @@ static bool IsSameSize(const OpGroupPtr& src, const OpGroupPtr& dst) { auto size_0 = src_master_node.outputs()[0].shape(); auto size_1 = dst_master_node.outputs()[0].shape(); - return phi::product(size_0) == phi::product(size_1); + return ::common::product(size_0) == ::common::product(size_1); } static std::unordered_set GetInputOps( @@ -139,7 +139,8 @@ static int GetSharedSize(const cinn::dialect::ir::OpNode& op_node) { for (int idx = axes.back() + 1; idx < inshape.size(); ++idx) { lane = inshape[idx]; } - // int max_num_threads = common::DefaultNVGPUTarget().max_num_threads(); + // int max_num_threads = + // cinn::common::DefaultNVGPUTarget().max_num_threads(); int max_num_threads = 1000; if (lane > max_num_threads / 2) { return 0; diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_util.h b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_util.h index 7754a9e0932d3a..d59f673d53f7ba 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_util.h +++ b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/group_with_group_merge_util.h @@ -84,8 +84,8 @@ inline bool is_same_size(const std::shared_ptr& first, return true; } - auto size_0 = phi::product(output_var_0); - auto size_1 = phi::product(output_var_1); + auto size_0 = ::common::product(output_var_0); + auto size_1 = ::common::product(output_var_1); return size_0 == size_1; } @@ -145,11 +145,11 @@ inline bool honrizontal_elementwise_fuse_reduce( auto ele_node_shape = GetValueShape((*ele_group->master_ops.begin())->result(0)); - int32_t size_ele = phi::product(ele_node_shape); + int32_t size_ele = ::common::product(ele_node_shape); // TODO(phlrain): seems extrame danger herem, why compare multi Master Node? 
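// A minimal sketch (not part of the patch) of the helper swap the hunks above
// perform: after #59105, DDim and its free functions live in
// paddle/common/ddim.h under the top-level `common` namespace, written as
// `::common::` so it cannot be confused with `cinn::common`. The shapes below
// are made up for illustration.
#include "paddle/common/ddim.h"

// True when two shapes describe the same number of elements -- the condition
// the fusion passes above use when deciding whether two groups can be paired.
inline bool SameNumel(const ::common::DDim& a, const ::common::DDim& b) {
  // product() multiplies all extents, i.e. the element count of the shape.
  return ::common::product(a) == ::common::product(b);
}
// e.g. SameNumel(::common::make_ddim({2, 3, 4}), ::common::make_ddim({6, 4}))
// -> true: both hold 24 elements.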
for (auto* master : reduce_group->master_ops) { auto master_node_shape = GetValueShape(master->result(0)); - int32_t size_master = phi::product(master_node_shape); + int32_t size_master = ::common::product(master_node_shape); if (size_ele == size_master) { return true; } @@ -160,7 +160,7 @@ inline bool honrizontal_elementwise_fuse_reduce( inline bool elementwise_fuse_reduce(const std::shared_ptr& first, const std::shared_ptr& second) { - // if (helper->target_ == common::DefaultHostTarget()) { + // if (helper->target_ == cinn::common::DefaultHostTarget()) { // return true; // } // if same shape with horizontal relation @@ -205,11 +205,11 @@ inline bool elementwise_fuse_reduce(const std::shared_ptr& first, bool flag = true; auto first_node_shape = GetValueShape((*first->master_ops.begin())->result(0)); - int32_t size_first = phi::product(first_node_shape); + int32_t size_first = ::common::product(first_node_shape); for (::pir::Operation* master : masters_in_consumers) { auto second_node_shape = GetValueShape(master->result(0)); - int32_t size_second = phi::product(second_node_shape); + int32_t size_second = ::common::product(second_node_shape); if (size_first != size_second) { flag = false; break; @@ -301,10 +301,10 @@ inline bool broadcast_fuse_reduce(const std::shared_ptr& first, // CHECK(reducer) << "Can't find reduce op in group " << second->group_id; auto input_shape = GetValueShape(reducer->operand_source(0)); - auto input_size = phi::product(input_shape); + auto input_size = ::common::product(input_shape); auto output_shape = GetValueShape((*first->master_ops.begin())->result(0)); - auto output_size = phi::product(output_shape); + auto output_size = ::common::product(output_shape); if (input_size == output_size) { return elementwise_fuse_reduce(first, second); @@ -435,9 +435,9 @@ inline bool reduce_fuse_broadcast(const std::shared_ptr& first, // First type conditions // Get some reduce information auto reducer_input_shape = - phi::vectorize(GetValueShape(reducer->operand_source(0))); + ::common::vectorize(GetValueShape(reducer->operand_source(0))); auto reducer_output_shape = - phi::vectorize(GetValueShape(reducer->result(0))); + ::common::vectorize(GetValueShape(reducer->result(0))); std::vector reduce_axes = GetVectorAttr(reducer, "dim"); auto keep_dim = @@ -456,7 +456,7 @@ inline bool reduce_fuse_broadcast(const std::shared_ptr& first, reduce_size *= reducer_input_shape[idx - 1]; } // Check if the reduce size exceeds the hardware limit - // if (helper->target_ == common::DefaultNVGPUTarget() && + // if (helper->target_ == cinn::common::DefaultNVGPUTarget() && // reduce_size > helper->target_.max_num_threads()) { // return false; // } diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/op_with_group_merge_pass.cc b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/op_with_group_merge_pass.cc index fffd2edc027b61..54005eb22f25b3 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/op_with_group_merge_pass.cc +++ b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/op_with_group_merge_pass.cc @@ -186,7 +186,7 @@ bool WithoutLastDimInReduce(const std::vector& inshape, } int GetSharedSize(::pir::Operation* op) { - auto inshape = phi::vectorize(GetValueShape(op->result(0))); + auto inshape = ::common::vectorize(GetValueShape(op->result(0))); auto axes = GetVectorAttr(op, "dim"); @@ -195,8 +195,9 @@ int GetSharedSize(::pir::Operation* op) { for (size_t idx = axes.back() + 1; idx < inshape.size(); ++idx) { lane = inshape[idx]; } - // int 
max_num_threads = common::DefaultNVGPUTarget().max_num_threads(); - // todo(phlrain): get gpu max threads + // int max_num_threads = + // cinn::common::DefaultNVGPUTarget().max_num_threads(); todo(phlrain): get + // gpu max threads int max_num_threads = 2048; if (lane > max_num_threads / 2) { return 0; diff --git a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/op_with_group_merge_util.h b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/op_with_group_merge_util.h index ef8aa1fd2d565d..5169ef85198933 100644 --- a/paddle/cinn/hlir/dialect/operator/transforms/group_merge/op_with_group_merge_util.h +++ b/paddle/cinn/hlir/dialect/operator/transforms/group_merge/op_with_group_merge_util.h @@ -97,14 +97,14 @@ inline bool is_same_size(::pir::Operation* producer, if (producer_shape == consumer_shape) { return true; } - auto psize = phi::product(producer_shape); - auto csize = phi::product(consumer_shape); + auto psize = ::common::product(producer_shape); + auto csize = ::common::product(consumer_shape); return psize == csize; } inline bool without_last_dimension_in_reduce( ::pir::Operation* producer, const std::shared_ptr& consumer) { - auto in_shape = phi::vectorize(GetFirstInputShape(producer)); + auto in_shape = ::common::vectorize(GetFirstInputShape(producer)); auto reduce_axes = GetVectorAttr(producer, "dim"); return WithoutLastDimInReduce(in_shape, reduce_axes); } @@ -121,14 +121,14 @@ inline bool reduce_fuse_reduce(::pir::Operation* producer, } // check reduce has same input shape and output shape auto producer_input_shape = - phi::vectorize(GetValueShape(producer->operand_source(0))); + ::common::vectorize(GetValueShape(producer->operand_source(0))); auto producer_output_shape = - phi::vectorize(GetValueShape(producer->result(0))); + ::common::vectorize(GetValueShape(producer->result(0))); auto reducer_input_shape = - phi::vectorize(GetValueShape(reducer->operand_source(0))); + ::common::vectorize(GetValueShape(reducer->operand_source(0))); auto reducer_output_shape = - phi::vectorize(GetValueShape(reducer->result(0))); + ::common::vectorize(GetValueShape(reducer->result(0))); auto producer_reduce_dim = GetVectorAttr(producer, "dim"); auto reducer_reduce_dim = GetVectorAttr(reducer, "dim"); @@ -238,7 +238,7 @@ inline bool horizontal_or_vertical_reduce_relation( } // check producer has same shape with reducer op. 
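// Sketch of what the TODO above leaves open (a hypothetical helper, not part
// of the patch): the hard-coded `int max_num_threads = 2048;` stands in for
// querying the active target, as the commented-out line names.
#include "paddle/cinn/common/target.h"

inline int MaxNumThreadsForSchedule() {
#ifdef CINN_WITH_CUDA
  // max_num_threads() reports the per-block thread limit of the default
  // NVGPU target, which is what the shared-memory heuristic actually needs.
  return cinn::common::DefaultNVGPUTarget().max_num_threads();
#else
  return 2048;  // mirrors the placeholder the pass uses on non-GPU builds
#endif
}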
- auto reduce_shape = phi::vectorize(GetFirstInputShape(reducer)); + auto reduce_shape = ::common::vectorize(GetFirstInputShape(reducer)); auto reduce_axes = GetVectorAttr(reducer, "dim"); for (auto& axis : reduce_axes) { @@ -248,8 +248,9 @@ inline bool horizontal_or_vertical_reduce_relation( } } - auto op_shape = phi::vectorize(GetValueShape(producer->result(0))); - // auto op_shape = phi::vectorize(GetFirstInputShape(producer)); + auto op_shape = + ::common::vectorize(GetValueShape(producer->result(0))); + // auto op_shape = ::common::vectorize(GetFirstInputShape(producer)); auto op_size = std::accumulate( op_shape.begin(), op_shape.end(), 1, std::multiplies()); auto reduce_size = std::accumulate( @@ -273,7 +274,7 @@ inline bool horizontal_or_vertical_reduce_relation( break; } - // helper->target_ == common::DefaultNVGPUTarget() + // helper->target_ == cinn::common::DefaultNVGPUTarget() // succesive_reduce_dimension <= helper->target_.max_num_threads() // TODO(phlrain): support is_gpu_target and max_thread bool is_gpu_target = true; @@ -330,11 +331,12 @@ inline bool reduce_fuse_broadcast(::pir::Operation* producer, return false; } - // if (helper->target_ != common::DefaultNVGPUTarget()) { + // if (helper->target_ != cinn::common::DefaultNVGPUTarget()) { // return true; // } - auto rinput_shape = phi::vectorize(GetFirstInputShape(producer)); + auto rinput_shape = + ::common::vectorize(GetFirstInputShape(producer)); auto reduce_axes = GetVectorAttr(producer, "dim"); auto keep_dim = producer->attributes() .at("keep_dim") @@ -359,7 +361,7 @@ inline bool reduce_fuse_broadcast(::pir::Operation* producer, // } auto routput_shape = - phi::vectorize(GetValueShape(producer->result(0))); + ::common::vectorize(GetValueShape(producer->result(0))); auto find_reducer = [&](::pir::Operation* op, ::pir::Operation* reducer, diff --git a/paddle/cinn/hlir/dialect/runtime/ir/jit_kernel_op.cc b/paddle/cinn/hlir/dialect/runtime/ir/jit_kernel_op.cc index cb996d9c8bd74f..3d6b82dff36651 100644 --- a/paddle/cinn/hlir/dialect/runtime/ir/jit_kernel_op.cc +++ b/paddle/cinn/hlir/dialect/runtime/ir/jit_kernel_op.cc @@ -16,8 +16,8 @@ #include "paddle/cinn/hlir/dialect/operator/ir/op_attribute.h" #include "paddle/cinn/hlir/framework/pir_compiler.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/builtin_attribute.h" -#include "paddle/pir/core/enforce.h" namespace cinn { namespace dialect { diff --git a/paddle/cinn/hlir/framework/accuracy_checker.cc b/paddle/cinn/hlir/framework/accuracy_checker.cc index 95b5b4102414f5..82777b89ba52ad 100644 --- a/paddle/cinn/hlir/framework/accuracy_checker.cc +++ b/paddle/cinn/hlir/framework/accuracy_checker.cc @@ -237,7 +237,7 @@ std::string AccuracyChecker::CheckTensor(const Tensor& tensor, const std::string& arg_name) { Tensor cpu_tensor; cpu_tensor->Resize(tensor->shape()); - T* dst = cpu_tensor->mutable_data(common::DefaultHostTarget()); + T* dst = cpu_tensor->mutable_data(cinn::common::DefaultHostTarget()); const T* src = tensor->data(); size_t numel = tensor->shape().numel(); @@ -259,7 +259,7 @@ std::string AccuracyChecker::CheckBuffer(const cinn_buffer_t* buffer, Tensor cpu_tensor; cpu_tensor->Resize(Shape(shape)); - T* dst = cpu_tensor->mutable_data(common::DefaultHostTarget()); + T* dst = cpu_tensor->mutable_data(cinn::common::DefaultHostTarget()); const T* src = reinterpret_cast(buffer->memory); size_t numel = cpu_tensor->shape().numel(); @@ -273,12 +273,12 @@ std::string AccuracyChecker::CheckBuffer(const cinn_buffer_t* buffer, template void 
AccuracyChecker::MemcpyDeviceToHost(const T* src, size_t numel, T* dst) { #ifdef CINN_WITH_CUDA - if (target_ == common::DefaultNVGPUTarget()) { + if (target_ == cinn::common::DefaultNVGPUTarget()) { cudaMemcpy(dst, src, numel * sizeof(T), cudaMemcpyDeviceToHost); return; } #endif - if (target_ == common::DefaultHostTarget()) { + if (target_ == cinn::common::DefaultHostTarget()) { for (size_t i = 0; i < numel; ++i) { dst[i] = src[i]; } diff --git a/paddle/cinn/hlir/framework/accuracy_checker_test.cc b/paddle/cinn/hlir/framework/accuracy_checker_test.cc index f5070101f8eb79..05efb7bd925c6f 100644 --- a/paddle/cinn/hlir/framework/accuracy_checker_test.cc +++ b/paddle/cinn/hlir/framework/accuracy_checker_test.cc @@ -49,20 +49,20 @@ void SetRandomTensor(Tensor tensor, Target target, bool generate_nan) { GenerateRandomData(random_nan_vec.data(), numel, generate_nan); #ifdef CINN_WITH_CUDA - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { cudaMemcpy(dst, random_nan_vec.data(), numel * sizeof(float), cudaMemcpyHostToDevice); } #endif - if (target == common::DefaultHostTarget()) { + if (target == cinn::common::DefaultHostTarget()) { std::copy(random_nan_vec.begin(), random_nan_vec.end(), dst); } } TEST(AccuracyChecker, tensor) { - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); Scope scope; scope.Var("x"); auto out = scope.GetTensor("x"); @@ -106,7 +106,7 @@ void InstantiateScope(Scope* scope, Target target) { } TEST(AccuracyChecker, instruction) { - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Scope scope; InstantiateScope(&scope, target); @@ -148,7 +148,7 @@ void InitName2PodArgs(Target target, } TEST(AccuracyChecker, instruction_podargs) { - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); std::vector args_buffer(2); std::map name2podargs; InitName2PodArgs(target, &args_buffer, &name2podargs); diff --git a/paddle/cinn/hlir/framework/buffer.cc b/paddle/cinn/hlir/framework/buffer.cc index 83427abe9cbe7d..524433c165a5cf 100755 --- a/paddle/cinn/hlir/framework/buffer.cc +++ b/paddle/cinn/hlir/framework/buffer.cc @@ -44,7 +44,7 @@ void Buffer::Resize(uint32_t alignment, uint32_t size) { } } -void Buffer::SetTarget(const common::Target& target) { +void Buffer::SetTarget(const cinn::common::Target& target) { target_ = target; memory_mng_cache_ = MemoryManager::Global().RetrieveSafely(target_.arch); } @@ -59,7 +59,7 @@ void Buffer::ResizeLazy(uint32_t alignment, uint32_t size) { Resize(alignment, size); } -void Buffer::Resize(uint32_t size, const common::Target& target) { +void Buffer::Resize(uint32_t size, const cinn::common::Target& target) { if (target.arch != target_.arch) { Free(); SetTarget(target); @@ -69,7 +69,7 @@ void Buffer::Resize(uint32_t size, const common::Target& target) { void Buffer::Resize(uint32_t alignment, uint32_t size, - const common::Target& target) { + const cinn::common::Target& target) { if (target.arch != target_.arch) { Free(); SetTarget(target); @@ -77,7 +77,7 @@ void Buffer::Resize(uint32_t alignment, Resize(alignment, size); } -void Buffer::ResizeLazy(uint32_t size, const common::Target& target) { +void Buffer::ResizeLazy(uint32_t size, const cinn::common::Target& target) { if (target.arch != target_.arch) { Free(); SetTarget(target); @@ -87,7 +87,7 @@ void Buffer::ResizeLazy(uint32_t size, const common::Target& target) { void Buffer::ResizeLazy(uint32_t alignment, uint32_t 
size, - const common::Target& target) { + const cinn::common::Target& target) { if (target.arch != target_.arch) { Free(); SetTarget(target); diff --git a/paddle/cinn/hlir/framework/buffer.h b/paddle/cinn/hlir/framework/buffer.h index 4d5e7cb0afbeaf..78d832c5493b4d 100644 --- a/paddle/cinn/hlir/framework/buffer.h +++ b/paddle/cinn/hlir/framework/buffer.h @@ -34,7 +34,7 @@ namespace framework { */ struct Buffer final { Buffer() = default; - explicit Buffer(const common::Target& target) { SetTarget(target); } + explicit Buffer(const cinn::common::Target& target) { SetTarget(target); } ~Buffer() { Free(); } //! Resize the memory hold by this buffer *exactlly* to \p size. void Resize(uint32_t size); @@ -45,16 +45,18 @@ struct Buffer final { void ResizeLazy(uint32_t alignment, uint32_t size); //! Resize the memory to \p size in target \p target. - void Resize(uint32_t size, const common::Target& target); - void Resize(uint32_t alignment, uint32_t size, const common::Target& target); + void Resize(uint32_t size, const cinn::common::Target& target); + void Resize(uint32_t alignment, + uint32_t size, + const cinn::common::Target& target); //! Lazily resize the memory to \p size in target \p target. - void ResizeLazy(uint32_t size, const common::Target& target); + void ResizeLazy(uint32_t size, const cinn::common::Target& target); void ResizeLazy(uint32_t alignment, uint32_t size, - const common::Target& target); + const cinn::common::Target& target); - void SetTarget(const common::Target& target); + void SetTarget(const cinn::common::Target& target); const cinn_buffer_t* data() const { return &data_; } cinn_buffer_t* data() { return &data_; } @@ -81,7 +83,7 @@ struct Buffer final { cinn_buffer_t data_; //! The place where this buffer locates. - common::Target target_; + cinn::common::Target target_; //! Number of bytes of this buffer. 
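// Usage sketch for the Buffer interface above, assuming only what the
// buffer.h/buffer.cc hunks show: the target-taking Resize/ResizeLazy overloads
// first compare architectures, and on a mismatch free the old allocation and
// rebind the buffer to the new target's memory manager before resizing.
#include "paddle/cinn/common/target.h"
#include "paddle/cinn/hlir/framework/buffer.h"

void BufferResizeDemo() {
  using cinn::hlir::framework::Buffer;
  Buffer buffer(cinn::common::DefaultHostTarget());
  buffer.Resize(16 * sizeof(float));
  auto* data = reinterpret_cast<float*>(buffer.data()->memory);
  for (int i = 0; i < 16; ++i) data[i] = static_cast<float>(i);
  // Same arch: plain lazy grow. A different arch (e.g. NVGPU) would trigger
  // Free() + SetTarget() before the resize, per buffer.cc above.
  buffer.ResizeLazy(32 * sizeof(float), cinn::common::DefaultHostTarget());
}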
uint32_t size_{}; diff --git a/paddle/cinn/hlir/framework/buffer_test.cc b/paddle/cinn/hlir/framework/buffer_test.cc index 6687077b263098..2fa644321b29b5 100755 --- a/paddle/cinn/hlir/framework/buffer_test.cc +++ b/paddle/cinn/hlir/framework/buffer_test.cc @@ -25,7 +25,7 @@ namespace hlir { namespace framework { TEST(Buffer, basic) { - Buffer buffer(common::DefaultHostTarget()); + Buffer buffer(cinn::common::DefaultHostTarget()); buffer.Resize(10 * sizeof(float)); auto* data = reinterpret_cast(buffer.data()->memory); for (int i = 0; i < 10; i++) data[i] = i; @@ -34,7 +34,7 @@ TEST(Buffer, basic) { #ifdef CINN_WITH_CUDA TEST(Buffer, nvgpu) { const int num_elements = 10; - Buffer buffer(common::DefaultNVGPUTarget()); + Buffer buffer(cinn::common::DefaultNVGPUTarget()); buffer.Resize(num_elements * sizeof(float)); auto* data = reinterpret_cast(buffer.data()->memory); std::vector host_data(num_elements); diff --git a/paddle/cinn/hlir/framework/graph.cc b/paddle/cinn/hlir/framework/graph.cc index faf8cd9f38c7e6..c94f150e3937c5 100644 --- a/paddle/cinn/hlir/framework/graph.cc +++ b/paddle/cinn/hlir/framework/graph.cc @@ -31,7 +31,7 @@ namespace cinn { namespace hlir { namespace framework { -using DTypeDict = absl::flat_hash_map; +using DTypeDict = absl::flat_hash_map; using ShapeDict = absl::flat_hash_map; void Graph::Initialize(const frontend::Program& prog, @@ -51,7 +51,7 @@ void Graph::Initialize(const frontend::Program& prog, Shared node_ptr(node_tmp); node_tmp->attrs.attr_store = temp->attrs; for (auto& input_v : temp->inputs) { - common::GraphNode* graph_node = this->RetrieveNode(input_v->id); + cinn::common::GraphNode* graph_node = this->RetrieveNode(input_v->id); if (!graph_node) { dtype_dict[input_v->id] = input_v->type; shape_dict[input_v->id] = input_v->shape; @@ -65,7 +65,7 @@ void Graph::Initialize(const frontend::Program& prog, } int out_idx = 0; for (auto& output_v : temp->outputs) { - common::GraphNode* graph_node = this->RetrieveNode(output_v->id); + cinn::common::GraphNode* graph_node = this->RetrieveNode(output_v->id); if (!graph_node) { dtype_dict[output_v->id] = output_v->type; shape_dict[output_v->id] = output_v->shape; @@ -92,10 +92,11 @@ std::vector> Graph::FusionGroupsToGroups() { std::vector> groups; if (fusion_groups.empty()) { // if no fusion_groups, the graph will be treated as a big group - const auto& nodes = this->CollectNodes([](const common::GraphNode* node) { - return node->safe_as() != nullptr && - node->safe_as()->op() != nullptr; - }); + const auto& nodes = + this->CollectNodes([](const cinn::common::GraphNode* node) { + return node->safe_as() != nullptr && + node->safe_as()->op() != nullptr; + }); std::vector group; group.reserve(nodes.size()); for (auto* node : nodes) { @@ -200,8 +201,9 @@ std::string Graph::DebugGroupedGraph( const auto& shape = shape_dict.count(id) ? cinn::utils::Join(shape_dict.at(id), ", ") : "-1"; - const auto& dtype = - dtype_dict.count(id) ? common::Type2Str(dtype_dict.at(id)) : "float32"; + const auto& dtype = dtype_dict.count(id) + ? 
cinn::common::Type2Str(dtype_dict.at(id)) + : "float32"; // generator python create_input code debug_str << " " << id << " = builder.create_input(type=\"" << dtype diff --git a/paddle/cinn/hlir/framework/graph.h b/paddle/cinn/hlir/framework/graph.h index 4c014043a1e248..d6ef914f0846ec 100644 --- a/paddle/cinn/hlir/framework/graph.h +++ b/paddle/cinn/hlir/framework/graph.h @@ -198,16 +198,20 @@ class Graph : public cinn::common::Graph { std::vector> fusion_groups; void RegisterNode(size_t key, Node* node) { - this->common::Graph::RegisterNode(key, node->as()); + this->cinn::common::Graph::RegisterNode( + key, node->as()); } void RegisterNode(size_t key, NodeData* node) { - this->common::Graph::RegisterNode(key, node->as()); + this->cinn::common::Graph::RegisterNode( + key, node->as()); } void RegisterNode(const std::string& key, Node* node) { - this->common::Graph::RegisterNode(key, node->as()); + this->cinn::common::Graph::RegisterNode( + key, node->as()); } void RegisterNode(const std::string& key, NodeData* node) { - this->common::Graph::RegisterNode(key, node->as()); + this->cinn::common::Graph::RegisterNode( + key, node->as()); } /** diff --git a/paddle/cinn/hlir/framework/graph_compiler.cc b/paddle/cinn/hlir/framework/graph_compiler.cc index acd4387efb7121..ffa599805f13ef 100644 --- a/paddle/cinn/hlir/framework/graph_compiler.cc +++ b/paddle/cinn/hlir/framework/graph_compiler.cc @@ -277,7 +277,7 @@ void GraphCompiler::InsertBufferHandlers( const auto& malloc_var_names = m_it->second; auto function_name = "malloc_buffer_instruction_" + std::to_string(step); auto malloc_instr = - std::make_unique(common::DefaultHostTarget(), + std::make_unique(cinn::common::DefaultHostTarget(), context->scope.get(), malloc_var_names, std::vector({}), @@ -300,7 +300,7 @@ void GraphCompiler::InsertBufferHandlers( const auto& free_var_names = f_it->second; auto function_name = "free_buffer_instruction_" + std::to_string(step); auto free_instr = - std::make_unique(common::DefaultHostTarget(), + std::make_unique(cinn::common::DefaultHostTarget(), context->scope.get(), std::vector({}), free_var_names, @@ -350,7 +350,7 @@ std::shared_ptr BuildScope(Target target, std::vector GetFuncFromImpl( const std::shared_ptr& impl, - const common::CINNValuePack& cinn_inputs, + const cinn::common::CINNValuePack& cinn_inputs, std::vector& all_arg_tensors, // NOLINT const std::vector& input_output_nodes, const std::string& node_id, @@ -359,7 +359,7 @@ std::vector GetFuncFromImpl( utils::EventType::kOrdinary); // 1.Call Op's Compute function, using the default stages and LowerVec to get // IR tree. - common::CINNValuePack C = impl->fcompute(cinn_inputs); + cinn::common::CINNValuePack C = impl->fcompute(cinn_inputs); // 2. Collect tensors and arguments // Add output tensors to all_arg_tensors @@ -367,7 +367,7 @@ std::vector GetFuncFromImpl( ir::Expr temp = C[i]; // checkout whether the tensor is with buffer. 
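// Why the RegisterNode hunks above spell out
// this->cinn::common::Graph::RegisterNode(...): a derived class's overloads
// hide every same-named base-class overload, so delegating to the base needs
// explicit qualification, and after this patch the qualification carries the
// full cinn::common path. Standalone illustration with toy types (not the
// real Graph/Node classes):
#include <cstdio>

namespace base_ns {
struct GraphNode {};
struct Graph {
  void RegisterNode(size_t key, GraphNode* node) {
    (void)node;
    std::printf("registered %zu\n", key);
  }
};
}  // namespace base_ns

struct Node : base_ns::GraphNode {};

struct Graph : base_ns::Graph {
  // Hides base_ns::Graph::RegisterNode; an unqualified call here would
  // resolve to this overload and recurse instead of reaching the base.
  void RegisterNode(size_t key, Node* node) {
    this->base_ns::Graph::RegisterNode(key, node);
  }
};

int main() {
  Graph g;
  Node n;
  g.RegisterNode(7, &n);  // prints "registered 7"
  return 0;
}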
if (!temp.as_tensor_ref()->buffer.defined() || - target != common::DefaultNVGPUTarget()) { + target != cinn::common::DefaultNVGPUTarget()) { all_arg_tensors.push_back(temp.as_tensor_ref()); } } @@ -386,18 +386,18 @@ std::vector GetFuncFromImpl( VLOG(4) << fun; } - std::vector schedule_inputs; + std::vector schedule_inputs; for (int i = 0; i < C.size() - 1; ++i) { CHECK(C[i].is_tensor()); - schedule_inputs.push_back(common::CINNValue(C[i])); + schedule_inputs.push_back(cinn::common::CINNValue(C[i])); } for (auto& f : funcs) { - schedule_inputs.push_back(common::CINNValue(f->body)); + schedule_inputs.push_back(cinn::common::CINNValue(f->body)); } // 3. Call Op's Schedule function, optimizing the IR tree by new IR schedule - common::CINNValuePack expr_pack = - impl->fschedule(common::CINNValuePack{schedule_inputs}); + cinn::common::CINNValuePack expr_pack = + impl->fschedule(cinn::common::CINNValuePack{schedule_inputs}); // 4. Optimize the LoweredFunc VLOG(3) << "expr_pack.size() is : " << expr_pack.size() diff --git a/paddle/cinn/hlir/framework/graph_compiler.h b/paddle/cinn/hlir/framework/graph_compiler.h index ddbc29b504efcf..d972fc856c825d 100644 --- a/paddle/cinn/hlir/framework/graph_compiler.h +++ b/paddle/cinn/hlir/framework/graph_compiler.h @@ -115,7 +115,7 @@ std::shared_ptr BuildScope(Target target, // Given params, lower the op to LoweredFunc using new IR Schedule std::vector GetFuncFromImpl( const std::shared_ptr& impl, - const common::CINNValuePack& cinn_inputs, + const cinn::common::CINNValuePack& cinn_inputs, std::vector& tensor_inputs, // NOLINT const std::vector& input_output_nodes, const std::string& node_id, diff --git a/paddle/cinn/hlir/framework/graph_compiler_test.cc b/paddle/cinn/hlir/framework/graph_compiler_test.cc index e9ee1a21a4edf8..49f959eb90174b 100644 --- a/paddle/cinn/hlir/framework/graph_compiler_test.cc +++ b/paddle/cinn/hlir/framework/graph_compiler_test.cc @@ -30,7 +30,7 @@ namespace cinn { namespace hlir { namespace framework { -using common::Float; +using cinn::common::Float; using frontend::Placeholder; TEST(GraphCompilerTest, TestRemoveInvaildVariables) { @@ -41,7 +41,7 @@ TEST(GraphCompilerTest, TestRemoveInvaildVariables) { auto c = builder.Add(a, b, 1); auto d = builder.Relu(c); - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); auto program = builder.Build(); auto graph = Optimize(&program, {}, target); @@ -66,7 +66,7 @@ TEST(GraphCompilerTest, TestInsertBufferHandlers) { auto c = builder.Add(a, b, 1); auto d = builder.Relu(c); - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); auto program = builder.Build(); auto graph = Optimize(&program, {}, target); auto scope = BuildScope(target, graph); @@ -191,7 +191,7 @@ void RunCublas( auto C = net_builder.Matmul(A, B, trans_a, trans_b); auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "TransToCustomCallPass"); @@ -245,7 +245,7 @@ TEST(GraphCompilerTest, TestLowering) { auto c = builder.Add(a, b, 1); auto d = builder.Relu(c); - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); auto program = builder.Build(); auto graph = Optimize(&program, {}, target); auto scope = BuildScope(target, graph); @@ -265,7 +265,7 @@ TEST(GraphCompilerTest, TestCodegenAndJit) { auto c = builder.Add(a, b, 1); auto d = builder.Relu(c); - 
auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); auto program = builder.Build(); auto graph = Optimize(&program, {}, target); auto scope = BuildScope(target, graph); @@ -285,7 +285,7 @@ TEST(GraphCompilerTest, TestBuildInstruction) { auto c = builder.Add(a, b, 1); auto d = builder.Relu(c); - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); auto program = builder.Build(); auto graph = Optimize(&program, {}, target); auto scope = BuildScope(target, graph); diff --git a/paddle/cinn/hlir/framework/graph_test.cc b/paddle/cinn/hlir/framework/graph_test.cc index 42aec09eca3c2d..b9741931d25235 100644 --- a/paddle/cinn/hlir/framework/graph_test.cc +++ b/paddle/cinn/hlir/framework/graph_test.cc @@ -35,7 +35,7 @@ TEST(Graph, visualize) { auto reduce_sum_1 = builder.ReduceSum(relu_1, {1}); auto program = builder.Build(); - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); auto graph = std::make_shared(program, target); ApplyPass(graph.get(), "OpFusion"); @@ -56,7 +56,7 @@ TEST(Graph, visualize_recompute) { auto add_3 = builder.Add(y, broadcast_to_2); auto program = builder.Build(); - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); auto graph = std::make_shared(program, target); ApplyPass(graph.get(), "OpFusionPass"); ApplyPass(graph.get(), "FusionMergePass"); diff --git a/paddle/cinn/hlir/framework/instruction.cc b/paddle/cinn/hlir/framework/instruction.cc index d9b624e518e22d..7a85318654efc5 100644 --- a/paddle/cinn/hlir/framework/instruction.cc +++ b/paddle/cinn/hlir/framework/instruction.cc @@ -83,7 +83,7 @@ void Instruction::UpdateArgsCache( args_cached_.resize(cache_size); for (int i = 0; i < cache_size; ++i) { - common::ArgsBuilder builder; + cinn::common::ArgsBuilder builder; std::vector all_args = in_args_[i]; all_args.insert( std::end(all_args), out_args_[i].begin(), out_args_[i].end()); @@ -175,7 +175,7 @@ void Instruction::Run( CHECK(fn_ptrs_[idx]) << "The LoweredFunc address should be set first by " "calling SetLoweredFunc method"; if (!dryrun) { - if (target_ == common::DefaultNVGPUTarget()) { + if (target_ == cinn::common::DefaultNVGPUTarget()) { ((lower_func_ptr_g)fn_ptrs_[idx])( static_cast(pod_args.data()), pod_args.size(), stream); } else { @@ -211,7 +211,7 @@ void Instruction::Run( pod_args[1], pod_args[2], static_cast(stream), - common::Layout::kNHWC); + cinn::common::Layout::kNHWC); } else { absl::flat_hash_map attrs_map = { @@ -231,7 +231,7 @@ void Instruction::Run( pod_args[1], pod_args[2], static_cast(stream), - common::Layout::kNCHW); + cinn::common::Layout::kNCHW); } } else if (str_attrs[0] == "backward_data") { // w, dy, dx @@ -322,7 +322,7 @@ void Instruction::Run( CHECK(fn_ptrs_[idx]) << "The LoweredFunc address should be set first by " "calling SetLoweredFunc method"; if (!dryrun) { - if (target_ == common::DefaultNVGPUTarget()) { + if (target_ == cinn::common::DefaultNVGPUTarget()) { ((lower_func_ptr_g)fn_ptrs_[idx])( static_cast(pod_args.data()), pod_args.size(), stream); } else { @@ -341,7 +341,7 @@ void Instruction::Run( CHECK(fn_ptrs_[idx]) << "The LoweredFunc address should be set first by " "calling SetLoweredFunc method"; if (!dryrun) { - if (target_ == common::DefaultNVGPUTarget()) { + if (target_ == cinn::common::DefaultNVGPUTarget()) { ((lower_func_ptr_g)fn_ptrs_[idx])( static_cast(pod_args.data()), pod_args.size(), stream); } else { diff --git 
a/paddle/cinn/hlir/framework/instruction.h b/paddle/cinn/hlir/framework/instruction.h index c6ec1aae014f5f..4e1a92e4b1c46a 100644 --- a/paddle/cinn/hlir/framework/instruction.h +++ b/paddle/cinn/hlir/framework/instruction.h @@ -109,7 +109,7 @@ class Instruction { auto& pod_args = args_cached_[idx]; CHECK(fn_ptrs_[idx]) << "The LoweredFunc address should be set first " "by calling SetLoweredFunc method"; - if (target_ == common::DefaultNVGPUTarget()) { + if (target_ == cinn::common::DefaultNVGPUTarget()) { ((lower_func_ptr_g)fn_ptrs_[idx])( static_cast(pod_args.data()), pod_args.size(), stream); } else { diff --git a/paddle/cinn/hlir/framework/instruction_test.cc b/paddle/cinn/hlir/framework/instruction_test.cc index 85c99282ee747d..2e2b412cf4fdff 100644 --- a/paddle/cinn/hlir/framework/instruction_test.cc +++ b/paddle/cinn/hlir/framework/instruction_test.cc @@ -46,7 +46,7 @@ std::unique_ptr GetLoweredFunc(int M, int N) { auto stages = CreateStages({z}); auto fn = Lower("fn", stages, {x, y, z}); - ir::Module::Builder builder("some_module", common::DefaultHostTarget()); + ir::Module::Builder builder("some_module", cinn::common::DefaultHostTarget()); builder.AddFunction(fn); auto jit = backends::SimpleJIT::Create(); @@ -59,7 +59,7 @@ void InstantiateScope(int M, int N, Scope* scope) { auto* var = scope->Var(name); auto& tensor = absl::get(*var); tensor->Resize(Shape{{M, N}}); - auto* data = tensor->mutable_data(common::DefaultHostTarget()); + auto* data = tensor->mutable_data(cinn::common::DefaultHostTarget()); for (int i = 0; i < M * N; i++) { data[i] = (rand() * 1.f) / RAND_MAX; // NOLINT } @@ -73,7 +73,8 @@ TEST(Instruction, basic) { Scope scope; InstantiateScope(M, N, &scope); // create Instruction - Instruction instr(common::DefaultHostTarget(), &scope, {"x", "y"}, {"z"}); + Instruction instr( + cinn::common::DefaultHostTarget(), &scope, {"x", "y"}, {"z"}); auto jit = GetLoweredFunc(M, N); auto fn_ptr = jit->Lookup("fn"); CHECK(fn_ptr); @@ -106,8 +107,8 @@ TEST(Instruction, RunWithRawPodArgs) { // case 1: create cinn_pod_value_t arguments dicrectly std::vector args_buffer( 3); // store {"x", "y", "z"} buffer objects - auto* default_memory_mng = - MemoryManager::Global().RetrieveSafely(common::DefaultHostTarget().arch); + auto* default_memory_mng = MemoryManager::Global().RetrieveSafely( + cinn::common::DefaultHostTarget().arch); int count = 0; for (const auto& name : std::vector({"x", "y", "z"})) { @@ -128,8 +129,10 @@ TEST(Instruction, RunWithRawPodArgs) { auto jit = GetLoweredFunc(M, N); auto fn_ptr = jit->Lookup("fn"); CHECK(fn_ptr); - Instruction instr( - common::DefaultHostTarget(), nullptr, {"x", "y"}, {"z"}); // empty scope + Instruction instr(cinn::common::DefaultHostTarget(), + nullptr, + {"x", "y"}, + {"z"}); // empty scope instr.SetLoweredFunc(reinterpret_cast(fn_ptr)); instr.Finalize(); @@ -312,12 +315,15 @@ TEST(Instruction, CONV_FORWARD) { Operator::GetAttrs("infershape")[conv2d]; CUDA_CALL(cudaSetDevice(0)); - auto buffer_x = - common::BufferBuilder(Float(32), {in, ic, ih, iw}).set_random().Build(); - auto buffer_w = - common::BufferBuilder(Float(32), {fn, fc, fh, fw}).set_random().Build(); - auto buffer_y = - common::BufferBuilder(Float(32), {on, oc, oh, ow}).set_random().Build(); + auto buffer_x = cinn::common::BufferBuilder(Float(32), {in, ic, ih, iw}) + .set_random() + .Build(); + auto buffer_w = cinn::common::BufferBuilder(Float(32), {fn, fc, fh, fw}) + .set_random() + .Build(); + auto buffer_y = cinn::common::BufferBuilder(Float(32), {on, oc, oh, ow}) + .set_random() 
+ .Build(); void *dev_x = nullptr, *dev_w = nullptr, *dev_y = nullptr; CUDA_CALL(cudaMalloc(&dev_x, buffer_x->memory_size)); @@ -353,7 +359,7 @@ TEST(Instruction, CONV_FORWARD) { std::vector pod_args = {x, w, y}; Scope scope; - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); std::vector in_args, out_args; TestInstruction instr(target, &scope, in_args, out_args, "conv2d"); @@ -403,12 +409,15 @@ TEST(Instruction, CONV_BACKWARD_DATA) { Operator::GetAttrs("infershape")[conv2d]; CUDA_CALL(cudaSetDevice(0)); - auto buffer_x = - common::BufferBuilder(Float(32), {in, ic, ih, iw}).set_random().Build(); - auto buffer_w = - common::BufferBuilder(Float(32), {fn, fc, fh, fw}).set_random().Build(); - auto buffer_y = - common::BufferBuilder(Float(32), {on, oc, oh, ow}).set_random().Build(); + auto buffer_x = cinn::common::BufferBuilder(Float(32), {in, ic, ih, iw}) + .set_random() + .Build(); + auto buffer_w = cinn::common::BufferBuilder(Float(32), {fn, fc, fh, fw}) + .set_random() + .Build(); + auto buffer_y = cinn::common::BufferBuilder(Float(32), {on, oc, oh, ow}) + .set_random() + .Build(); void *dev_x = nullptr, *dev_w = nullptr, *dev_y = nullptr; CUDA_CALL(cudaMalloc(&dev_x, buffer_x->memory_size)); @@ -445,7 +454,7 @@ TEST(Instruction, CONV_BACKWARD_DATA) { std::vector pod_args = {w, y, x}; Scope scope; - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); std::vector in_args, out_args; TestInstruction instr(target, &scope, in_args, out_args, "conv2d"); @@ -510,12 +519,15 @@ TEST(Instruction, CONV_BACKWARD_FILTER) { ASSERT_EQ(infer_shape[0][3], fw); CUDA_CALL(cudaSetDevice(0)); - auto buffer_x = - common::BufferBuilder(Float(32), {in, ic, ih, iw}).set_random().Build(); - auto buffer_w = - common::BufferBuilder(Float(32), {fn, fc, fh, fw}).set_random().Build(); - auto buffer_y = - common::BufferBuilder(Float(32), {on, oc, oh, ow}).set_random().Build(); + auto buffer_x = cinn::common::BufferBuilder(Float(32), {in, ic, ih, iw}) + .set_random() + .Build(); + auto buffer_w = cinn::common::BufferBuilder(Float(32), {fn, fc, fh, fw}) + .set_random() + .Build(); + auto buffer_y = cinn::common::BufferBuilder(Float(32), {on, oc, oh, ow}) + .set_random() + .Build(); void *dev_x = nullptr, *dev_w = nullptr, *dev_y = nullptr; CUDA_CALL(cudaMalloc(&dev_x, buffer_x->memory_size)); @@ -552,7 +564,7 @@ TEST(Instruction, CONV_BACKWARD_FILTER) { std::vector pod_args = {x, y, w}; Scope scope; - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); std::vector in_args, out_args; TestInstruction instr(target, &scope, in_args, out_args, "conv2d"); diff --git a/paddle/cinn/hlir/framework/memory.cc b/paddle/cinn/hlir/framework/memory.cc index 6c567bb84f6b72..bfc33b31beda9e 100755 --- a/paddle/cinn/hlir/framework/memory.cc +++ b/paddle/cinn/hlir/framework/memory.cc @@ -25,7 +25,7 @@ namespace cinn { namespace hlir { namespace framework { -using common::Target; +using cinn::common::Target; namespace { diff --git a/paddle/cinn/hlir/framework/memory.h b/paddle/cinn/hlir/framework/memory.h old mode 100755 new mode 100644 index ee84433ed29e45..3b8c59887d7fed --- a/paddle/cinn/hlir/framework/memory.h +++ b/paddle/cinn/hlir/framework/memory.h @@ -41,7 +41,7 @@ class MemoryInterface { */ class MemoryManager final { public: - using key_t = common::Target::Arch; + using key_t = cinn::common::Target::Arch; static MemoryManager& Global() { static auto* x = new MemoryManager; @@ -69,7 +69,8 @@ class 
MemoryManager final { private: MemoryManager(); - absl::flat_hash_map> + absl::flat_hash_map> memory_mngs_; CINN_DISALLOW_COPY_AND_ASSIGN(MemoryManager); diff --git a/paddle/cinn/hlir/framework/node.cc b/paddle/cinn/hlir/framework/node.cc index 20b2eb90921f02..4f50d930f4c7e2 100644 --- a/paddle/cinn/hlir/framework/node.cc +++ b/paddle/cinn/hlir/framework/node.cc @@ -22,22 +22,26 @@ namespace cinn { namespace hlir { namespace framework { -std::tuple Node::LinkTo( +std::tuple Node::LinkTo( NodeData* other) { - return this->common::GraphNode::LinkTo(other->as()); + return this->cinn::common::GraphNode::LinkTo( + other->as()); } -std::tuple NodeData::LinkTo( +std::tuple NodeData::LinkTo( Node* other) { - return this->common::GraphNode::LinkTo(other->as()); + return this->cinn::common::GraphNode::LinkTo( + other->as()); } void Node::Controls(NodeData* other) { - return this->common::GraphNode::Controls(other->as()); + return this->cinn::common::GraphNode::Controls( + other->as()); } void NodeData::Controls(Node* other) { - return this->common::GraphNode::Controls(other->as()); + return this->cinn::common::GraphNode::Controls( + other->as()); } namespace { @@ -82,15 +86,17 @@ std::ostream& operator<<(std::ostream& os, const NodeAttr& node_attr) { } //! Using index to sort the input/output tensors -bool edge_index_compare(const common::Shared& a, - const common::Shared& b) { +bool edge_index_compare( + const cinn::common::Shared& a, + const cinn::common::Shared& b) { CHECK_NOTNULL(a.get()); CHECK_NOTNULL(b.get()); return a->index() < b->index(); } -std::vector> Node::inlinks_in_order() const { - std::vector> ordered_links; +std::vector> +Node::inlinks_in_order() const { + std::vector> ordered_links; for (auto& in_edge : this->inlinks()) { ordered_links.push_back(in_edge); CHECK_GE(in_edge->index(), 0) @@ -101,8 +107,9 @@ std::vector> Node::inlinks_in_order() const { return ordered_links; } -std::vector> Node::outlinks_in_order() const { - std::vector> ordered_links; +std::vector> +Node::outlinks_in_order() const { + std::vector> ordered_links; for (auto& out_edge : this->outlinks()) { ordered_links.push_back(out_edge); CHECK_GE(out_edge->index(), 0) @@ -113,7 +120,7 @@ std::vector> Node::outlinks_in_order() const { return ordered_links; } -NodeData* InsertGraphOpNodeAfter(common::Graph* graph, +NodeData* InsertGraphOpNodeAfter(cinn::common::Graph* graph, Node* insert_node, NodeData* input_nodedata, Node* out_node, @@ -122,11 +129,11 @@ NodeData* InsertGraphOpNodeAfter(common::Graph* graph, CHECK(insert_node); CHECK(input_nodedata); input_nodedata->Controls(insert_node); - common::Shared node_ptr(insert_node); + cinn::common::Shared node_ptr(insert_node); auto* out_nodedata = new NodeData( - node_ptr, 0, 0, common::UniqName(insert_node->id() + "_out")); + node_ptr, 0, 0, cinn::common::UniqName(insert_node->id() + "_out")); insert_node->Controls(out_nodedata); - std::vector old_sources; + std::vector old_sources; auto input_links = out_node->inlinks_in_order(); if (out_node) { @@ -151,7 +158,7 @@ NodeData* InsertGraphOpNodeAfter(common::Graph* graph, return out_nodedata; } -NodeData* InsertGraphOpNodeBefore(common::Graph* graph, +NodeData* InsertGraphOpNodeBefore(cinn::common::Graph* graph, Node* insert_node, Node* input_node, NodeData* dst_data, @@ -161,9 +168,9 @@ NodeData* InsertGraphOpNodeBefore(common::Graph* graph, CHECK(input_node); CHECK(dst_data); auto node_ptr = dst_data->source_node; - auto* input_node_out = - new NodeData(node_ptr, 0, 0, common::UniqName(input_node->id() + "_out")); - 
std::vector old_sinks; + auto* input_node_out = new NodeData( + node_ptr, 0, 0, cinn::common::UniqName(input_node->id() + "_out")); + std::vector old_sinks; const auto& old_outlinks = input_node->outlinks_in_order(); for (auto& link : old_outlinks) { auto sink = link->sink(); @@ -173,7 +180,7 @@ NodeData* InsertGraphOpNodeBefore(common::Graph* graph, } input_node_out->Controls(insert_node); insert_node->Controls(dst_data); - dst_data->source_node = common::Shared(insert_node); + dst_data->source_node = cinn::common::Shared(insert_node); for (int i = 0; i < old_sinks.size(); i++) { if (i == pos) { diff --git a/paddle/cinn/hlir/framework/node.h b/paddle/cinn/hlir/framework/node.h index 31d316bbbff8d4..764492df45f383 100644 --- a/paddle/cinn/hlir/framework/node.h +++ b/paddle/cinn/hlir/framework/node.h @@ -32,7 +32,7 @@ namespace framework { class Node; class NodeData; -using NodePtr = common::Shared; +using NodePtr = cinn::common::Shared; using AttrType = utils::Attribute; using AttrMapType = utils::AttributeMap; @@ -65,7 +65,7 @@ std::ostream &operator<<(std::ostream &os, const NodeAttr &node_attr); /** * \brief Node represents an operation in a computation graph. */ -class Node : public common::GraphNode { +class Node : public cinn::common::GraphNode { public: Node() = default; Node(const Operator *op, const std::string &name, std::string id = {}) { @@ -74,7 +74,8 @@ class Node : public common::GraphNode { this->id_ = std::move(id); } const char *type_info() const override { return __type_info__; } - std::tuple LinkTo(NodeData *other); + std::tuple LinkTo( + NodeData *other); // This node determines another node, which means the other node depends on // this node. @@ -92,11 +93,13 @@ class Node : public common::GraphNode { //! Get the input tensors in order to match tensors correctly. If refreshed, //! we will update the links. - std::vector> inlinks_in_order() const; + std::vector> inlinks_in_order() + const; //! Get the output tensors in order to match tensors correctly. If refreshed, //! we will update the links. - std::vector> outlinks_in_order() const; + std::vector> outlinks_in_order() + const; inline const Operator *op() const { return this->attrs.op; } @@ -123,7 +126,7 @@ class Node : public common::GraphNode { template static NodePtr Create(Args &&...args) { - return common::Shared(new Node(std::forward(args)...)); + return cinn::common::Shared(new Node(std::forward(args)...)); } static constexpr char *__type_info__ = "hlir_framework_node"; @@ -138,7 +141,7 @@ class Node : public common::GraphNode { /** * \brief NodeData represents the output data from an operator. */ -class NodeData : public common::GraphNode { +class NodeData : public cinn::common::GraphNode { using attr_t = AttrType; public: @@ -155,7 +158,8 @@ class NodeData : public common::GraphNode { NodeData() : source_node(), output_index(), version(), id_(), is_const_() {} - std::tuple LinkTo(Node *other); + std::tuple LinkTo( + Node *other); // This node determines another node, which means the other node depends on // this node.
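
A note on the structure these node.cc/node.h hunks touch: Node is an operator and NodeData one of its output tensors, so the graph alternates op and tensor nodes, and inlinks_in_order()/outlinks_in_order() recover operand order by sorting the shared graph edges by their index. A minimal usage sketch, modeled on the InsertGraphOpNodeAfter logic above (op_node and consumer_node are hypothetical placeholders; the calls are the ones visible in this patch):

  cinn::common::Shared<Node> node_ptr(op_node);  // refcounted handle to the op
  auto* out = new NodeData(
      node_ptr, 0, 0, cinn::common::UniqName(op_node->id() + "_out"));
  op_node->Controls(out);         // op -> its output tensor
  out->Controls(consumer_node);   // output tensor -> consuming op
  for (auto& edge : op_node->outlinks_in_order()) {
    CHECK_GE(edge->index(), 0);   // edges come back sorted by operand index
  }
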
@@ -219,13 +223,13 @@ class NodeData : public common::GraphNode { }; // insert op_node after input_data -NodeData *InsertGraphOpNodeAfter(common::Graph *graph, +NodeData *InsertGraphOpNodeAfter(cinn::common::Graph *graph, Node *insert_node, NodeData *input_nodedata, Node *dst_node, int pos); // insert op_node before out_data -NodeData *InsertGraphOpNodeBefore(common::Graph *graph, +NodeData *InsertGraphOpNodeBefore(cinn::common::Graph *graph, Node *insert_node, Node *input_node, NodeData *dst_data, diff --git a/paddle/cinn/hlir/framework/op_lowering.h b/paddle/cinn/hlir/framework/op_lowering.h index 87d89360d4fff0..8e69ebe4a618fc 100644 --- a/paddle/cinn/hlir/framework/op_lowering.h +++ b/paddle/cinn/hlir/framework/op_lowering.h @@ -30,7 +30,7 @@ namespace cinn { namespace hlir { namespace framework { -using common::Target; +using cinn::common::Target; using GroupPtr = std::shared_ptr; template diff --git a/paddle/cinn/hlir/framework/op_lowering_impl.cc b/paddle/cinn/hlir/framework/op_lowering_impl.cc index f955e7b96cf61a..0db3788b007cca 100644 --- a/paddle/cinn/hlir/framework/op_lowering_impl.cc +++ b/paddle/cinn/hlir/framework/op_lowering_impl.cc @@ -32,8 +32,8 @@ namespace cinn { namespace hlir { namespace framework { -using common::bfloat16; -using common::float16; +using cinn::common::bfloat16; +using cinn::common::float16; using framework::Node; using framework::NodeData; @@ -41,7 +41,7 @@ using framework::OpPatternKind; using framework::shape_t; using framework::StrategyFunction; -using common::Type; +using cinn::common::Type; using cinn::hlir::op::ExternalApiRegistry; @@ -211,10 +211,11 @@ std::vector OpLowererImpl::LowerCustomCall( } else { external_api = ExternalApiRegistry::Global()->GetExternalApi(node, target_); } - std::vector compute_args = { - common::CINNValue(group->GetFuncName()), common::CINNValue(external_api)}; - common::CINNValuePack pack = - impl->fcompute(common::CINNValuePack{compute_args}); + std::vector compute_args = { + cinn::common::CINNValue(group->GetFuncName()), + cinn::common::CINNValue(external_api)}; + cinn::common::CINNValuePack pack = + impl->fcompute(cinn::common::CINNValuePack{compute_args}); if (pack.size() != 1) { std::ostringstream err_msg; err_msg << "Lowering custom call, group func name: " << group->GetFuncName() @@ -371,19 +372,19 @@ std::vector OpLowererImpl::DoOpLower( std::unordered_map* tensor_map, std::vector* op_func_arg_tensors) { VLOG(4) << "Do lower with Compute, op: " << node->op()->name; - std::vector cinn_inputs; + std::vector cinn_inputs; for (const ir::Tensor& tensor : *op_func_arg_tensors) { - cinn_inputs.push_back(common::CINNValue(ir::Expr(tensor))); + cinn_inputs.push_back(cinn::common::CINNValue(ir::Expr(tensor))); } // set tensor name = node data name std::vector node_datas = GetAllNodeData(node); for (const NodeData* node_data : node_datas) { - cinn_inputs.push_back(common::CINNValue(node_data->id())); + cinn_inputs.push_back(cinn::common::CINNValue(node_data->id())); } // 1.Do compute - common::CINNValuePack pack = - op_impl->fcompute(common::CINNValuePack{cinn_inputs}); + cinn::common::CINNValuePack pack = + op_impl->fcompute(cinn::common::CINNValuePack{cinn_inputs}); poly::StageMap tmp_stages = pack.back(); std::string post = ""; @@ -405,7 +406,7 @@ std::vector OpLowererImpl::DoOpLower( // Insert output tensors into function arg if (!expr.as_tensor_ref()->buffer.defined() || - this->target_ != common::DefaultNVGPUTarget()) { + this->target_ != cinn::common::DefaultNVGPUTarget()) { 
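// i.e. an output tensor that has no pre-bound buffer, or any output when // the lowering target is not NVGPU, is appended to the function's argument // list and then given a buffer of its own.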
op_func_arg_tensors->push_back(expr.as_tensor_ref()); expr.as_tensor_ref()->WithBuffer(); } @@ -448,18 +449,18 @@ ir::Expr OpLowererImpl::DoOpSchedule( const std::vector& op_func_arg_tensors, const std::vector& lowered_funcs) { VLOG(4) << "Do op schedule"; - std::vector schedule_inputs; + std::vector schedule_inputs; // 1.Collect tensors for (const ir::Tensor& op_func_arg_tensor : op_func_arg_tensors) { - schedule_inputs.push_back(common::CINNValue(op_func_arg_tensor)); + schedule_inputs.push_back(cinn::common::CINNValue(op_func_arg_tensor)); } // 2.Collect bodies to be scheduled for (const ir::LoweredFunc& func : lowered_funcs) { - schedule_inputs.push_back(common::CINNValue(func->body)); + schedule_inputs.push_back(cinn::common::CINNValue(func->body)); } // 3.Do schedule on AST - common::CINNValuePack expr_pack = - op_impl->fschedule(common::CINNValuePack{schedule_inputs}); + cinn::common::CINNValuePack expr_pack = + op_impl->fschedule(cinn::common::CINNValuePack{schedule_inputs}); VLOG(4) << "After op schedule: " << expr_pack[0].operator ir::Expr(); return expr_pack[0].operator ir::Expr(); diff --git a/paddle/cinn/hlir/framework/op_lowering_impl.h b/paddle/cinn/hlir/framework/op_lowering_impl.h index 5a562f4d1cabd0..895b73c87bdf1b 100644 --- a/paddle/cinn/hlir/framework/op_lowering_impl.h +++ b/paddle/cinn/hlir/framework/op_lowering_impl.h @@ -37,7 +37,7 @@ namespace hlir { namespace framework { using GroupPtr = std::shared_ptr; -using common::Target; +using cinn::common::Target; class OpLowererImpl; typedef bool (OpLowererImpl::*ScheduleDetermineFunction)(Node*); diff --git a/paddle/cinn/hlir/framework/op_lowering_test.cc b/paddle/cinn/hlir/framework/op_lowering_test.cc index 602003719e5746..07fcc7a48e0164 100644 --- a/paddle/cinn/hlir/framework/op_lowering_test.cc +++ b/paddle/cinn/hlir/framework/op_lowering_test.cc @@ -34,7 +34,7 @@ using frontend::RunDecomposer; void CodeGen(const ir::LoweredFunc& func) { #ifdef CINN_WITH_CUDA - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); Module::Builder builder("module_builder", target); builder.AddFunction(func); @@ -44,7 +44,7 @@ void CodeGen(const ir::LoweredFunc& func) { std::string code = ""; compiler->Build(module, code); #else - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); ir::Module::Builder builder("Module_Builder", target); builder.AddFunction(func); @@ -58,7 +58,7 @@ void CodeGen(const ir::LoweredFunc& func) { void Compile(NetBuilder& net_builder) { // NOLINT auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -1204,9 +1204,9 @@ TEST(OP_LOWERING, Reduce_Fusion_Test_21) { */ TEST(OpFusionPass, Block_Reduce_Fuse_Broadcast) { - int sm_count = common::DefaultNVGPUTarget().get_multi_processor_count(); + int sm_count = cinn::common::DefaultNVGPUTarget().get_multi_processor_count(); int max_threads_per_sm = - common::DefaultNVGPUTarget().get_max_threads_per_sm(); + cinn::common::DefaultNVGPUTarget().get_max_threads_per_sm(); int warp_reduce_threshold = sm_count * max_threads_per_sm / 32; int h = warp_reduce_threshold - 10; int w = 256; @@ -1222,9 +1222,9 @@ TEST(OpFusionPass, Block_Reduce_Fuse_Broadcast) { } TEST(OpFusionPass, Block_Reduce_Fuse_Elementwise) { - int sm_count = common::DefaultNVGPUTarget().get_multi_processor_count(); + int sm_count = 
cinn::common::DefaultNVGPUTarget().get_multi_processor_count(); int max_threads_per_sm = - common::DefaultNVGPUTarget().get_max_threads_per_sm(); + cinn::common::DefaultNVGPUTarget().get_max_threads_per_sm(); int warp_reduce_threshold = sm_count * max_threads_per_sm / 32; int h = warp_reduce_threshold - 10; int w = 256; @@ -1240,9 +1240,9 @@ TEST(OpFusionPass, Block_Reduce_Fuse_Elementwise) { Compile(net_builder); } TEST(OpFusionPass, Warp_Reduce_Fuse_Broadcast) { - int sm_count = common::DefaultNVGPUTarget().get_multi_processor_count(); + int sm_count = cinn::common::DefaultNVGPUTarget().get_multi_processor_count(); int max_threads_per_sm = - common::DefaultNVGPUTarget().get_max_threads_per_sm(); + cinn::common::DefaultNVGPUTarget().get_max_threads_per_sm(); int warp_reduce_threshold = sm_count * max_threads_per_sm / 32; int h = warp_reduce_threshold + 10; int w = 256; @@ -1258,9 +1258,9 @@ TEST(OpFusionPass, Warp_Reduce_Fuse_Broadcast) { } TEST(OpFusionPass, Warp_Reduce_Fuse_Elementwise) { - int sm_count = common::DefaultNVGPUTarget().get_multi_processor_count(); + int sm_count = cinn::common::DefaultNVGPUTarget().get_multi_processor_count(); int max_threads_per_sm = - common::DefaultNVGPUTarget().get_max_threads_per_sm(); + cinn::common::DefaultNVGPUTarget().get_max_threads_per_sm(); int warp_reduce_threshold = sm_count * max_threads_per_sm / 32; int h = warp_reduce_threshold + 10; int w = 256; diff --git a/paddle/cinn/hlir/framework/op_lowering_util.cc b/paddle/cinn/hlir/framework/op_lowering_util.cc index 1af9ef05763517..5a332324c7c89b 100644 --- a/paddle/cinn/hlir/framework/op_lowering_util.cc +++ b/paddle/cinn/hlir/framework/op_lowering_util.cc @@ -53,11 +53,11 @@ ir::Tensor GetTensor( return lang::Placeholder(node_data->id(), shape_dict.at(node_data->id())); } else if (dtype.is_bfloat16()) { - return lang::Placeholder(node_data->id(), - shape_dict.at(node_data->id())); + return lang::Placeholder( + node_data->id(), shape_dict.at(node_data->id())); } else if (dtype.is_float16()) { - return lang::Placeholder(node_data->id(), - shape_dict.at(node_data->id())); + return lang::Placeholder( + node_data->id(), shape_dict.at(node_data->id())); } else if (dtype.is_bool()) { return lang::Placeholder(node_data->id(), shape_dict.at(node_data->id())); @@ -546,7 +546,7 @@ bool WithoutLastDimInReduce(const std::vector& shape, void LoopOrderAssignReduce(ir::IRSchedule& ir_sch, // NOLINT const std::string& block_name, const std::vector& axes, - const common::Target& target, + const cinn::common::Target& target, const bool just_reorder = false) { // reorder non-last reduce axis to last. // like: shape = [16,16,16,16,16],axes = [1,3] -> new order = [0, 2, 4, 1, 3]. @@ -597,7 +597,7 @@ void LoopAssignReduceWithoutLast(ir::IRSchedule& ir_sch, // NOLINT const std::string& block_name, const std::vector& inshape, const std::vector& axes, - const common::Target& target) { + const cinn::common::Target& target) { int tail = 0; bool bound = true; auto shape = pe::GetFirstStepReduceShape(inshape, axes, bound, tail); @@ -711,11 +711,11 @@ void LoopAssignReduceWithLast(ir::IRSchedule& ir_sch, // NOLINT const std::string& block_name, const std::vector& inshape, const std::vector& axes, - const common::Target& target) { + const cinn::common::Target& target) { // If the number of current device SM is smaller than the number of SM // required by Warp Reduce, the performance of Warp Reduce is better. // Otherwise, use Block Reduce.
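// (Illustrative scale only, not values from this patch: the OpFusionPass // tests above size their inputs around sm_count * max_threads_per_sm / 32, // e.g. 80 SMs * 2048 threads / 32 = 5120 rows, to land just below or just // above this warp/block cutoff.)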
- auto max_num_threads = common::DefaultNVGPUTarget().max_num_threads(); + auto max_num_threads = cinn::common::DefaultNVGPUTarget().max_num_threads(); int need_reduce_last_count = 1; for (int i = 0; i < inshape.size(); i++) { if (find(axes.begin(), axes.end(), i) == axes.end()) { diff --git a/paddle/cinn/hlir/framework/op_strategy.h b/paddle/cinn/hlir/framework/op_strategy.h index b782e943b2c217..b0ff691828860c 100644 --- a/paddle/cinn/hlir/framework/op_strategy.h +++ b/paddle/cinn/hlir/framework/op_strategy.h @@ -36,12 +36,12 @@ using StrategyFunction = std::function( const std::vector&, const std::vector&, const std::vector>&, - const common::Target&)>; + const cinn::common::Target&)>; using InferShapeFunction = std::function>( const std::vector>&, const AttrMapType&)>; //! Operator implementation that includes compute and schedule function. -class OpImpl : public common::Object { +class OpImpl : public cinn::common::Object { public: //! Compute function CINNCompute fcompute; @@ -72,7 +72,7 @@ class OpImpl : public common::Object { * @param target The build target. * @return The computation schedule. */ - common::Shared GetSchedule( + cinn::common::Shared GetSchedule( const std::vector& outs, const std::vector& temp_tensors, const Target& target) { @@ -88,7 +88,7 @@ class OpImpl : public common::Object { }; //! Specialized implementations for operators under certain conditions. -class OpSpec : public common::Object { +class OpSpec : public cinn::common::Object { public: //! List of implementations. std::vector> implementations; @@ -119,7 +119,7 @@ class OpSpec : public common::Object { }; //! Operator strategy class. -class OpStrategy : public common::Object { +class OpStrategy : public cinn::common::Object { public: const char* type_info() const override { return __type_info__; } //! List of operator specializations. 
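
Taken together, the renames in op_lowering_impl.cc and op_strategy.h all orbit one calling convention: an operator's fcompute receives a cinn::common::CINNValuePack holding the input tensor exprs followed by the output tensor names, and returns a pack whose last element is the stage map; fschedule receives the tensors plus the lowered function bodies and returns the scheduled AST. A minimal sketch of that round trip (input_tensor, out_tensor, out_name, func, and op_impl are hypothetical placeholders; the types and calls are exactly the ones renamed above, and the op_test.cc hunk just below builds the same kind of pack):

  using cinn::common::CINNValue;
  using cinn::common::CINNValuePack;

  // Compute: input exprs first, then output names; stages ride at the back.
  std::vector<CINNValue> compute_args{CINNValue(ir::Expr(input_tensor)),
                                      CINNValue(out_name)};
  CINNValuePack pack = op_impl->fcompute(CINNValuePack{compute_args});
  poly::StageMap stages = pack.back();

  // Schedule: tensors first, then the AST bodies to be scheduled.
  std::vector<CINNValue> schedule_args{CINNValue(out_tensor),
                                       CINNValue(func->body)};
  CINNValuePack expr_pack = op_impl->fschedule(CINNValuePack{schedule_args});
  ir::Expr scheduled = expr_pack[0].operator ir::Expr();
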
diff --git a/paddle/cinn/hlir/framework/op_test.cc b/paddle/cinn/hlir/framework/op_test.cc index 6648cbac3e17fe..bba9baf2c60c66 100644 --- a/paddle/cinn/hlir/framework/op_test.cc +++ b/paddle/cinn/hlir/framework/op_test.cc @@ -46,7 +46,7 @@ TEST(Operator, GetAttrs) { NodeAttr attrs; std::vector inputs{A, B}; std::vector type{Float(32)}; - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); auto impl = OpStrategy::SelectImpl( strategy[add](attrs, inputs, type, {{100, 32}}, target)); @@ -56,10 +56,10 @@ TEST(Operator, GetAttrs) { std::string func_name = "add1"; std::string out_name = "C"; - common::CINNValuePack cinn_input = - common::CINNValuePack{{common::CINNValue(A), - common::CINNValue(B), - common::CINNValue(out_name)}}; + cinn::common::CINNValuePack cinn_input = + cinn::common::CINNValuePack{{cinn::common::CINNValue(A), + cinn::common::CINNValue(B), + cinn::common::CINNValue(out_name)}}; std::vector input_output_names{"A", "B", out_name}; auto funcs = framework::GetFuncFromImpl( diff --git a/paddle/cinn/hlir/framework/parallel_compiler.cc b/paddle/cinn/hlir/framework/parallel_compiler.cc index 3a15f7c42bef0d..57055a9eb20225 100644 --- a/paddle/cinn/hlir/framework/parallel_compiler.cc +++ b/paddle/cinn/hlir/framework/parallel_compiler.cc @@ -229,13 +229,14 @@ void ParallelCompiler::Task::CodegenAndJit() { VLOG(2) << "Start Codegen and JIT on Group " << group_id << " at thread: " << std::this_thread::get_id(); // build module - ir::Module::Builder builder(common::UniqName("module"), context->target); + ir::Module::Builder builder(cinn::common::UniqName("module"), + context->target); for (auto& func : pcompiler->result_.LoweredFuncs(group_id)) { builder.AddFunction(func); } auto ir_module = builder.Build(); - if (context->target == common::DefaultNVGPUTarget()) { + if (context->target == cinn::common::DefaultNVGPUTarget()) { #ifdef CINN_WITH_CUDA auto splited_module = backends::SplitCudaAndHostModule(ir_module); auto hmodule = std::get<0>(splited_module); diff --git a/paddle/cinn/hlir/framework/parallel_compiler_test.cc b/paddle/cinn/hlir/framework/parallel_compiler_test.cc index 0379da139ed947..79d502e744f6bb 100644 --- a/paddle/cinn/hlir/framework/parallel_compiler_test.cc +++ b/paddle/cinn/hlir/framework/parallel_compiler_test.cc @@ -31,7 +31,7 @@ TEST(ParallelCompilerTest, Add_TEST_0) { auto A = builder.CreateInput(Float(32), {128, 128}, "A"); auto B = builder.CreateInput(Float(32), {128, 128}, "B"); auto C = builder.Add(A, B); - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); auto program = builder.Build(); auto graph = std::make_shared(program, target); auto scope = BuildScope(target, graph); @@ -49,7 +49,7 @@ TEST(ParallelCompilerTest, Conv2d_Test_0) { auto D = builder.Conv2d(A, B, {2, 2}, {1, 1}); auto E = builder.Add(C, D); - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); auto program = builder.Build(); auto graph = frontend::Optimize(&program, {}, target); auto scope = BuildScope(target, graph); @@ -67,7 +67,7 @@ TEST(ParallelCompilerTest, Matmul_Test_0) { auto D = builder.Matmul(A, B); auto E = builder.Add(C, D); - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); auto program = builder.Build(); auto graph = frontend::Optimize(&program, {}, target); auto scope = BuildScope(target, graph); diff --git a/paddle/cinn/hlir/framework/pir/compilation_task.cc 
b/paddle/cinn/hlir/framework/pir/compilation_task.cc index cacd2061e07520..cc792f3fce6495 100644 --- a/paddle/cinn/hlir/framework/pir/compilation_task.cc +++ b/paddle/cinn/hlir/framework/pir/compilation_task.cc @@ -61,7 +61,8 @@ void CompilationTask::Lowering() { } void CompilationTask::CodegenAndJit() { - ir::Module::Builder builder(common::UniqName("module"), context_->target_); + ir::Module::Builder builder(cinn::common::UniqName("module"), + context_->target_); CHECK_EQ(context_->predicates_.size(), context_->lowered_funcs_.size()); for (const ir::Expr predicate : context_->predicates_) { builder.AddPredicate(predicate); diff --git a/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc b/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc index 9c48ec2326bffe..b1b4e5c23e3755 100644 --- a/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc +++ b/paddle/cinn/hlir/framework/pir/op_lowering_impl.cc @@ -29,8 +29,8 @@ #include "paddle/cinn/ir/schedule/ir_schedule.h" #include "paddle/cinn/lang/placeholder.h" #include "paddle/cinn/optim/transform_gpu_forloop.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/pir/dialect/operator/ir/op_type.h" -#include "paddle/phi/core/ddim.h" PD_DECLARE_bool(cinn_use_cuda_vectorize); PD_DECLARE_bool(cinn_enable_map_expr); @@ -42,8 +42,8 @@ namespace hlir { namespace framework { namespace pir { +using cinn::common::Type; using cinn::hlir::op::ExternalApiRegistry; -using common::Type; using framework::OpPatternKind; using framework::StrategyFunction; @@ -60,14 +60,14 @@ bool IsInTensorMap( return false; } -common::Type GetTensorDtype(const ::pir::Value& value) { +cinn::common::Type GetTensorDtype(const ::pir::Value& value) { auto type_info = value.type().dyn_cast(); - auto in_shape = phi::vectorize(type_info.dims()); + auto in_shape = ::common::vectorize(type_info.dims()); auto dtype = type_info.dtype(); return CompatibleInfo::ConvertIRType(dtype); } -common::Type GetTensorDtype( +cinn::common::Type GetTensorDtype( const std::string& name, const std::unordered_map<::pir::Value, ir::Tensor>& tensor_map) { for (auto iter : tensor_map) { @@ -76,12 +76,12 @@ common::Type GetTensorDtype( } } VLOG(4) << name << " is not in tensor map, return FP32 by default."; - return common::F32(); + return cinn::common::F32(); } ir::Tensor GetTensor(const GroupPtr& group, const ::pir::Value& value) { auto type_info = value.type().dyn_cast(); - auto in_shape = phi::vectorize(type_info.dims()); + auto in_shape = ::common::vectorize(type_info.dims()); auto dtype = type_info.dtype(); std::string input_id = CompatibleInfo::ValueName(value); if (group->shape_analysis != nullptr) { @@ -145,7 +145,7 @@ void CollectOutputInfo(::pir::Operation* op, out_value.type().dyn_cast(); out_types->push_back(CompatibleInfo::ConvertIRType(type_info.dtype())); - auto out_shape = phi::vectorize(type_info.dims()); + auto out_shape = ::common::vectorize(type_info.dims()); out_shapes->push_back(std::move(out_shape)); } } @@ -457,10 +457,11 @@ std::vector OpLowererImpl::LowerCustomCall( // external_api = ExternalApiRegistry::Global()->GetExternalApi(node, // target_); // } - std::vector compute_args = { - common::CINNValue(group->FuncName()), common::CINNValue(external_api)}; - common::CINNValuePack pack = - impl->fcompute(common::CINNValuePack{compute_args}); + std::vector compute_args = { + cinn::common::CINNValue(group->FuncName()), + cinn::common::CINNValue(external_api)}; + cinn::common::CINNValuePack pack = + impl->fcompute(cinn::common::CINNValuePack{compute_args}); CHECK_EQ(pack.size(), 1UL); // 
reset input names as extern api input args can't be deduplicated. // group->input_names.clear(); @@ -553,7 +554,7 @@ std::vector OpLowererImpl::PostProcess( } int_args_set.insert(symbol_name); group_func_args.emplace_back( - ir::_Var_::Make(symbol_name, common::Int(32))); + ir::_Var_::Make(symbol_name, cinn::common::Int(32))); group->int_args_map[non_tensor_arg_idx++] = {tensor_arg_idx, tensor_arg_dim_idx}; } @@ -632,21 +633,21 @@ std::vector OpLowererImpl::DoOpLower( std::unordered_map* tmp_tensor_info, std::vector* op_func_arg_tensors) { VLOG(4) << "Do lower with Compute, op: " << op->name(); - std::vector cinn_inputs; + std::vector cinn_inputs; for (const ir::Tensor& tensor : *op_func_arg_tensors) { - cinn_inputs.push_back(common::CINNValue(ir::Expr(tensor))); + cinn_inputs.push_back(cinn::common::CINNValue(ir::Expr(tensor))); } // set tensor name = operand hash name auto op_results = op->results(); for (const auto& result : op_results) { std::string output_id = CompatibleInfo::ValueName(result); - cinn_inputs.push_back(common::CINNValue(output_id)); + cinn_inputs.push_back(cinn::common::CINNValue(output_id)); } // 1.Do compute - common::CINNValuePack pack = - op_impl->fcompute(common::CINNValuePack{cinn_inputs}); + cinn::common::CINNValuePack pack = + op_impl->fcompute(cinn::common::CINNValuePack{cinn_inputs}); poly::StageMap tmp_stages = pack.back(); std::string post = ""; @@ -673,7 +674,7 @@ std::vector OpLowererImpl::DoOpLower( // Insert output tensors into function arg if (!expr.as_tensor_ref()->buffer.defined() || - this->target_ != common::DefaultNVGPUTarget()) { + this->target_ != cinn::common::DefaultNVGPUTarget()) { op_func_arg_tensors->push_back(expr.as_tensor_ref()); expr.as_tensor_ref()->WithBuffer(); } @@ -710,18 +711,18 @@ ir::Expr OpLowererImpl::DoOpSchedule( const std::vector& op_func_arg_tensors, const std::vector& lowered_funcs) { VLOG(4) << "Do op schedule"; - std::vector schedule_inputs; + std::vector schedule_inputs; // 1.Collect tensors for (const ir::Tensor& op_func_arg_tensor : op_func_arg_tensors) { - schedule_inputs.push_back(common::CINNValue(op_func_arg_tensor)); + schedule_inputs.push_back(cinn::common::CINNValue(op_func_arg_tensor)); } // 2.Collect bodies to be scheduled for (const ir::LoweredFunc& func : lowered_funcs) { - schedule_inputs.push_back(common::CINNValue(func->body)); + schedule_inputs.push_back(cinn::common::CINNValue(func->body)); } // 3.Do schedule on AST - common::CINNValuePack expr_pack = - op_impl->fschedule(common::CINNValuePack{schedule_inputs}); + cinn::common::CINNValuePack expr_pack = + op_impl->fschedule(cinn::common::CINNValuePack{schedule_inputs}); VLOG(4) << "After op schedule: " << expr_pack[0].operator ir::Expr(); return expr_pack[0].operator ir::Expr(); diff --git a/paddle/cinn/hlir/framework/pir/op_lowering_impl.h b/paddle/cinn/hlir/framework/pir/op_lowering_impl.h index c8c2bdaa134435..ba768cc498f864 100644 --- a/paddle/cinn/hlir/framework/pir/op_lowering_impl.h +++ b/paddle/cinn/hlir/framework/pir/op_lowering_impl.h @@ -41,7 +41,7 @@ namespace pir { using GroupPtr = std::shared_ptr; -using common::Target; +using cinn::common::Target; class OpLowererImpl; typedef bool (OpLowererImpl::*ScheduleDetermineFunction)(::pir::Operation*); diff --git a/paddle/cinn/hlir/framework/pir/op_lowering_util.cc b/paddle/cinn/hlir/framework/pir/op_lowering_util.cc index a9b14a215107a6..25c75ba4354ac7 100644 --- a/paddle/cinn/hlir/framework/pir/op_lowering_util.cc +++ b/paddle/cinn/hlir/framework/pir/op_lowering_util.cc @@ -519,7 +519,7 @@
::pir::Operation* GetMasterToComputeAt( void LoopOrderAssignReduce(ir::IRSchedule& ir_sch, // NOLINT const std::string& block_name, const std::vector& axes, - const common::Target& target, + const cinn::common::Target& target, const bool just_reorder = false) { // reorder non-last reduce axis to last. // like: shape = [16,16,16,16,16],axes = [1,3] -> new order = [0, 2, 4, 1, 3]. @@ -570,11 +570,11 @@ void LoopAssignReduceWithLast(ir::IRSchedule& ir_sch, // NOLINT const std::string& block_name, const std::vector& inshape, const std::vector& axes, - const common::Target& target) { + const cinn::common::Target& target) { // If the number of current device SM is smaller than the number of SM // required by Warp Reduce, the performance of Warp Reduce is better. // Otherwise, use Block Reduce. - auto max_num_threads = common::DefaultNVGPUTarget().max_num_threads(); + auto max_num_threads = cinn::common::DefaultNVGPUTarget().max_num_threads(); int need_reduce_last_count = 1; for (int i = 0; i < inshape.size(); i++) { if (find(axes.begin(), axes.end(), i) == axes.end()) { @@ -699,7 +699,7 @@ void LoopAssignReduceWithoutLast(ir::IRSchedule& ir_sch, // NOLINT const std::string& block_name, const std::vector& inshape, const std::vector& axes, - const common::Target& target) { + const cinn::common::Target& target) { int tail = 0; bool bound = true; auto shape = pe::GetFirstStepReduceShape(inshape, axes, bound, tail); diff --git a/paddle/cinn/hlir/framework/pir/utils.cc b/paddle/cinn/hlir/framework/pir/utils.cc index 5070aae0670a67..83aa5ff5844265 100644 --- a/paddle/cinn/hlir/framework/pir/utils.cc +++ b/paddle/cinn/hlir/framework/pir/utils.cc @@ -229,10 +229,10 @@ utils::AttributeMap CompatibleInfo::ConvertAttributes( } #define CASE_TYPE(src, dst) \ - else if (type.isa<::pir::src>()) return common::dst(); + else if (type.isa<::pir::src>()) return cinn::common::dst(); -common::Type CompatibleInfo::ConvertIRType(::pir::Type type) { - if (type.isa<::pir::BFloat16Type>()) return common::BF16(); +cinn::common::Type CompatibleInfo::ConvertIRType(::pir::Type type) { + if (type.isa<::pir::BFloat16Type>()) return cinn::common::BF16(); CASE_TYPE(Float16Type, F16) CASE_TYPE(Float32Type, F32) CASE_TYPE(Float64Type, F64) @@ -270,7 +270,7 @@ OpPatternKind CompatibleInfo::OpKind(const ::pir::Operation& op) { std::vector CompatibleInfo::ValueShape(const ::pir::Value& value) { auto& dim = value.type().dyn_cast<::pir::DenseTensorType>().dims(); - return phi::vectorize(dim); + return ::common::vectorize(dim); } std::vector GetBroadcastAxis(const phi::DDim& in_shape, diff --git a/paddle/cinn/hlir/framework/pir/utils.h b/paddle/cinn/hlir/framework/pir/utils.h index 127a50eb925045..cd0f66af3f1ff4 100644 --- a/paddle/cinn/hlir/framework/pir/utils.h +++ b/paddle/cinn/hlir/framework/pir/utils.h @@ -20,7 +20,7 @@ #include "paddle/cinn/common/type.h" #include "paddle/cinn/hlir/framework/op.h" #include "paddle/cinn/utils/type_defs.h" -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/pir/core/operation.h" namespace cinn { @@ -84,7 +84,7 @@ struct CompatibleInfo { static utils::AttributeMap ConvertAttributes(const ::pir::Operation& op); - static common::Type ConvertIRType(::pir::Type type); + static cinn::common::Type ConvertIRType(::pir::Type type); static std::vector ValueShape(const ::pir::Value& value); diff --git a/paddle/cinn/hlir/framework/print_graph_pass_test.cc b/paddle/cinn/hlir/framework/print_graph_pass_test.cc index cc3d51c4f79c09..b26c60c716d0c1 100644 ---
a/paddle/cinn/hlir/framework/print_graph_pass_test.cc +++ b/paddle/cinn/hlir/framework/print_graph_pass_test.cc @@ -63,7 +63,7 @@ TEST(Operator, GetAttrs) { auto d = prog.add(c, b); auto e = prog.add(c, d); ASSERT_EQ(prog.size(), 3); - Graph* g = new Graph(prog, common::DefaultHostTarget()); + Graph* g = new Graph(prog, cinn::common::DefaultHostTarget()); ApplyPass(g, "PrintGraph"); auto s = g->GetAttrs("print_graph"); LOG(INFO) << s; diff --git a/paddle/cinn/hlir/framework/schedule.h b/paddle/cinn/hlir/framework/schedule.h index 3fe12f5afae7c3..737328602d7561 100644 --- a/paddle/cinn/hlir/framework/schedule.h +++ b/paddle/cinn/hlir/framework/schedule.h @@ -30,7 +30,7 @@ namespace framework { * For operations and all the operations they depend on. * The schedule per Operation is named as stage. */ -class Schedule : public common::Object { +class Schedule : public cinn::common::Object { public: const char* type_info() const override { return __type_info__; } diff --git a/paddle/cinn/hlir/framework/scope_test.cc b/paddle/cinn/hlir/framework/scope_test.cc index 23ac65469af9a1..c9b2cd47832c86 100644 --- a/paddle/cinn/hlir/framework/scope_test.cc +++ b/paddle/cinn/hlir/framework/scope_test.cc @@ -25,7 +25,7 @@ TEST(Scope, basic) { auto* var = scope.Var("key"); auto& tensor = absl::get(*var); tensor->Resize(Shape{{3, 1}}); - auto* data = tensor->mutable_data(common::DefaultHostTarget()); + auto* data = tensor->mutable_data(cinn::common::DefaultHostTarget()); data[0] = 0.f; data[1] = 1.f; data[2] = 2.f; diff --git a/paddle/cinn/hlir/framework/tensor.h b/paddle/cinn/hlir/framework/tensor.h index 7b5d201d0f0ae0..59f115b32e2474 100644 --- a/paddle/cinn/hlir/framework/tensor.h +++ b/paddle/cinn/hlir/framework/tensor.h @@ -29,7 +29,7 @@ namespace cinn { namespace hlir { namespace framework { -using common::Target; +using cinn::common::Target; struct Shape { using dim_t = int; @@ -68,7 +68,7 @@ class _Tensor_ : public Object { inline void* mutable_data(const Target& target, const Type& type) { set_type(type); - if (target == common::DefaultHostTarget()) { + if (target == cinn::common::DefaultHostTarget()) { buffer_->ResizeLazy(1024, shape_.numel() * type.bytes(), target); } else { buffer_->ResizeLazy(shape_.numel() * type.bytes(), target); @@ -79,7 +79,7 @@ class _Tensor_ : public Object { template inline T* mutable_data(const Target& target) { set_type(type_of()); - if (target == common::DefaultHostTarget()) { + if (target == cinn::common::DefaultHostTarget()) { buffer_->ResizeLazy(1024, shape_.numel() * sizeof(T), target); } else { buffer_->ResizeLazy(shape_.numel() * sizeof(T), target); @@ -104,7 +104,7 @@ class _Tensor_ : public Object { const char* type_info() const override { return __type_info__; } private: - common::Type type_; + cinn::common::Type type_; // A shared ptr to make it easier to share buffer between tensors. 
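// (The mutable_data overloads above size this buffer lazily, via // ResizeLazy, to shape_.numel() times the element width.)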
std::shared_ptr buffer_; Shape shape_; diff --git a/paddle/cinn/hlir/framework/tensor_test.cc b/paddle/cinn/hlir/framework/tensor_test.cc index 30ce7c158696df..0644bf941c8a84 100644 --- a/paddle/cinn/hlir/framework/tensor_test.cc +++ b/paddle/cinn/hlir/framework/tensor_test.cc @@ -24,7 +24,7 @@ TEST(Tensor, basic) { _Tensor_ tensor; tensor.Resize(Shape{{3, 2}}); - auto* data = tensor.mutable_data(common::DefaultHostTarget()); + auto* data = tensor.mutable_data(cinn::common::DefaultHostTarget()); for (int i = 0; i < tensor.shape().numel(); i++) { data[i] = i; diff --git a/paddle/cinn/hlir/framework/visualize_helper.cc b/paddle/cinn/hlir/framework/visualize_helper.cc index bc8c5e41241868..b6e73f6f2c6978 100644 --- a/paddle/cinn/hlir/framework/visualize_helper.cc +++ b/paddle/cinn/hlir/framework/visualize_helper.cc @@ -177,7 +177,7 @@ bool MakeDirectory(const std::string& dirname, mode_t mode) { std::string GenNodeDataLabel( const NodeData* node, const absl::flat_hash_map& shape_dict, - const absl::flat_hash_map& dtype_dict, + const absl::flat_hash_map& dtype_dict, const std::string dot_nodedata_id) { std::stringstream ss; ss << dot_nodedata_id; @@ -194,7 +194,7 @@ std::string GenNodeDataLabel( } if (dtype_dict.count(node->id())) { ss << "\\n"; - ss << common::Type2Str(dtype_dict.at(node->id())); + ss << cinn::common::Type2Str(dtype_dict.at(node->id())); } return ss.str(); @@ -344,7 +344,7 @@ void AddGroupNode( const std::string& dot_cluster_id, const std::unordered_set& fetch_var_ids, const absl::flat_hash_map& shape_dict, - const absl::flat_hash_map& dtype_dict, + const absl::flat_hash_map& dtype_dict, std::unordered_map* recompute_nodes, std::unordered_map* outnode2dot_id, std::unordered_set* nodedatas_set, diff --git a/paddle/cinn/hlir/framework/visualize_helper.h b/paddle/cinn/hlir/framework/visualize_helper.h index 3afd3a974db0c8..2f281d3e3d4768 100644 --- a/paddle/cinn/hlir/framework/visualize_helper.h +++ b/paddle/cinn/hlir/framework/visualize_helper.h @@ -136,7 +136,7 @@ bool MakeDirectory(const std::string& dirname, mode_t mode); std::string GenNodeDataLabel( const NodeData* node, const absl::flat_hash_map& shape_dict, - const absl::flat_hash_map& dtype_dict, + const absl::flat_hash_map& dtype_dict, const std::string dot_nodedata_id); void Summary(const std::vector>& groups, @@ -152,7 +152,7 @@ void AddGroupNode( const std::string& dot_cluster_id, const std::unordered_set& fetch_var_ids, const absl::flat_hash_map& shape_dict, - const absl::flat_hash_map& dtype_dict, + const absl::flat_hash_map& dtype_dict, std::unordered_map* recompute_nodes, std::unordered_map* outnode2dot_id, std::unordered_set* nodedatas_set, diff --git a/paddle/cinn/hlir/op/broadcast.cc b/paddle/cinn/hlir/op/broadcast.cc index 2ce71d5198cf97..c2fc4586d94507 100644 --- a/paddle/cinn/hlir/op/broadcast.cc +++ b/paddle/cinn/hlir/op/broadcast.cc @@ -30,9 +30,9 @@ namespace cinn { namespace hlir { namespace op { -using common::_CINNValuePack_; -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::_CINNValuePack_; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; using framework::OpStrategy; using framework::shape_t; using framework::StrategyFunction; diff --git a/paddle/cinn/hlir/op/contrib/argmax.cc b/paddle/cinn/hlir/op/contrib/argmax.cc index 041cfe7dc47a50..7de32179b52a0b 100644 --- a/paddle/cinn/hlir/op/contrib/argmax.cc +++ b/paddle/cinn/hlir/op/contrib/argmax.cc @@ -39,12 +39,12 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValue; +using 
cinn::common::CINNValue; using framework::shape_t; using ir::Tensor; std::vector Argmax(const Tensor &in_tensor, - const common::Target &target, + const cinn::common::Target &target, poly::StageMap stages, const int &axis, const bool &keep_dims, @@ -116,7 +116,7 @@ std::shared_ptr StrategyForArgmax( [=](lang::Args args, lang::RetValue *ret) { CHECK(!args.empty()) << "The input argument of argmax compute is empty! Please check."; - common::CINNValuePack pack_args = args[0]; + cinn::common::CINNValuePack pack_args = args[0]; std::string tensor_name = UniqName("Argmax_out"); CHECK_GE(pack_args.size(), 1U) << "There should be 1 input args for argmax compute"; @@ -135,14 +135,14 @@ std::shared_ptr StrategyForArgmax( CINNValue(out_tensor[1]), CINNValue(out_tensor[2]), CINNValue(stages)}; - *ret = common::CINNValuePack{cinn_values}; + *ret = cinn::common::CINNValuePack{cinn_values}; }); framework::CINNSchedule argmax_schedule([=](lang::Args args, lang::RetValue *ret) { CHECK(!args.empty()) << "The input argument of argmax_schedule is empty! Please check.\n"; - common::CINNValuePack arg_pack = args[0]; + cinn::common::CINNValuePack arg_pack = args[0]; std::vector vec_ast; for (int i = 0; i < arg_pack.size(); i++) { if (arg_pack[i].is_expr()) { @@ -187,9 +187,9 @@ std::shared_ptr StrategyForArgmax( if (prod_size > 1 && target.arch == Target::Arch::X86) { pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, true); } - std::vector res{ - common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; - *ret = common::CINNValuePack{res}; + std::vector res{ + cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; + *ret = cinn::common::CINNValuePack{res}; }); auto strategy = std::make_shared(); diff --git a/paddle/cinn/hlir/op/contrib/argmax.h b/paddle/cinn/hlir/op/contrib/argmax.h index b52f9e80f4ce56..4b5b519614f18c 100644 --- a/paddle/cinn/hlir/op/contrib/argmax.h +++ b/paddle/cinn/hlir/op/contrib/argmax.h @@ -22,7 +22,7 @@ namespace cinn { namespace hlir { namespace op { std::vector Argmax(const ir::Tensor &in_tensor, - const common::Target &target, + const cinn::common::Target &target, poly::StageMap stages, const int &axis, const bool &keep_dims = false, diff --git a/paddle/cinn/hlir/op/contrib/argmax_test.cc b/paddle/cinn/hlir/op/contrib/argmax_test.cc index 786e19b163a9a0..7a1fc95384cbc1 100644 --- a/paddle/cinn/hlir/op/contrib/argmax_test.cc +++ b/paddle/cinn/hlir/op/contrib/argmax_test.cc @@ -34,9 +34,9 @@ namespace hlir { namespace op { TEST(GenerateCode_Cpu, Argmax_Keep) { - common::Context::Global().ResetNameId(); + cinn::common::Context::Global().ResetNameId(); - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); int axis = 1; ir::Expr n(4); diff --git a/paddle/cinn/hlir/op/contrib/argmin.cc b/paddle/cinn/hlir/op/contrib/argmin.cc index 3caaf45c46a5eb..8f9d2ec9f45fd3 100644 --- a/paddle/cinn/hlir/op/contrib/argmin.cc +++ b/paddle/cinn/hlir/op/contrib/argmin.cc @@ -39,12 +39,12 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValue; +using cinn::common::CINNValue; using framework::shape_t; using ir::Tensor; std::vector Argmin(const Tensor &in_tensor, - const common::Target &target, + const cinn::common::Target &target, poly::StageMap stages, const int &axis, const bool &keep_dims, @@ -115,7 +115,7 @@ std::shared_ptr StrategyForArgmin( [=](lang::Args args, lang::RetValue *ret) { CHECK(!args.empty()) << "The input argument of argmin compute is empty! 
Please check."; - common::CINNValuePack pack_args = args[0]; + cinn::common::CINNValuePack pack_args = args[0]; CHECK_GE(pack_args.size(), 1U) << "There should be 1 input args for argmax compute"; Expr in_expr = pack_args[0]; @@ -133,14 +133,14 @@ std::shared_ptr StrategyForArgmin( CINNValue(out_tensor[1]), CINNValue(out_tensor[2]), CINNValue(stages)}; - *ret = common::CINNValuePack{cinn_values}; + *ret = cinn::common::CINNValuePack{cinn_values}; }); framework::CINNSchedule argmin_schedule([=](lang::Args args, lang::RetValue *ret) { CHECK(!args.empty()) << "The input argument of arange_schedule is empty! Please check.\n"; - common::CINNValuePack arg_pack = args[0]; + cinn::common::CINNValuePack arg_pack = args[0]; std::vector vec_ast; for (int i = 0; i < arg_pack.size(); i++) { if (arg_pack[i].is_expr()) { @@ -185,9 +185,9 @@ std::shared_ptr StrategyForArgmin( if (prod_size > 1 && target.arch == Target::Arch::X86) { pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, true); } - std::vector res{ - common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; - *ret = common::CINNValuePack{res}; + std::vector res{ + cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; + *ret = cinn::common::CINNValuePack{res}; }); auto strategy = std::make_shared(); diff --git a/paddle/cinn/hlir/op/contrib/argmin.h b/paddle/cinn/hlir/op/contrib/argmin.h index 17b0095b5c8a41..a0cc5261fb9262 100644 --- a/paddle/cinn/hlir/op/contrib/argmin.h +++ b/paddle/cinn/hlir/op/contrib/argmin.h @@ -22,7 +22,7 @@ namespace cinn { namespace hlir { namespace op { std::vector Argmin(const ir::Tensor& in_tensor, - const common::Target& target, + const cinn::common::Target& target, poly::StageMap stages, const int& axis, const bool& keep_dims = false, diff --git a/paddle/cinn/hlir/op/contrib/argmin_test.cc b/paddle/cinn/hlir/op/contrib/argmin_test.cc index a979870fe88a90..beec71e153bb09 100644 --- a/paddle/cinn/hlir/op/contrib/argmin_test.cc +++ b/paddle/cinn/hlir/op/contrib/argmin_test.cc @@ -33,9 +33,9 @@ namespace hlir { namespace op { TEST(GenerateCode_Cpu, Argmin_Keep) { - common::Context::Global().ResetNameId(); + cinn::common::Context::Global().ResetNameId(); - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); int axis = 1; ir::Expr n(4); diff --git a/paddle/cinn/hlir/op/contrib/assert_true.cc b/paddle/cinn/hlir/op/contrib/assert_true.cc index a91f740c54892c..c3f39144b8a59c 100644 --- a/paddle/cinn/hlir/op/contrib/assert_true.cc +++ b/paddle/cinn/hlir/op/contrib/assert_true.cc @@ -33,8 +33,8 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; std::shared_ptr StrategyForAssertTrue( const framework::NodeAttr &attrs, diff --git a/paddle/cinn/hlir/op/contrib/bitcast_convert.cc b/paddle/cinn/hlir/op/contrib/bitcast_convert.cc index cfa957ba5f06fd..dc8516b160bd24 100644 --- a/paddle/cinn/hlir/op/contrib/bitcast_convert.cc +++ b/paddle/cinn/hlir/op/contrib/bitcast_convert.cc @@ -39,8 +39,8 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; using framework::shape_t; ir::Tensor BitcastConvert(const ir::Tensor &input, @@ -98,8 +98,8 @@ std::vector InferShapeForBitcastConvert( auto input_data_type_name = absl::get(attrs.at("input_data_type")); auto output_data_type_name = 
absl::get(attrs.at("dtype")); - auto input_data_type = common::Str2Type(input_data_type_name); - auto output_data_type = common::Str2Type(output_data_type_name); + auto input_data_type = cinn::common::Str2Type(input_data_type_name); + auto output_data_type = cinn::common::Str2Type(output_data_type_name); auto output_shape = std::vector(inputs_shape.begin(), inputs_shape.end()); @@ -124,7 +124,7 @@ std::vector InferShapeForBitcastConvert( std::vector InferDtypeForBitcastConvert( const std::vector &inputs_type, const framework::AttrMapType &attrs) { CHECK(attrs.count("dtype")); - return {common::Str2Type(absl::get(attrs.at("dtype")))}; + return {cinn::common::Str2Type(absl::get(attrs.at("dtype")))}; } } // namespace op diff --git a/paddle/cinn/hlir/op/contrib/cholesky.cc b/paddle/cinn/hlir/op/contrib/cholesky.cc index a1d43859c71bdd..1784ef570ed681 100644 --- a/paddle/cinn/hlir/op/contrib/cholesky.cc +++ b/paddle/cinn/hlir/op/contrib/cholesky.cc @@ -47,8 +47,8 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; std::shared_ptr StrategyForCholesky( const framework::NodeAttr &attrs, diff --git a/paddle/cinn/hlir/op/contrib/gather_nd.cc b/paddle/cinn/hlir/op/contrib/gather_nd.cc index 1a3f06330f6c76..9e23642a11d651 100644 --- a/paddle/cinn/hlir/op/contrib/gather_nd.cc +++ b/paddle/cinn/hlir/op/contrib/gather_nd.cc @@ -41,8 +41,8 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; ir::Tensor GatherNd(const ir::Tensor &x, const ir::Tensor &index, @@ -62,16 +62,17 @@ ir::Tensor GatherNd(const ir::Tensor &x, std::vector indices_position; for (size_t i = 0; i < index_shape_size - 1; ++i) { indices_position.push_back( - ir::Cast::Make(common::Int(32), indices[i])); + ir::Cast::Make(cinn::common::Int(32), indices[i])); } - indices_position.push_back(ir::Cast::Make(common::Int(32), Expr(0))); + indices_position.push_back( + ir::Cast::Make(cinn::common::Int(32), Expr(0))); size_t indices_position_size = indices_position.size(); std::vector real_indices; for (size_t i = 0; i < index_shape.back().as_int32(); ++i) { indices_position[indices_position_size - 1] = - ir::Cast::Make(common::Int(32), Expr(i)); + ir::Cast::Make(cinn::common::Int(32), Expr(i)); real_indices.push_back( - ir::Cast::Make(common::Int(32), index(indices_position))); + ir::Cast::Make(cinn::common::Int(32), index(indices_position))); } if (real_indices.size() == x_shape_size) { return x(real_indices); @@ -127,7 +128,7 @@ std::shared_ptr StrategyForGatherNd( lang::RetValue *ret) { CHECK(!args.empty()) << "The input argument of gather_nd_schedule is " "empty! 
Please check.\n"; - common::CINNValuePack arg_pack = args[0]; + cinn::common::CINNValuePack arg_pack = args[0]; std::vector vec_ast; for (int i = 0; i < arg_pack.size(); i++) { if (arg_pack[i].is_expr()) { @@ -150,9 +151,9 @@ std::shared_ptr StrategyForGatherNd( pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, true); } } - std::vector res{ - common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; - *ret = common::CINNValuePack{res}; + std::vector res{ + cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; + *ret = cinn::common::CINNValuePack{res}; }); auto strategy = std::make_shared(); diff --git a/paddle/cinn/hlir/op/contrib/gather_nd_test.cc b/paddle/cinn/hlir/op/contrib/gather_nd_test.cc index ee5f47477a5de1..46702a7a80c63e 100644 --- a/paddle/cinn/hlir/op/contrib/gather_nd_test.cc +++ b/paddle/cinn/hlir/op/contrib/gather_nd_test.cc @@ -33,9 +33,9 @@ namespace hlir { namespace op { TEST(GenerateCode_Cpu, GatherNd) { - common::Context::Global().ResetNameId(); + cinn::common::Context::Global().ResetNameId(); - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); ir::Expr dim0(1); ir::Expr dim1(2); diff --git a/paddle/cinn/hlir/op/contrib/gaussian_random.cc b/paddle/cinn/hlir/op/contrib/gaussian_random.cc index 1d70e4098e7211..2b6a3019c6fcda 100644 --- a/paddle/cinn/hlir/op/contrib/gaussian_random.cc +++ b/paddle/cinn/hlir/op/contrib/gaussian_random.cc @@ -47,8 +47,8 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; std::shared_ptr StrategyForGaussianRandom( const framework::NodeAttr &attrs, @@ -88,7 +88,7 @@ std::vector InferDtypeForGaussianRandom( if (attrs.find("dtype") != attrs.end()) { dtype = absl::get(attrs.at("dtype")); } - std::vector res{common::Str2Type(dtype)}; + std::vector res{cinn::common::Str2Type(dtype)}; CHECK(res[0].is_float(32) || res[0].is_float(64)) << "gaussian_random only support float32 and float64, but here " << res[0] << "! 
Please check."; diff --git a/paddle/cinn/hlir/op/contrib/logical_right_shift.cc b/paddle/cinn/hlir/op/contrib/logical_right_shift.cc index f4223a2d794128..4f176850a4d3c9 100644 --- a/paddle/cinn/hlir/op/contrib/logical_right_shift.cc +++ b/paddle/cinn/hlir/op/contrib/logical_right_shift.cc @@ -42,9 +42,9 @@ namespace cinn { namespace hlir { namespace op { -using common::_CINNValuePack_; -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::_CINNValuePack_; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; using framework::OpStrategy; using framework::shape_t; using framework::StrategyFunction; @@ -54,9 +54,9 @@ ir::Tensor LogicalRightShift(const ir::Tensor &A, const Target &target, const std::string &output_name) { std::string extern_func = "cinn_"; - if (target == common::DefaultHostTarget()) { + if (target == cinn::common::DefaultHostTarget()) { extern_func += "host_"; - } else if (target == common::DefaultNVGPUTarget()) { + } else if (target == cinn::common::DefaultNVGPUTarget()) { extern_func += "nvgpu_"; } else { CINN_NOT_IMPLEMENTED diff --git a/paddle/cinn/hlir/op/contrib/logical_right_shift_test.cc b/paddle/cinn/hlir/op/contrib/logical_right_shift_test.cc index 19315022167110..f23867a391d4bf 100644 --- a/paddle/cinn/hlir/op/contrib/logical_right_shift_test.cc +++ b/paddle/cinn/hlir/op/contrib/logical_right_shift_test.cc @@ -33,9 +33,9 @@ namespace hlir { namespace op { TEST(GenerateCode_Cpu, LogicalRightShift) { - common::Context::Global().ResetNameId(); + cinn::common::Context::Global().ResetNameId(); - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); lang::Placeholder x("x", std::vector{10}); lang::Placeholder y("y", std::vector{10}); ir::Tensor res = LogicalRightShift(x, y, target, "test_logical_right_shift"); diff --git a/paddle/cinn/hlir/op/contrib/lookup_table.cc b/paddle/cinn/hlir/op/contrib/lookup_table.cc index 2796d45e016b92..3e4ebd679a9047 100644 --- a/paddle/cinn/hlir/op/contrib/lookup_table.cc +++ b/paddle/cinn/hlir/op/contrib/lookup_table.cc @@ -43,8 +43,8 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; ir::Tensor LookupTable(const ir::Tensor& table, const ir::Tensor& ids, @@ -65,7 +65,7 @@ ir::Tensor LookupTable(const ir::Tensor& table, offsets.emplace_back(Expr(0)); // Because the current conversion rules have not been completed, static // conversion is done here. 
- auto ids_offset = ir::Cast::Make(common::I32(), ids(offsets)); + auto ids_offset = ir::Cast::Make(cinn::common::I32(), ids(offsets)); auto pred = ir::And::Make( Expr(padding_idx != -1), ir::EQ::Make(ids_offset, Expr(static_cast(padding_idx)))); @@ -73,7 +73,7 @@ ir::Tensor LookupTable(const ir::Tensor& table, ir::Cast::Make(table->type(), Expr(0)), table(ids_offset, indices.back())); }, - common::UniqName(output_name)); + cinn::common::UniqName(output_name)); } std::shared_ptr StrategyForLookupTable( diff --git a/paddle/cinn/hlir/op/contrib/lookup_table_test.cc b/paddle/cinn/hlir/op/contrib/lookup_table_test.cc index d09d4238f6268e..43a4e3526629f1 100644 --- a/paddle/cinn/hlir/op/contrib/lookup_table_test.cc +++ b/paddle/cinn/hlir/op/contrib/lookup_table_test.cc @@ -34,9 +34,9 @@ namespace hlir { namespace op { TEST(GenerateCode_Cpu, LookupTable) { - common::Context::Global().ResetNameId(); + cinn::common::Context::Global().ResetNameId(); - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); lang::Placeholder in1("in1", {10, 20}); lang::Placeholder in2("in2", std::vector{2, 2, 1}); @@ -72,9 +72,9 @@ TEST(GenerateCode_Gpu, LookupTable) { if (!cinn::runtime::IsCompiledWithCUDA()) { return; } - common::Context::Global().ResetNameId(); + cinn::common::Context::Global().ResetNameId(); - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); lang::Placeholder in1("in1", {10, 20}); lang::Placeholder in2("in2", std::vector{2, 2, 1}); diff --git a/paddle/cinn/hlir/op/contrib/one_hot.cc b/paddle/cinn/hlir/op/contrib/one_hot.cc index c7d0c3564da949..5070026a434c68 100644 --- a/paddle/cinn/hlir/op/contrib/one_hot.cc +++ b/paddle/cinn/hlir/op/contrib/one_hot.cc @@ -41,7 +41,7 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValuePack; +using cinn::common::CINNValuePack; ir::Tensor OneHot(const ir::Tensor& indices, const ir::Tensor& on_value, @@ -94,7 +94,7 @@ ir::Tensor OneHot(const ir::Tensor& indices, return ir::Select::Make( ir::EQ::Make(elem, idx), on_value_cast, off_value_cast); }, - common::UniqName(output_name)); + cinn::common::UniqName(output_name)); return res; } @@ -144,7 +144,7 @@ std::vector InferDtypeForOneHot(const std::vector& inputs_type, dtype = absl::get(attrs.at("dtype")); } - std::vector res{common::Str2Type(dtype)}; + std::vector res{cinn::common::Str2Type(dtype)}; return res; } @@ -175,7 +175,7 @@ std::shared_ptr StrategyForOneHot( lang::RetValue* ret) { CHECK(!args.empty()) << "The input argument of one_hot compute is empty! 
Please check.\n"; - common::CINNValuePack pack_args = args[0]; + cinn::common::CINNValuePack pack_args = args[0]; CHECK(!pack_args.empty()) << "at least one input tensor for transpose compute\n"; CHECK_GE(pack_args.size(), 3U); @@ -198,15 +198,15 @@ std::shared_ptr StrategyForOneHot( off_value, depth, axis, - common::Str2Type(dtype), + cinn::common::Str2Type(dtype), tensor_name); - std::vector res; + std::vector res; auto stages = CreateStages({indices, on_value, off_value}); stages->InsertLazily(out); - res.push_back(common::CINNValue(out)); - res.push_back(common::CINNValue(stages)); - *ret = common::CINNValuePack{res}; + res.push_back(cinn::common::CINNValue(out)); + res.push_back(cinn::common::CINNValue(stages)); + *ret = cinn::common::CINNValuePack{res}; }); auto strategy = std::make_shared(); diff --git a/paddle/cinn/hlir/op/contrib/one_hot_test.cc b/paddle/cinn/hlir/op/contrib/one_hot_test.cc index 572172de0ab41b..7478876c4766d5 100644 --- a/paddle/cinn/hlir/op/contrib/one_hot_test.cc +++ b/paddle/cinn/hlir/op/contrib/one_hot_test.cc @@ -33,9 +33,9 @@ namespace hlir { namespace op { TEST(GenerateCode_Cpu, OneHot) { - common::Context::Global().ResetNameId(); + cinn::common::Context::Global().ResetNameId(); - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); Expr m(4); Expr n(4); @@ -52,7 +52,7 @@ TEST(GenerateCode_Cpu, OneHot) { off_value, depth, axis, - common::Str2Type(dtype), + cinn::common::Str2Type(dtype), "test_one_hot"); poly::StageMap stages = poly::CreateStages({res}); diff --git a/paddle/cinn/hlir/op/contrib/randint.cc b/paddle/cinn/hlir/op/contrib/randint.cc index 335155fd5afad4..8838656b814c0d 100644 --- a/paddle/cinn/hlir/op/contrib/randint.cc +++ b/paddle/cinn/hlir/op/contrib/randint.cc @@ -47,8 +47,8 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; std::shared_ptr StrategyForRandInt( const framework::NodeAttr &attrs, @@ -86,7 +86,7 @@ std::vector InferShapeForRandInt( std::vector InferDtypeForRandInt(const std::vector &inputs_type, const framework::AttrMapType &attrs) { std::string dtype = "int32"; - std::vector res{common::Str2Type(dtype)}; + std::vector res{cinn::common::Str2Type(dtype)}; return res; } diff --git a/paddle/cinn/hlir/op/contrib/reciprocal.cc b/paddle/cinn/hlir/op/contrib/reciprocal.cc index f9a71f7172b5b4..9b89395f8277b8 100644 --- a/paddle/cinn/hlir/op/contrib/reciprocal.cc +++ b/paddle/cinn/hlir/op/contrib/reciprocal.cc @@ -42,9 +42,9 @@ namespace cinn { namespace hlir { namespace op { -using common::_CINNValuePack_; -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::_CINNValuePack_; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; using framework::OpStrategy; using framework::shape_t; using framework::StrategyFunction; @@ -71,7 +71,7 @@ ir::Tensor Reciprocal(const ir::Tensor &input, const std::string &output_name) { [=](const std::vector &indice) { ir::Tensor out_tensor(input); auto e = out_tensor(indice); - return common::make_const(input->type(), 1.0f) / e; + return cinn::common::make_const(input->type(), 1.0f) / e; }, output_name)}; } diff --git a/paddle/cinn/hlir/op/contrib/reciprocal_test.cc b/paddle/cinn/hlir/op/contrib/reciprocal_test.cc index c23afb3e7fd468..cfc22f878fc3d1 100644 --- a/paddle/cinn/hlir/op/contrib/reciprocal_test.cc +++ b/paddle/cinn/hlir/op/contrib/reciprocal_test.cc @@ -33,9 +33,9 @@ 
namespace hlir { namespace op { TEST(GenerateCode_Cpu, Reciprocal) { - common::Context::Global().ResetNameId(); + cinn::common::Context::Global().ResetNameId(); - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); ir::Expr n(4); ir::Expr m(2); diff --git a/paddle/cinn/hlir/op/contrib/repeat.cc b/paddle/cinn/hlir/op/contrib/repeat.cc index 6f35a0f9b058c5..f77e5939099b52 100644 --- a/paddle/cinn/hlir/op/contrib/repeat.cc +++ b/paddle/cinn/hlir/op/contrib/repeat.cc @@ -40,7 +40,7 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValuePack; +using cinn::common::CINNValuePack; std::vector Repeat(const ir::Tensor &tensor, int repeats, @@ -79,7 +79,7 @@ std::vector Repeat(const ir::Tensor &tensor, } return tensor(idx); }, - common::UniqName(output_name)); + cinn::common::UniqName(output_name)); return {res}; } @@ -166,22 +166,22 @@ std::shared_ptr StrategyForRepeat( std::vector out = Repeat(tensor_A, repeats, axis, tensor_name); CHECK(out.size() == 1U) << "The size of Repeat's output should be 1"; - std::vector res; + std::vector res; auto stages = CreateStages({tensor_A}); for (auto &t : out) { stages->InsertLazily(t); - res.push_back(common::CINNValue(t)); + res.push_back(cinn::common::CINNValue(t)); } - res.push_back(common::CINNValue(stages)); - *ret = common::CINNValuePack{res}; + res.push_back(cinn::common::CINNValue(stages)); + *ret = cinn::common::CINNValuePack{res}; }); framework::CINNSchedule repeat_schedule([=](lang::Args args, lang::RetValue *ret) { CHECK(!args.empty()) << "The input argument of repeat schedule is empty! Please check.\n"; - common::CINNValuePack arg_pack = args[0]; + cinn::common::CINNValuePack arg_pack = args[0]; std::vector vec_ast; for (int i = 0; i < arg_pack.size(); i++) { if (arg_pack[i].is_expr()) { @@ -204,9 +204,9 @@ std::shared_ptr StrategyForRepeat( pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, true); } } - std::vector res{ - common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; - *ret = common::CINNValuePack{res}; + std::vector res{ + cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; + *ret = cinn::common::CINNValuePack{res}; }); auto strategy = std::make_shared(); diff --git a/paddle/cinn/hlir/op/contrib/repeat_test.cc b/paddle/cinn/hlir/op/contrib/repeat_test.cc index a5abd5bb758046..358bfdd52d0db4 100755 --- a/paddle/cinn/hlir/op/contrib/repeat_test.cc +++ b/paddle/cinn/hlir/op/contrib/repeat_test.cc @@ -33,9 +33,9 @@ namespace hlir { namespace op { TEST(GenerateCode_Cpu, Repeat) { - common::Context::Global().ResetNameId(); + cinn::common::Context::Global().ResetNameId(); - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); ir::Expr m(4); ir::Expr n(4); diff --git a/paddle/cinn/hlir/op/contrib/resize.cc b/paddle/cinn/hlir/op/contrib/resize.cc index 786365d21dcd92..d74f4647878b00 100644 --- a/paddle/cinn/hlir/op/contrib/resize.cc +++ b/paddle/cinn/hlir/op/contrib/resize.cc @@ -39,26 +39,26 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValuePack; +using cinn::common::CINNValuePack; #define __get_pixel(input, h, w, n, c, y, x) \ input({n, \ c, \ - common::AutoSimplify( \ + cinn::common::AutoSimplify( \ ir::Max::Make(ir::Min::Make(y, h - Expr(1)), Expr(0))), \ - common::AutoSimplify( \ + cinn::common::AutoSimplify( \ ir::Max::Make(ir::Min::Make(x, w - Expr(1)), Expr(0)))}) ir::Tensor Resize(const ir::Tensor &input, - const common::Target 
&target, + const cinn::common::Target &target, const std::vector &out_shape, const std::string &mode, const std::string &output_name) { std::string func_name; - if (target.arch == common::Target::Arch::NVGPU) { + if (target.arch == cinn::common::Target::Arch::NVGPU) { func_name.assign("cinn_cuda_resize_"); - } else if (target.arch == common::Target::Arch::X86) { + } else if (target.arch == cinn::common::Target::Arch::X86) { func_name.assign("cinn_host_resize_"); } else { LOG(FATAL) << "Resize only supports X86 and NVGPU ! Please Check.\n"; @@ -85,14 +85,16 @@ ir::Tensor Resize(const ir::Tensor &input, Expr value; if (mode == "nearest") { - Expr in_y = ir::Cast::Make(common::F32(), in_h) / - ir::Cast::Make(common::F32(), out_h) * - ir::Cast::Make(common::F32(), out_y); - Expr in_x = ir::Cast::Make(common::F32(), in_w) / - ir::Cast::Make(common::F32(), out_w) * - ir::Cast::Make(common::F32(), out_x); - Expr in_y_int = ir::Cast::Make(common::Int(32), lang::Floor(in_y)); - Expr in_x_int = ir::Cast::Make(common::Int(32), lang::Floor(in_x)); + Expr in_y = ir::Cast::Make(cinn::common::F32(), in_h) / + ir::Cast::Make(cinn::common::F32(), out_h) * + ir::Cast::Make(cinn::common::F32(), out_y); + Expr in_x = ir::Cast::Make(cinn::common::F32(), in_w) / + ir::Cast::Make(cinn::common::F32(), out_w) * + ir::Cast::Make(cinn::common::F32(), out_x); + Expr in_y_int = + ir::Cast::Make(cinn::common::Int(32), lang::Floor(in_y)); + Expr in_x_int = + ir::Cast::Make(cinn::common::Int(32), lang::Floor(in_x)); std::vector in_indices = { indices[0], indices[1], in_y_int, in_x_int}; value = input(in_indices); @@ -126,7 +128,7 @@ ir::Tensor Resize(const ir::Tensor &input, return value; }, - common::UniqName(output_name)); + cinn::common::UniqName(output_name)); return res; } @@ -209,19 +211,19 @@ std::shared_ptr StrategyForResize( ir::Tensor out = Resize(tensor_A, target, out_shape, mode, tensor_name); - std::vector res; + std::vector res; auto stages = CreateStages({tensor_A}); stages->InsertLazily(out); - res.push_back(common::CINNValue(out)); - res.push_back(common::CINNValue(stages)); - *ret = common::CINNValuePack{res}; + res.push_back(cinn::common::CINNValue(out)); + res.push_back(cinn::common::CINNValue(stages)); + *ret = cinn::common::CINNValuePack{res}; }); framework::CINNSchedule resize_schedule([=](lang::Args args, lang::RetValue *ret) { CHECK(!args.empty()) << "The input argument of resize schedule is empty! 
Please check.\n"; - common::CINNValuePack arg_pack = args[0]; + cinn::common::CINNValuePack arg_pack = args[0]; std::vector vec_ast; for (int i = 0; i < arg_pack.size(); i++) { if (arg_pack[i].is_expr()) { @@ -244,9 +246,9 @@ std::shared_ptr StrategyForResize( pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, true); } } - std::vector res{ - common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; - *ret = common::CINNValuePack{res}; + std::vector res{ + cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; + *ret = cinn::common::CINNValuePack{res}; }); auto strategy = std::make_shared(); diff --git a/paddle/cinn/hlir/op/contrib/resize.h b/paddle/cinn/hlir/op/contrib/resize.h index 77bf94878870db..b087eb36a9b51f 100644 --- a/paddle/cinn/hlir/op/contrib/resize.h +++ b/paddle/cinn/hlir/op/contrib/resize.h @@ -26,7 +26,7 @@ namespace hlir { namespace op { ir::Tensor Resize(const ir::Tensor &x, - const common::Target &target, + const cinn::common::Target &target, const std::vector &out_shape, const std::string &mode, const std::string &output_name); diff --git a/paddle/cinn/hlir/op/contrib/sort.cc b/paddle/cinn/hlir/op/contrib/sort.cc index 0941d2690b604a..8adc618e352e61 100644 --- a/paddle/cinn/hlir/op/contrib/sort.cc +++ b/paddle/cinn/hlir/op/contrib/sort.cc @@ -40,20 +40,20 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; std::vector ArgSort(const ir::Tensor &A, - const common::Target &target, + const cinn::common::Target &target, poly::StageMap stages, const int &axis, const bool &is_ascend, const std::string &name) { std::string find_func_name; std::string index_func_name; - if (target.arch == common::Target::Arch::NVGPU) { + if (target.arch == cinn::common::Target::Arch::NVGPU) { find_func_name.assign("cinn_nvgpu_next_smallest_int32"); - } else if (target.arch == common::Target::Arch::X86) { + } else if (target.arch == cinn::common::Target::Arch::X86) { find_func_name.assign("cinn_host_next_smallest_int32"); } else { LOG(FATAL) << "ArgSort only supports X86 and NVGPU ! Please Check.\n"; @@ -84,8 +84,8 @@ std::vector ArgSort(const ir::Tensor &A, stride = stride * A->shape[i]; } } - offset = common::AutoSimplify(offset); - stride = common::AutoSimplify(stride); + offset = cinn::common::AutoSimplify(offset); + stride = cinn::common::AutoSimplify(stride); auto A_shape_axis = A->shape[pos_axis]; return lang::CallExtern(index_func_name, {A, A_shape_axis, A(indices), offset, stride}); @@ -106,8 +106,8 @@ std::vector ArgSort(const ir::Tensor &A, stride = stride * A->shape[i]; } } - offset = common::AutoSimplify(offset); - stride = common::AutoSimplify(stride); + offset = cinn::common::AutoSimplify(offset); + stride = cinn::common::AutoSimplify(stride); auto A_shape_axis = A->shape[pos_axis]; auto idx = lang::CallExtern( @@ -121,7 +121,7 @@ std::vector ArgSort(const ir::Tensor &A, } std::vector Sort(const ir::Tensor &A, - const common::Target &target, + const cinn::common::Target &target, poly::StageMap stages, const int &axis, const bool &is_ascend, @@ -192,7 +192,7 @@ std::shared_ptr StrategyForSort( [=](lang::Args args, lang::RetValue *ret) { CHECK(!args.empty()) << "The input argument of sort_schedule is empty! 
Please check.\n"; - common::CINNValuePack arg_pack = args[0]; + cinn::common::CINNValuePack arg_pack = args[0]; std::vector vec_ast; for (int i = 0; i < arg_pack.size(); i++) { if (arg_pack[i].is_expr()) { @@ -218,9 +218,9 @@ std::shared_ptr StrategyForSort( pe::IRScheduleInjectiveCPU( ir_sch, output_shapes.front(), target, true); } - std::vector res{ - common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; - *ret = common::CINNValuePack{res}; + std::vector res{ + cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; + *ret = cinn::common::CINNValuePack{res}; }); auto strategy = std::make_shared(); @@ -275,7 +275,7 @@ std::shared_ptr StrategyForArgSort( lang::RetValue *ret) { CHECK(!args.empty()) << "The input argument of argsort_schedule is empty! Please check.\n"; - common::CINNValuePack arg_pack = args[0]; + cinn::common::CINNValuePack arg_pack = args[0]; std::vector vec_ast; for (int i = 0; i < arg_pack.size(); i++) { if (arg_pack[i].is_expr()) { @@ -300,9 +300,9 @@ std::shared_ptr StrategyForArgSort( if (prod_size > 1 && target.arch == Target::Arch::X86) { pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, true); } - std::vector res{ - common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; - *ret = common::CINNValuePack{res}; + std::vector res{ + cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; + *ret = cinn::common::CINNValuePack{res}; }); auto strategy = std::make_shared(); diff --git a/paddle/cinn/hlir/op/contrib/sort.h b/paddle/cinn/hlir/op/contrib/sort.h index bb07855666f214..93660861ef91b3 100644 --- a/paddle/cinn/hlir/op/contrib/sort.h +++ b/paddle/cinn/hlir/op/contrib/sort.h @@ -26,14 +26,14 @@ namespace hlir { namespace op { std::vector ArgSort(const ir::Tensor& A, - const common::Target& target, + const cinn::common::Target& target, poly::StageMap stages, const int& axis, const bool& is_ascend, const std::string& name); std::vector Sort(const ir::Tensor& A, - const common::Target& target, + const cinn::common::Target& target, poly::StageMap stages, const int& axis, const bool& is_ascend, diff --git a/paddle/cinn/hlir/op/contrib/sort_test.cc b/paddle/cinn/hlir/op/contrib/sort_test.cc index 3d2a8f6c73e38d..76386dea2ecef1 100644 --- a/paddle/cinn/hlir/op/contrib/sort_test.cc +++ b/paddle/cinn/hlir/op/contrib/sort_test.cc @@ -33,9 +33,9 @@ namespace hlir { namespace op { TEST(GenerateCode_Cpu, ArgSort) { - common::Context::Global().ResetNameId(); + cinn::common::Context::Global().ResetNameId(); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); ir::Expr n(4); ir::Expr h(28); @@ -72,9 +72,9 @@ TEST(GenerateCode_Cpu, ArgSort) { } TEST(GenerateCode_Cpu, Sort) { - common::Context::Global().ResetNameId(); + cinn::common::Context::Global().ResetNameId(); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); ir::Expr n(4); ir::Expr h(28); diff --git a/paddle/cinn/hlir/op/contrib/triangular_solve.cc b/paddle/cinn/hlir/op/contrib/triangular_solve.cc index 3ec35013fc417d..a6b45405367ab8 100644 --- a/paddle/cinn/hlir/op/contrib/triangular_solve.cc +++ b/paddle/cinn/hlir/op/contrib/triangular_solve.cc @@ -33,8 +33,8 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; std::shared_ptr StrategyForTriangularSolve( const framework::NodeAttr &attrs, diff --git a/paddle/cinn/hlir/op/contrib/uniform_random.cc 
b/paddle/cinn/hlir/op/contrib/uniform_random.cc index cd034560f8feba..0002f1be773557 100644 --- a/paddle/cinn/hlir/op/contrib/uniform_random.cc +++ b/paddle/cinn/hlir/op/contrib/uniform_random.cc @@ -47,8 +47,8 @@ namespace cinn { namespace hlir { namespace op { -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; std::shared_ptr StrategyForUniformRandom( const framework::NodeAttr &attrs, @@ -89,7 +89,7 @@ std::vector InferDtypeForUniformRandom( if (attrs.find("dtype") != attrs.end()) { dtype = absl::get(attrs.at("dtype")); } - std::vector res{common::Str2Type(dtype)}; + std::vector res{cinn::common::Str2Type(dtype)}; CHECK(res[0].is_float(32) || res[0].is_float(64)) << "uniform_random only support float32 and float64, but here " << res[0] << "! Please check."; diff --git a/paddle/cinn/hlir/op/custom_call.cc b/paddle/cinn/hlir/op/custom_call.cc index bf411c0fb502fe..91c3ee6db08986 100644 --- a/paddle/cinn/hlir/op/custom_call.cc +++ b/paddle/cinn/hlir/op/custom_call.cc @@ -34,9 +34,9 @@ namespace cinn { namespace hlir { namespace op { -using common::_CINNValuePack_; -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::_CINNValuePack_; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; using framework::OpStrategy; using framework::shape_t; using framework::StrategyFunction; @@ -54,14 +54,14 @@ class CustomCallArgsFuncRegistry { } void Register(const std::string &custom_call, - const common::Target &target, + const cinn::common::Target &target, ArgsFunc args_func) { auto id = custom_call + "_" + target.arch_str(); func_map_[id] = args_func; } ArgsFunc Lookup(const std::string &custom_call, - const common::Target &target) { + const cinn::common::Target &target) { auto id = custom_call + "_" + target.arch_str(); CHECK(func_map_.count(id)) << "Can't find " << custom_call << " for target " << target.arch_str(); @@ -100,7 +100,7 @@ std::shared_ptr StrategyForCustomCall( ir::Argument(kernel_args, ir::Argument::IO::kOutput), ir::Argument(kernel_args_num, ir::Argument::IO::kInput)}; // if target is nvgpu, add stream. 
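// Illustrative usage sketch for CustomCallArgsFuncRegistry above; the call
// name appears in this file, the rest is an assumption for illustration.
// Builders are keyed by call name plus target arch (Lookup composes the id as
// custom_call + "_" + target.arch_str()), so one external call name can bind
// a different argument builder per backend.
//
//   auto args_fn = CustomCallArgsFuncRegistry::Global().Lookup(
//       "cinn_call_cublas", cinn::common::DefaultNVGPUTarget());
//   auto call_args = args_fn(attrs, inputs, output_shapes);  // hypothetical use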
- if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { ir::Var kernel_stream(KERNEL_STREAM, type_of()); host_args.push_back(kernel_stream); @@ -906,7 +906,7 @@ std::vector CustomCallArgsForMemset( } const auto &dtype = - common::Str2Type(absl::get(attr_store.at("dtype"))); + cinn::common::Str2Type(absl::get(attr_store.at("dtype"))); count *= dtype.bytes(); VLOG(4) << "call memset custom_call with value=" << utils::Attribute2String(value_attr) << " (" << value @@ -939,60 +939,68 @@ std::vector CustomCallArgsForMemcpy( bool RegisteryCustomCallArgsFunc() { #ifdef CINN_WITH_CUDA - CustomCallArgsFuncRegistry::Global().Register("cinn_call_cublas", - common::DefaultNVGPUTarget(), - CustomCallArgsForCublas); + CustomCallArgsFuncRegistry::Global().Register( + "cinn_call_cublas", + cinn::common::DefaultNVGPUTarget(), + CustomCallArgsForCublas); CustomCallArgsFuncRegistry::Global().Register( "cinn_call_gaussian_random", - common::DefaultNVGPUTarget(), + cinn::common::DefaultNVGPUTarget(), CustomCallArgsForGaussianRandom); - CustomCallArgsFuncRegistry::Global().Register("cinn_call_uniform_random", - common::DefaultNVGPUTarget(), - CustomCallArgsForUniformRandom); - CustomCallArgsFuncRegistry::Global().Register("cinn_call_randint", - common::DefaultNVGPUTarget(), - CustomCallArgsForRandInt); - CustomCallArgsFuncRegistry::Global().Register("cinn_call_cholesky_nvgpu", - common::DefaultNVGPUTarget(), - CustomCallArgsForCholesky); - CustomCallArgsFuncRegistry::Global().Register("cinn_call_batched_cublas", - common::DefaultNVGPUTarget(), - CustomCallArgsForBatchedCublas); + CustomCallArgsFuncRegistry::Global().Register( + "cinn_call_uniform_random", + cinn::common::DefaultNVGPUTarget(), + CustomCallArgsForUniformRandom); + CustomCallArgsFuncRegistry::Global().Register( + "cinn_call_randint", + cinn::common::DefaultNVGPUTarget(), + CustomCallArgsForRandInt); + CustomCallArgsFuncRegistry::Global().Register( + "cinn_call_cholesky_nvgpu", + cinn::common::DefaultNVGPUTarget(), + CustomCallArgsForCholesky); + CustomCallArgsFuncRegistry::Global().Register( + "cinn_call_batched_cublas", + cinn::common::DefaultNVGPUTarget(), + CustomCallArgsForBatchedCublas); CustomCallArgsFuncRegistry::Global().Register( "cinn_call_triangular_solve_nvgpu", - common::DefaultNVGPUTarget(), + cinn::common::DefaultNVGPUTarget(), CustomCallArgsForTriangularSolve); - CustomCallArgsFuncRegistry::Global().Register("cinn_assert_true_nvgpu", - common::DefaultNVGPUTarget(), - CustomCallArgsForAssertTrue); - CustomCallArgsFuncRegistry::Global().Register("cinn_call_cuda_memset", - common::DefaultNVGPUTarget(), - CustomCallArgsForMemset); - CustomCallArgsFuncRegistry::Global().Register("cinn_call_cuda_memcpy", - common::DefaultNVGPUTarget(), - CustomCallArgsForMemcpy); + CustomCallArgsFuncRegistry::Global().Register( + "cinn_assert_true_nvgpu", + cinn::common::DefaultNVGPUTarget(), + CustomCallArgsForAssertTrue); + CustomCallArgsFuncRegistry::Global().Register( + "cinn_call_cuda_memset", + cinn::common::DefaultNVGPUTarget(), + CustomCallArgsForMemset); + CustomCallArgsFuncRegistry::Global().Register( + "cinn_call_cuda_memcpy", + cinn::common::DefaultNVGPUTarget(), + CustomCallArgsForMemcpy); #endif #ifdef CINN_WITH_CUDNN CustomCallArgsFuncRegistry::Global().Register( "cinn_call_cudnn_conv2d_forward", - common::DefaultNVGPUTarget(), + cinn::common::DefaultNVGPUTarget(), CustomCallArgsForCudnnConvForward); CustomCallArgsFuncRegistry::Global().Register( "cinn_call_cudnn_conv2d_backward_data", - 
common::DefaultNVGPUTarget(), + cinn::common::DefaultNVGPUTarget(), CustomCallArgsForCudnnConvBackwardData); CustomCallArgsFuncRegistry::Global().Register( "cinn_call_cudnn_conv2d_backward_filter", - common::DefaultNVGPUTarget(), + cinn::common::DefaultNVGPUTarget(), CustomCallArgsForCudnnConvBackwardFilter); CustomCallArgsFuncRegistry::Global().Register( "cinn_call_cudnn_pool2d_forward", - common::DefaultNVGPUTarget(), + cinn::common::DefaultNVGPUTarget(), CustomCallArgsForCudnnPoolForward); CustomCallArgsFuncRegistry::Global().Register( "cinn_call_cudnn_pool2d_backward", - common::DefaultNVGPUTarget(), + cinn::common::DefaultNVGPUTarget(), CustomCallArgsForCudnnPoolBackward); #endif @@ -1002,15 +1010,17 @@ bool RegisteryCustomCallArgsFunc() { #ifdef CINN_WITH_MKL_CBLAS - CustomCallArgsFuncRegistry::Global().Register("cinn_call_cholesky_host", - common::DefaultHostTarget(), - CustomCallArgsForCholesky); + CustomCallArgsFuncRegistry::Global().Register( + "cinn_call_cholesky_host", + cinn::common::DefaultHostTarget(), + CustomCallArgsForCholesky); #endif - CustomCallArgsFuncRegistry::Global().Register("cinn_assert_true_host", - common::DefaultHostTarget(), - CustomCallArgsForAssertTrue); + CustomCallArgsFuncRegistry::Global().Register( + "cinn_assert_true_host", + cinn::common::DefaultHostTarget(), + CustomCallArgsForAssertTrue); return true; } diff --git a/paddle/cinn/hlir/op/elementwise.cc b/paddle/cinn/hlir/op/elementwise.cc index a488391714dd8f..78df4cf0b78ab7 100644 --- a/paddle/cinn/hlir/op/elementwise.cc +++ b/paddle/cinn/hlir/op/elementwise.cc @@ -31,9 +31,9 @@ namespace cinn { namespace hlir { namespace op { -using common::_CINNValuePack_; -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::_CINNValuePack_; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; using framework::OpStrategy; using framework::shape_t; using framework::StrategyFunction; @@ -167,8 +167,8 @@ std::shared_ptr StrategyForScale( // Paddle upscale float16 or bfloat16 compute to float32, // we made CINN consistent with this behavior of Paddle - bool should_upscale_fp32 = - A->type() == common::F16() || A->type() == common::BF16(); + bool should_upscale_fp32 = A->type() == cinn::common::F16() || + A->type() == cinn::common::BF16(); out = Compute( A->shape, @@ -180,8 +180,9 @@ std::shared_ptr StrategyForScale( ? Expr(bias) : ir::Cast::Make(A->type(), Expr(bias)); Expr cast_A_indice = - should_upscale_fp32 ? ir::Cast::Make(common::F32(), A(indice)) - : A(indice); + should_upscale_fp32 + ? ir::Cast::Make(cinn::common::F32(), A(indice)) + : A(indice); Expr add_result = bias_after_scale ? cast_scale * cast_A_indice + cast_bias : cast_scale * (cast_A_indice + cast_bias); @@ -290,7 +291,7 @@ std::vector InferDtypeForConstScalar( if (attrs.find("dtype") != attrs.end()) { auto dtype_str = absl::get(attrs.at("dtype")); if (!dtype_str.empty()) { - out_type = common::Str2Type(dtype_str); + out_type = cinn::common::Str2Type(dtype_str); } } else { auto scalar = GetScalarExpr(attrs.at("value")); @@ -368,7 +369,7 @@ std::shared_ptr StrategyForFillConstant( CHECK(attrs.attr_store.count("force_cpu")); force_cpu = absl::get(attrs.attr_store.at("force_cpu")); - if (force_cpu && target != common::DefaultHostTarget()) { + if (force_cpu && target != cinn::common::DefaultHostTarget()) { LOG(WARNING) << "The attribute \"force_cpu\" of \"fill_constant\" " "not supported in CINN! 
The \"fill_constant\"'s " "output tensor will placed on " @@ -412,19 +413,19 @@ std::vector InferShapeForFillConstant( std::vector InferDtypeForFillConstant( const std::vector &inputs_type, const framework::AttrMapType &attrs) { - common::Type out_type; + cinn::common::Type out_type; CHECK(attrs.count("value")); if (attrs.find("dtype") != attrs.end()) { // attribute [dtype] are given auto dtype_str = absl::get(attrs.at("dtype")); - out_type = common::Str2Type(dtype_str); + out_type = cinn::common::Str2Type(dtype_str); VLOG(3) << "FillConstant output dtype (from [dtype]): " << dtype_str; } else { // attribute [dtype] no given, inferred by value's type auto scalar = GetScalarExpr(attrs.at("value")); out_type = scalar->type(); VLOG(3) << "FillConstant scalar type (from [value]): " - << common::Type2Str(out_type); + << cinn::common::Type2Str(out_type); } return {out_type}; } @@ -542,7 +543,7 @@ std::vector InferDtypeForAssignValue( auto dtype_str = absl::get(attrs.at("dtype")); if (!dtype_str.empty()) { // if the [dtype] is not empty, output as the given type - out_type = common::Str2Type(dtype_str); + out_type = cinn::common::Str2Type(dtype_str); } } @@ -555,10 +556,10 @@ std::vector InferDtypeForAssignValue( #define EXPAND_ATTR_TO_GET_DTYPE(TYPE) \ else if (absl::get_if(&value)) { /*NOLINT*/ \ - out_type = common::type_of(); \ + out_type = cinn::common::type_of(); \ } \ else if (absl::get_if>(&value)) { /*NOLINT*/ \ - out_type = common::type_of(); \ + out_type = cinn::common::type_of(); \ } if (false) { // NOLINT @@ -918,7 +919,7 @@ std::shared_ptr StrategyForCast( std::vector InferDtypeForCast(const std::vector &inputs_type, const framework::AttrMapType &attrs) { CHECK(attrs.count("dtype")); - return {common::Str2Type(absl::get(attrs.at("dtype")))}; + return {cinn::common::Str2Type(absl::get(attrs.at("dtype")))}; } std::shared_ptr StrategyForArange( @@ -936,7 +937,8 @@ std::shared_ptr StrategyForArange( auto start = absl::get(attr_store.at("start")); auto stop = absl::get(attr_store.at("stop")); auto step = absl::get(attr_store.at("step")); - auto dtype = common::Str2Type(absl::get(attr_store.at("dtype"))); + auto dtype = + cinn::common::Str2Type(absl::get(attr_store.at("dtype"))); framework::CINNCompute arange_compute( [=](lang::Args args, lang::RetValue *ret) { @@ -948,10 +950,10 @@ std::shared_ptr StrategyForArange( std::string tensor_name = pack_args[0].operator std::string(); auto out = pe::Arange(start, stop, step, dtype, tensor_name); - std::vector res; + std::vector res; auto stages = CreateStages({out}); - res.push_back(common::CINNValue(out)); - res.push_back(common::CINNValue(stages)); + res.push_back(cinn::common::CINNValue(out)); + res.push_back(cinn::common::CINNValue(stages)); *ret = CINNValuePack{res}; }); @@ -984,12 +986,12 @@ std::vector> InferShapeForArange( std::vector InferDtypeForArange(const std::vector &inputs_type, const framework::AttrMapType &attrs) { CHECK(attrs.count("dtype")); - return {common::Str2Type(absl::get(attrs.at("dtype")))}; + return {cinn::common::Str2Type(absl::get(attrs.at("dtype")))}; } std::vector InferDtypeForLogicalNot(const std::vector &inputs_type, const framework::AttrMapType &attrs) { - return {common::Bool()}; + return {cinn::common::Bool()}; } } // namespace op diff --git a/paddle/cinn/hlir/op/external_api_registry.cc b/paddle/cinn/hlir/op/external_api_registry.cc index 000f8b92de9054..cddb10c63b1467 100644 --- a/paddle/cinn/hlir/op/external_api_registry.cc +++ b/paddle/cinn/hlir/op/external_api_registry.cc @@ -18,13 +18,13 @@ namespace 
cinn { namespace hlir { namespace op { -ExternalApiInfo& ExternalApiRegistry::Register(const std::string& op_name, - const common::Target& target) { +ExternalApiInfo& ExternalApiRegistry::Register( + const std::string& op_name, const cinn::common::Target& target) { return __REGISTER__(GenKey(op_name, target)); } -std::string ExternalApiRegistry::GetExternalApi(const framework::Node* op_node, - const common::Target& target) { +std::string ExternalApiRegistry::GetExternalApi( + const framework::Node* op_node, const cinn::common::Target& target) { CHECK(op_node->attrs.attr_store.count("original_op")) << "a custom_call op must store its original op name"; std::string op_name = @@ -44,7 +44,7 @@ std::string ExternalApiRegistry::GetExternalApi(const framework::Node* op_node, } std::string ExternalApiRegistry::GenKey(const std::string& op_name, - const common::Target& target) { + const cinn::common::Target& target) { std::ostringstream oss; oss << target; return op_name + "_" + oss.str(); diff --git a/paddle/cinn/hlir/op/external_api_registry.h b/paddle/cinn/hlir/op/external_api_registry.h index 307cac68b2f20e..e7d85cca784fce 100644 --- a/paddle/cinn/hlir/op/external_api_registry.h +++ b/paddle/cinn/hlir/op/external_api_registry.h @@ -61,22 +61,23 @@ class ExternalApiRegistry : public Registry { } ExternalApiInfo& Register(const std::string& op_name, - const common::Target& target); + const cinn::common::Target& target); - bool Has(const std::string& op_name, const common::Target& target) { + bool Has(const std::string& op_name, const cinn::common::Target& target) { return nullptr != Registry::Find(GenKey(op_name, target)); } // return the api name on the specified target std::string GetExternalApi(const framework::Node* op_node, - const common::Target& target); + const cinn::common::Target& target); private: ExternalApiRegistry() = default; CINN_DISALLOW_COPY_AND_ASSIGN(ExternalApiRegistry); // the registered key consist of the name of op and the specified target - std::string GenKey(const std::string& op_name, const common::Target& target); + std::string GenKey(const std::string& op_name, + const cinn::common::Target& target); }; } // namespace op diff --git a/paddle/cinn/hlir/op/external_api_registry_test.cc b/paddle/cinn/hlir/op/external_api_registry_test.cc index 186fb8fa532624..36a15666277e67 100644 --- a/paddle/cinn/hlir/op/external_api_registry_test.cc +++ b/paddle/cinn/hlir/op/external_api_registry_test.cc @@ -27,12 +27,12 @@ using cinn::hlir::framework::Node; using cinn::hlir::op::ExternalApiRegistry; TEST(ExternalApiRegistry, Has) { - ASSERT_TRUE(ExternalApiRegistry::Global()->Has("matmul", - common::DefaultNVGPUTarget())); - ASSERT_TRUE(ExternalApiRegistry::Global()->Has("cholesky", - common::DefaultHostTarget())); + ASSERT_TRUE(ExternalApiRegistry::Global()->Has( + "matmul", cinn::common::DefaultNVGPUTarget())); + ASSERT_TRUE(ExternalApiRegistry::Global()->Has( + "cholesky", cinn::common::DefaultHostTarget())); ASSERT_FALSE(ExternalApiRegistry::Global()->Has( - "op_doesn't_exist", common::DefaultNVGPUTarget())); + "op_doesn't_exist", cinn::common::DefaultNVGPUTarget())); } TEST(ExternalApiRegistry, GetExternalApi) { @@ -41,13 +41,13 @@ TEST(ExternalApiRegistry, GetExternalApi) { node->attrs.attr_store["original_op"] = std::string("matmul"); ASSERT_EQ("cinn_call_cublas", ExternalApiRegistry::Global()->GetExternalApi( - node.get(), common::DefaultNVGPUTarget())); + node.get(), cinn::common::DefaultNVGPUTarget())); #ifdef CINN_WITH_CUDNN node->attrs.attr_store["conv_type"] = 
std::string("backward_data"); node->attrs.attr_store["original_op"] = std::string("conv2d"); ASSERT_EQ("cinn_call_cudnn_conv2d_backward_data", ExternalApiRegistry::Global()->GetExternalApi( - node.get(), common::DefaultNVGPUTarget())); + node.get(), cinn::common::DefaultNVGPUTarget())); #endif } diff --git a/paddle/cinn/hlir/op/nn.cc b/paddle/cinn/hlir/op/nn.cc index 22bd95dcbf0109..ca5d542d85a125 100644 --- a/paddle/cinn/hlir/op/nn.cc +++ b/paddle/cinn/hlir/op/nn.cc @@ -32,9 +32,9 @@ namespace cinn { namespace hlir { namespace op { -using common::_CINNValuePack_; -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::_CINNValuePack_; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; using framework::OpStrategy; using framework::shape_t; using framework::StrategyFunction; @@ -481,7 +481,7 @@ std::vector InferShapeForConv2d( -1, -1, Float(32), - common::DefaultHostTarget(), + cinn::common::DefaultHostTarget(), key); int ic_bn = conv2d_factors["ic_bn"]; int oc_bn = conv2d_factors["oc_bn"]; @@ -885,7 +885,7 @@ std::shared_ptr StrategyForDepthwiseConv2d( [=](lang::Args args, lang::RetValue *ret) { CHECK(!args.empty()) << "The input argument of InjectiveSchedule is " "empty! Please check.\n"; - common::CINNValuePack arg_pack = args[0]; + cinn::common::CINNValuePack arg_pack = args[0]; std::vector vec_ast; std::vector vec_tensor; for (int i = 0; i < arg_pack.size(); i++) { @@ -906,9 +906,9 @@ std::shared_ptr StrategyForDepthwiseConv2d( } else { CINN_NOT_IMPLEMENTED } - std::vector res{ - common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; - *ret = common::CINNValuePack{res}; + std::vector res{ + cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; + *ret = cinn::common::CINNValuePack{res}; }); auto strategy = std::make_shared(); diff --git a/paddle/cinn/hlir/op/op_broadcast_test.cc b/paddle/cinn/hlir/op/op_broadcast_test.cc index 8981712f5da643..4acb3371587cbd 100644 --- a/paddle/cinn/hlir/op/op_broadcast_test.cc +++ b/paddle/cinn/hlir/op/op_broadcast_test.cc @@ -48,7 +48,7 @@ TEST(Operator, Operator_ElementWise_Add_Test0) { NodeAttr attrs; std::vector inputs{A.tensor(), B.tensor()}; std::vector type{Float(32)}; - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); auto impl = OpStrategy::SelectImpl(strategy[add]( attrs, inputs, type, {{M.as_int32(), N.as_int32()}}, target)); ASSERT_EQ(impl->name, "strategy.elementwise_add.x86"); @@ -58,10 +58,10 @@ TEST(Operator, Operator_ElementWise_Add_Test0) { Module::Builder builder("module0", target); std::string out_name = "C"; - common::CINNValuePack cinn_input = - common::CINNValuePack{{common::CINNValue(A), - common::CINNValue(B), - common::CINNValue(out_name)}}; + cinn::common::CINNValuePack cinn_input = + cinn::common::CINNValuePack{{cinn::common::CINNValue(A), + cinn::common::CINNValue(B), + cinn::common::CINNValue(out_name)}}; std::vector input_output_names{"A", "B", out_name}; auto funcs = framework::GetFuncFromImpl( @@ -83,28 +83,29 @@ TEST(Operator, Operator_ElementWise_Add_Test0) { cinn_buffer_t *B_buf; int set_value = 0; if (set_value != 0) { - A_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + A_buf = cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) .set_align(512) .set_val(set_value) .Build(); - B_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + B_buf = cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) .set_align(512) .set_val(set_value) 
.Build(); } else { - A_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + A_buf = cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) .set_align(512) .set_random() .Build(); - B_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + B_buf = cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) .set_align(512) .set_random() .Build(); } - auto *C_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_align(512) - .set_zero() - .Build(); + auto *C_buf = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_align(512) + .set_zero() + .Build(); cinn_pod_value_t a_arg(A_buf), b_arg(B_buf), c_arg(C_buf); cinn_pod_value_t args[] = {a_arg, b_arg, c_arg}; @@ -131,7 +132,7 @@ TEST(Operator, Operator_ElementWise_Add_Test1) { attrs.attr_store["axis"] = 1; std::vector inputs{A.tensor(), B.tensor()}; std::vector type{Float(32)}; - common::Target target = common::DefaultNVGPUTarget(); + cinn::common::Target target = cinn::common::DefaultNVGPUTarget(); auto impl = OpStrategy::SelectImpl( strategy[add](attrs, inputs, type, {{100, 32}}, target)); ASSERT_EQ(impl->name, "strategy.elementwise_add.x86"); @@ -141,10 +142,10 @@ TEST(Operator, Operator_ElementWise_Add_Test1) { Module::Builder builder("module", target); std::string out_name = "C"; - common::CINNValuePack cinn_input = - common::CINNValuePack{{common::CINNValue(A), - common::CINNValue(B), - common::CINNValue(out_name)}}; + cinn::common::CINNValuePack cinn_input = + cinn::common::CINNValuePack{{cinn::common::CINNValue(A), + cinn::common::CINNValue(B), + cinn::common::CINNValue(out_name)}}; std::vector input_output_names{"A", "B", out_name}; auto funcs = framework::GetFuncFromImpl( @@ -181,7 +182,7 @@ TEST(Operator, Operator_BroadcastTo) { std::vector inputs{B.tensor()}; std::vector type{Float(32)}; - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); auto impl = OpStrategy::SelectImpl( strategy[broadcast_to](attrs, inputs, type, {out_shape}, target)); @@ -189,8 +190,8 @@ TEST(Operator, Operator_BroadcastTo) { std::string func_name = "broadcast_to"; std::string out_name = "C"; - common::CINNValuePack cinn_input = common::CINNValuePack{ - {common::CINNValue(B), common::CINNValue(out_name)}}; + cinn::common::CINNValuePack cinn_input = cinn::common::CINNValuePack{ + {cinn::common::CINNValue(B), cinn::common::CINNValue(out_name)}}; std::vector input_output_names{"B", out_name}; auto funcs = framework::GetFuncFromImpl( @@ -201,12 +202,12 @@ TEST(Operator, Operator_BroadcastTo) { } } -common::CINNValuePack GetComputeResult( +cinn::common::CINNValuePack GetComputeResult( const std::shared_ptr &impl, - std::vector &cinn_inputs, // NOLINT + std::vector &cinn_inputs, // NOLINT const std::string &output_name = "") { cinn_inputs.emplace_back(output_name); - return impl->fcompute(common::CINNValuePack{cinn_inputs}); + return impl->fcompute(cinn::common::CINNValuePack{cinn_inputs}); } TEST(Operator, Operator_BroadcastTo_0) { @@ -233,21 +234,22 @@ TEST(Operator, Operator_BroadcastTo_0) { attrs.attr_store["dim"] = dim; std::vector type{Float(32)}; - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); auto impl_0 = OpStrategy::SelectImpl(strategy[const_scalar]( attrs, std::vector{}, type, {out_shape}, target)); - std::vector cinn_inputs; - common::CINNValuePack rets_0 = GetComputeResult(impl_0, cinn_inputs, "out_0"); + 
std::vector cinn_inputs; + cinn::common::CINNValuePack rets_0 = + GetComputeResult(impl_0, cinn_inputs, "out_0"); ir::Expr out_0 = rets_0[0]; auto tensor_0 = out_0.as_tensor_ref(); poly::StageMap stages_0 = rets_0.back(); auto impl_1 = OpStrategy::SelectImpl( strategy[broadcast_to](attrs, {tensor_0}, type, {out_shape}, target)); - std::vector cinn_inputs_1 = { - {common::CINNValue(tensor_0)}}; - common::CINNValuePack rets_1 = + std::vector cinn_inputs_1 = { + {cinn::common::CINNValue(tensor_0)}}; + cinn::common::CINNValuePack rets_1 = GetComputeResult(impl_1, cinn_inputs_1, "out_1"); ir::Expr out_1 = rets_1[0]; @@ -256,18 +258,18 @@ TEST(Operator, Operator_BroadcastTo_0) { auto impl_2 = OpStrategy::SelectImpl( strategy[reduce_sum](attrs, {A.tensor()}, type, {out_shape}, target)); - std::vector cinn_inputs_2 = { - {common::CINNValue(A.tensor())}}; - common::CINNValuePack rets_2 = + std::vector cinn_inputs_2 = { + {cinn::common::CINNValue(A.tensor())}}; + cinn::common::CINNValuePack rets_2 = GetComputeResult(impl_2, cinn_inputs_2, "out_2"); ir::Expr out_2 = rets_2[0]; auto tensor_2 = out_2.as_tensor_ref(); poly::StageMap stages_2 = rets_2.back(); - std::vector cinn_inputs_4 = { - {common::CINNValue(A.tensor())}}; - common::CINNValuePack rets_4 = + std::vector cinn_inputs_4 = { + {cinn::common::CINNValue(A.tensor())}}; + cinn::common::CINNValuePack rets_4 = GetComputeResult(impl_2, cinn_inputs_4, "out_4"); ir::Expr out_4 = rets_4[0]; auto tensor_4 = out_4.as_tensor_ref(); @@ -275,9 +277,9 @@ TEST(Operator, Operator_BroadcastTo_0) { auto impl_3 = OpStrategy::SelectImpl(strategy[elementwise_add]( attrs, {tensor_1, tensor_2}, type, {out_shape}, target)); - std::vector cinn_inputs_3 = { - {common::CINNValue(tensor_1), common::CINNValue(tensor_2)}}; - common::CINNValuePack rets_3 = + std::vector cinn_inputs_3 = { + {cinn::common::CINNValue(tensor_1), cinn::common::CINNValue(tensor_2)}}; + cinn::common::CINNValuePack rets_3 = GetComputeResult(impl_3, cinn_inputs_3, "out_3"); ir::Expr out_3 = rets_3[0]; diff --git a/paddle/cinn/hlir/op/op_nn_test.cc b/paddle/cinn/hlir/op/op_nn_test.cc index 1d4920439ef4f2..593c75da8cfe95 100644 --- a/paddle/cinn/hlir/op/op_nn_test.cc +++ b/paddle/cinn/hlir/op/op_nn_test.cc @@ -43,12 +43,13 @@ Module LowerToModule(const std::string test_name, std::vector input_names, const std::string &output_name, std::vector &inputs, // NOLINT - std::vector cinn_inputs, + std::vector cinn_inputs, const Target &target) { Module::Builder builder("module", target); cinn_inputs.emplace_back(output_name); - common::CINNValuePack cinn_input = common::CINNValuePack{cinn_inputs}; + cinn::common::CINNValuePack cinn_input = + cinn::common::CINNValuePack{cinn_inputs}; input_names.push_back(output_name); auto funcs = framework::GetFuncFromImpl( @@ -80,7 +81,7 @@ TEST(Operator, Operator_Pool2d_Test0) { attrs.attr_store["pool_type"] = pool_type; std::vector inputs{A.tensor()}; std::vector type{Float(32)}; - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); auto impl = OpStrategy::SelectImpl(strategy[pool2d]( attrs, inputs, type, {{1, 3, 10, 10}, {1, 3, 5, 5}}, target)); @@ -91,7 +92,7 @@ TEST(Operator, Operator_Pool2d_Test0) { {"A"}, "B", inputs, - {common::CINNValue(A)}, + {cinn::common::CINNValue(A)}, target); auto jit = backends::ExecutionEngine::Create({}); @@ -102,11 +103,12 @@ TEST(Operator, Operator_Pool2d_Test0) { auto fn_ = reinterpret_cast(fn); cinn_buffer_t *A_buf = - common::BufferBuilder(Float(32), {1, 3, 8, 
8}).set_random().Build(); - cinn_buffer_t *B_buf = - common::BufferBuilder(Float(32), {1, 3, 10, 10}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {1, 3, 8, 8}).set_random().Build(); + cinn_buffer_t *B_buf = cinn::common::BufferBuilder(Float(32), {1, 3, 10, 10}) + .set_random() + .Build(); cinn_buffer_t *C_buf = - common::BufferBuilder(Float(32), {1, 3, 5, 5}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {1, 3, 5, 5}).set_random().Build(); cinn_pod_value_t a_arg(A_buf), b_arg(B_buf), c_arg(C_buf); cinn_pod_value_t args[] = {a_arg, b_arg, c_arg}; fn_(args, 3); @@ -138,7 +140,7 @@ TEST(Operator, Operator_Pool2d_Test1) { attrs.attr_store["exclusive"] = false; std::vector inputs{A.tensor()}; std::vector type{Float(32)}; - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); auto impl = OpStrategy::SelectImpl(strategy[pool2d]( attrs, inputs, type, {{1, 3, 11, 11}, {1, 3, 5, 5}}, target)); @@ -150,7 +152,7 @@ TEST(Operator, Operator_Pool2d_Test1) { {"A"}, "B", inputs, - {common::CINNValue(A)}, + {cinn::common::CINNValue(A)}, target); auto jit = backends::ExecutionEngine::Create({}); @@ -161,11 +163,12 @@ TEST(Operator, Operator_Pool2d_Test1) { auto fn_ = reinterpret_cast(fn); cinn_buffer_t *A_buf = - common::BufferBuilder(Float(32), {1, 3, 8, 8}).set_random().Build(); - cinn_buffer_t *B_buf = - common::BufferBuilder(Float(32), {1, 3, 11, 11}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {1, 3, 8, 8}).set_random().Build(); + cinn_buffer_t *B_buf = cinn::common::BufferBuilder(Float(32), {1, 3, 11, 11}) + .set_random() + .Build(); cinn_buffer_t *C_buf = - common::BufferBuilder(Float(32), {1, 3, 5, 5}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {1, 3, 5, 5}).set_random().Build(); cinn_pod_value_t a_arg(A_buf), b_arg(B_buf), c_arg(C_buf); cinn_pod_value_t args[] = {a_arg, b_arg, c_arg}; fn_(args, 3); @@ -199,7 +202,7 @@ TEST(Operator, Operator_Pool2d_Test2) { attrs.attr_store["data_format"] = data_format; std::vector inputs{A.tensor()}; std::vector type{Float(32)}; - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); auto impl = OpStrategy::SelectImpl(strategy[pool2d]( attrs, inputs, type, {{1, 11, 11, 3}, {1, 5, 5, 3}}, target)); @@ -211,7 +214,7 @@ TEST(Operator, Operator_Pool2d_Test2) { {"A"}, "B", inputs, - {common::CINNValue(A)}, + {cinn::common::CINNValue(A)}, target); auto jit = backends::ExecutionEngine::Create({}); @@ -222,11 +225,12 @@ TEST(Operator, Operator_Pool2d_Test2) { auto fn_ = reinterpret_cast(fn); cinn_buffer_t *A_buf = - common::BufferBuilder(Float(32), {1, 8, 8, 3}).set_random().Build(); - cinn_buffer_t *B_buf = - common::BufferBuilder(Float(32), {1, 11, 11, 3}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {1, 8, 8, 3}).set_random().Build(); + cinn_buffer_t *B_buf = cinn::common::BufferBuilder(Float(32), {1, 11, 11, 3}) + .set_random() + .Build(); cinn_buffer_t *C_buf = - common::BufferBuilder(Float(32), {1, 5, 5, 3}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {1, 5, 5, 3}).set_random().Build(); cinn_pod_value_t a_arg(A_buf), b_arg(B_buf), c_arg(C_buf); cinn_pod_value_t args[] = {a_arg, b_arg, c_arg}; fn_(args, 3); @@ -260,7 +264,7 @@ TEST(Operator, Operator_Pool3d_Test0) { attrs.attr_store["data_format"] = data_format; std::vector inputs{A.tensor()}; std::vector type{Float(32)}; - common::Target target = common::DefaultHostTarget(); + 
cinn::common::Target target = cinn::common::DefaultHostTarget(); auto impl = OpStrategy::SelectImpl(strategy[pool3d]( attrs, inputs, type, {{1, 11, 11, 11, 3}, {1, 5, 5, 5, 3}}, target)); @@ -271,7 +275,7 @@ TEST(Operator, Operator_Pool3d_Test0) { {"A"}, "B", inputs, - {common::CINNValue(A)}, + {cinn::common::CINNValue(A)}, target); auto jit = backends::ExecutionEngine::Create({}); @@ -281,12 +285,16 @@ TEST(Operator, Operator_Pool3d_Test0) { CHECK(fn); auto fn_ = reinterpret_cast(fn); - cinn_buffer_t *A_buf = - common::BufferBuilder(Float(32), {1, 8, 8, 8, 3}).set_random().Build(); + cinn_buffer_t *A_buf = cinn::common::BufferBuilder(Float(32), {1, 8, 8, 8, 3}) + .set_random() + .Build(); cinn_buffer_t *B_buf = - common::BufferBuilder(Float(32), {1, 11, 11, 11, 3}).set_random().Build(); - cinn_buffer_t *C_buf = - common::BufferBuilder(Float(32), {1, 5, 5, 5, 3}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {1, 11, 11, 11, 3}) + .set_random() + .Build(); + cinn_buffer_t *C_buf = cinn::common::BufferBuilder(Float(32), {1, 5, 5, 5, 3}) + .set_random() + .Build(); cinn_pod_value_t a_arg(A_buf), b_arg(B_buf), c_arg(C_buf); cinn_pod_value_t args[] = {a_arg, b_arg, c_arg}; fn_(args, 3); @@ -320,7 +328,7 @@ TEST(Operator, Operator_Pool1d_Test0) { attrs.attr_store["data_format"] = data_format; std::vector inputs{A.tensor()}; std::vector type{Float(32)}; - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); auto impl = OpStrategy::SelectImpl( strategy[pool1d](attrs, inputs, type, {{1, 11, 3}, {1, 5, 3}}, target)); @@ -331,7 +339,7 @@ TEST(Operator, Operator_Pool1d_Test0) { {"A"}, "B", inputs, - {common::CINNValue(A)}, + {cinn::common::CINNValue(A)}, target); auto jit = backends::ExecutionEngine::Create({}); @@ -342,11 +350,11 @@ TEST(Operator, Operator_Pool1d_Test0) { auto fn_ = reinterpret_cast(fn); cinn_buffer_t *A_buf = - common::BufferBuilder(Float(32), {1, 8, 3}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {1, 8, 3}).set_random().Build(); cinn_buffer_t *B_buf = - common::BufferBuilder(Float(32), {1, 11, 3}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {1, 11, 3}).set_random().Build(); cinn_buffer_t *C_buf = - common::BufferBuilder(Float(32), {1, 5, 3}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {1, 5, 3}).set_random().Build(); cinn_pod_value_t a_arg(A_buf), b_arg(B_buf), c_arg(C_buf); cinn_pod_value_t args[] = {a_arg, b_arg, c_arg}; fn_(args, 3); @@ -372,7 +380,7 @@ TEST(Operator, Operator_Select_Test0) { std::vector inputs{ condition.tensor(), true_value.tensor(), false_value.tensor()}; std::vector type{Float(32)}; - const common::Target target = common::DefaultHostTarget(); + const cinn::common::Target target = cinn::common::DefaultHostTarget(); const std::vector input_shapes = { {16, 64, 64}, {16, 64, 64}, {16, 64, 64}}; @@ -387,9 +395,10 @@ TEST(Operator, Operator_Select_Test0) { std::string func_name = "select"; std::vector input_names = { "condition", "true_value", "false_value"}; - std::vector cinn_inputs = {common::CINNValue(condition), - common::CINNValue(true_value), - common::CINNValue(false_value)}; + std::vector cinn_inputs = { + cinn::common::CINNValue(condition), + cinn::common::CINNValue(true_value), + cinn::common::CINNValue(false_value)}; auto module = LowerToModule("Operator_Select_Test0", func_name, @@ -408,13 +417,13 @@ TEST(Operator, Operator_Select_Test0) { auto fn_ = reinterpret_cast(fn); cinn_buffer_t *A_buf = - 
common::BufferBuilder(Bool(), {16, 64, 64}).set_random().Build(); + cinn::common::BufferBuilder(Bool(), {16, 64, 64}).set_random().Build(); cinn_buffer_t *B_buf = - common::BufferBuilder(Float(32), {16, 64, 64}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {16, 64, 64}).set_random().Build(); cinn_buffer_t *C_buf = - common::BufferBuilder(Float(32), {16, 64, 64}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {16, 64, 64}).set_random().Build(); cinn_buffer_t *D_buf = - common::BufferBuilder(Float(32), {16, 64, 64}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {16, 64, 64}).set_random().Build(); cinn_pod_value_t a_arg(A_buf), b_arg(B_buf), c_arg(C_buf), d_arg(D_buf); cinn_pod_value_t args[] = {a_arg, b_arg, c_arg, d_arg}; @@ -452,7 +461,7 @@ TEST(Operator, Operator_Reverse_Test0) { attrs.attr_store["axis"] = axis; std::vector inputs{A.tensor()}; std::vector type{Float(32)}; - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); auto impl = OpStrategy::SelectImpl( strategy[reverse](attrs, inputs, type, {{c, h, w}}, target)); @@ -464,7 +473,7 @@ TEST(Operator, Operator_Reverse_Test0) { {"A"}, "B", inputs, - {common::CINNValue(A)}, + {cinn::common::CINNValue(A)}, target); auto jit = backends::ExecutionEngine::Create({}); @@ -475,9 +484,9 @@ TEST(Operator, Operator_Reverse_Test0) { auto fn_ = reinterpret_cast(fn); cinn_buffer_t *A_buf = - common::BufferBuilder(Float(32), {c, h, w}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {c, h, w}).set_random().Build(); cinn_buffer_t *B_buf = - common::BufferBuilder(Float(32), {c, h, w}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {c, h, w}).set_random().Build(); cinn_pod_value_t a_arg(A_buf), b_arg(B_buf); cinn_pod_value_t args[] = {a_arg, b_arg}; fn_(args, 2); @@ -516,7 +525,7 @@ TEST(Operator, Operator_Transpose_Test0) { attrs.attr_store["axis"] = axis; std::vector inputs{A.tensor()}; std::vector type{Float(32)}; - common::Target target = common::DefaultHostTarget(); + cinn::common::Target target = cinn::common::DefaultHostTarget(); auto infer_shape = infer_shape_func({{n, c, h, w}}, attrs.attr_store); ASSERT_EQ(infer_shape[0][0], n); @@ -551,7 +560,7 @@ TEST(Operator, Operator_Transpose_Test0) { {"A"}, "B", inputs, - {common::CINNValue(A)}, + {cinn::common::CINNValue(A)}, target); auto jit = backends::ExecutionEngine::Create({}); @@ -562,9 +571,9 @@ TEST(Operator, Operator_Transpose_Test0) { auto fn_ = reinterpret_cast(fn); cinn_buffer_t *A_buf = - common::BufferBuilder(Float(32), input_shape).set_random().Build(); + cinn::common::BufferBuilder(Float(32), input_shape).set_random().Build(); cinn_buffer_t *B_buf = - common::BufferBuilder(Float(32), output_shape).set_random().Build(); + cinn::common::BufferBuilder(Float(32), output_shape).set_random().Build(); cinn_pod_value_t a_arg(A_buf), b_arg(B_buf); cinn_pod_value_t args[] = {a_arg, b_arg}; fn_(args, 2); diff --git a/paddle/cinn/hlir/op/op_util.cc b/paddle/cinn/hlir/op/op_util.cc index 4a8ec32633dbd1..6cad9f4cb75f12 100644 --- a/paddle/cinn/hlir/op/op_util.cc +++ b/paddle/cinn/hlir/op/op_util.cc @@ -31,7 +31,7 @@ CINNSchedule GetElementwiseScheduleFunc( return CINNSchedule([=](lang::Args args, lang::RetValue* ret) { CHECK(!args.empty()) << "The input argument of ElementwiseSchedule is " "empty! 
Please check.\n"; - common::CINNValuePack arg_pack = args[0]; + cinn::common::CINNValuePack arg_pack = args[0]; CHECK_GT(arg_pack.size(), 0U) << "arg_pack.size() must contains at least one element."; std::vector vec_ast; @@ -46,9 +46,9 @@ CINNSchedule GetElementwiseScheduleFunc( ir::IRSchedule ir_sch(mod_expr); ir_sch.MergeExprs(); pe::IRElementwiseSchedule(ir_sch, output_shapes.front(), target); - std::vector res{ - common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; - *ret = common::CINNValuePack{res}; + std::vector res{ + cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; + *ret = cinn::common::CINNValuePack{res}; }); } @@ -59,7 +59,7 @@ CINNSchedule GetInjectiveScheduleFunc( return CINNSchedule([=](lang::Args args, lang::RetValue* ret) { CHECK(!args.empty()) << "The input argument of InjectiveSchedule is " "empty! Please check.\n"; - common::CINNValuePack arg_pack = args[0]; + cinn::common::CINNValuePack arg_pack = args[0]; std::vector vec_ast; for (int i = 0; i < arg_pack.size(); i++) { if (arg_pack[i].is_expr()) { @@ -78,14 +78,14 @@ CINNSchedule GetInjectiveScheduleFunc( pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, vectorizable); }*/ - std::vector res{ - common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; - *ret = common::CINNValuePack{res}; + std::vector res{ + cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; + *ret = cinn::common::CINNValuePack{res}; }); } -std::string GetExternFuncName(const common::Target& target, - const common::Type& type, +std::string GetExternFuncName(const cinn::common::Target& target, + const cinn::common::Type& type, const std::string& func_name, const bool need_cinn, const bool need_target, @@ -95,9 +95,9 @@ std::string GetExternFuncName(const common::Target& target, func_proto_name.append("cinn_"); } if (need_target) { - if (target.arch == common::Target::Arch::NVGPU) { + if (target.arch == cinn::common::Target::Arch::NVGPU) { func_proto_name.append("nvgpu_"); - } else if (target.arch == common::Target::Arch::X86) { + } else if (target.arch == cinn::common::Target::Arch::X86) { func_proto_name.append("host_"); } else { LOG(FATAL) << func_name diff --git a/paddle/cinn/hlir/op/op_util.h b/paddle/cinn/hlir/op/op_util.h index 082c1f258a0427..a0521e26f1b724 100644 --- a/paddle/cinn/hlir/op/op_util.h +++ b/paddle/cinn/hlir/op/op_util.h @@ -67,8 +67,9 @@ std::vector ToPodVector(const std::vector &args) { } const auto &type = args.front().type(); - CHECK_EQ(type, common::type_of()) << "Cannot get " << common::type_of() - << " value from " << type << " vector!"; + CHECK_EQ(type, cinn::common::type_of()) + << "Cannot get " << cinn::common::type_of() << " value from " << type + << " vector!"; std::vector shape_v; if (type.is_bool()) { @@ -141,8 +142,8 @@ CINNSchedule GetInjectiveScheduleFunc( const Target &target, bool vectorizable = true); -std::string GetExternFuncName(const common::Target &target, - const common::Type &type, +std::string GetExternFuncName(const cinn::common::Target &target, + const cinn::common::Type &type, const std::string &func_name, const bool need_cinn = true, const bool need_target = true, diff --git a/paddle/cinn/hlir/op/reduction.cc b/paddle/cinn/hlir/op/reduction.cc index 893c0c41fd7070..f9a019a9eea5c8 100644 --- a/paddle/cinn/hlir/op/reduction.cc +++ b/paddle/cinn/hlir/op/reduction.cc @@ -38,9 +38,9 @@ PD_DECLARE_bool(cinn_new_group_scheduler); namespace cinn { namespace hlir { namespace op { -using common::_CINNValuePack_; -using common::CINNValue; -using common::CINNValuePack; 
+using cinn::common::_CINNValuePack_; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; using framework::OpStrategy; using framework::shape_t; using framework::StrategyFunction; @@ -142,7 +142,7 @@ std::shared_ptr StrategyForReduce( *ret = CINNValuePack{cinn_values}; }; if (!FLAGS_cinn_enable_map_expr && !FLAGS_cinn_new_group_scheduler && - target == common::DefaultNVGPUTarget()) { + target == cinn::common::DefaultNVGPUTarget()) { if (!WithoutLastDimInReduce(inputs[0]->shape, reduce_axes)) { VLOG(3) << "Do Two Step Block Reduce Compute!"; auto res = gpu_reduce_with_last_axis_func( @@ -246,7 +246,7 @@ std::shared_ptr StrategyForReduce( reduce_tmp_out.as_tensor_ref(), tmp_out.as_tensor_ref(), out.as_tensor_ref(), - common::DefaultNVGPUTarget()); + cinn::common::DefaultNVGPUTarget()); std::vector res{ CINNValue(ir_sch.GetModule().GetExprs().at(0))}; @@ -262,7 +262,7 @@ std::shared_ptr StrategyForReduce( reduce_tmp_out.as_tensor_ref(), tmp_out.as_tensor_ref(), out.as_tensor_ref(), - common::DefaultNVGPUTarget()); + cinn::common::DefaultNVGPUTarget()); std::vector res{ CINNValue(ir_sch.GetModule().GetExprs().at(0))}; diff --git a/paddle/cinn/hlir/op/reduction_test.cc b/paddle/cinn/hlir/op/reduction_test.cc index 953dd82017d9bd..5586c323462ac6 100644 --- a/paddle/cinn/hlir/op/reduction_test.cc +++ b/paddle/cinn/hlir/op/reduction_test.cc @@ -46,9 +46,9 @@ namespace cinn { namespace hlir { namespace framework { -using common::_CINNValuePack_; -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::_CINNValuePack_; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; using framework::OpStrategy; using framework::shape_t; using framework::StrategyFunction; @@ -91,7 +91,7 @@ std::pair GenReduceCode( } } - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); auto impl = OpStrategy::SelectImpl( strategy(attrs, inputs, out_type, {output_shape}, target)); @@ -99,7 +99,8 @@ std::pair GenReduceCode( std::vector input_output_nodes{"X", op_name}; func = GetFuncFromImpl( impl, - common::CINNValuePack{{common::CINNValue(X), common::CINNValue(op_name)}}, + cinn::common::CINNValuePack{ + {cinn::common::CINNValue(X), cinn::common::CINNValue(op_name)}}, inputs, input_output_nodes, func_name, @@ -353,8 +354,9 @@ void TestCaseForReduce(const float init_val, // auto func_0 = reinterpret_cast(fn_reduce_sum); auto buffer_x = - common::BufferBuilder(Float(32), {n, c, h, w}).set_random().Build(); - auto buffer_z = common::BufferBuilder(Float(32), {c}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {n, c, h, w}).set_random().Build(); + auto buffer_z = + cinn::common::BufferBuilder(Float(32), {c}).set_random().Build(); void *dev_x = nullptr, *dev_z = nullptr; CUDA_CALL(cudaMalloc(&dev_x, buffer_x->memory_size)); @@ -455,8 +457,9 @@ TEST(Operator, Operator_Reduction_Case_7) { srand(time(NULL)); auto buffer_x = - common::BufferBuilder(Float(32), {n, c, h, w}).set_random().Build(); - auto buffer_y = common::BufferBuilder(Float(32), {h, w}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {n, c, h, w}).set_random().Build(); + auto buffer_y = + cinn::common::BufferBuilder(Float(32), {h, w}).set_random().Build(); void *dev_x = nullptr, *dev_y = nullptr; CUDA_CALL(cudaMalloc(&dev_x, buffer_x->memory_size)); @@ -528,9 +531,9 @@ TEST(Operator, Operator_Reduction_Case_11) { } TEST(Operator, Operator_Reduction_Case_Warp_Reduce) { - int sm_count = common::DefaultNVGPUTarget().get_multi_processor_count(); + int 
sm_count = cinn::common::DefaultNVGPUTarget().get_multi_processor_count(); int max_threads_per_sm = - common::DefaultNVGPUTarget().get_max_threads_per_sm(); + cinn::common::DefaultNVGPUTarget().get_max_threads_per_sm(); int warp_reduce_threshold = sm_count * max_threads_per_sm / 32; std::vector shape = {warp_reduce_threshold + 10, 256}; @@ -542,9 +545,9 @@ TEST(Operator, Operator_Reduction_Case_Warp_Reduce) { } TEST(Operator, Operator_Reduction_Case_Block_Reduce) { - int sm_count = common::DefaultNVGPUTarget().get_multi_processor_count(); + int sm_count = cinn::common::DefaultNVGPUTarget().get_multi_processor_count(); int max_threads_per_sm = - common::DefaultNVGPUTarget().get_max_threads_per_sm(); + cinn::common::DefaultNVGPUTarget().get_max_threads_per_sm(); int warp_reduce_threshold = sm_count * max_threads_per_sm / 32; std::vector shape = {warp_reduce_threshold - 10, 33}; @@ -556,9 +559,9 @@ TEST(Operator, Operator_Reduction_Case_Block_Reduce) { } TEST(Operator, Operator_Reduction_Case_Warp_Reduce_Case_1) { - int sm_count = common::DefaultNVGPUTarget().get_multi_processor_count(); + int sm_count = cinn::common::DefaultNVGPUTarget().get_multi_processor_count(); int max_threads_per_sm = - common::DefaultNVGPUTarget().get_max_threads_per_sm(); + cinn::common::DefaultNVGPUTarget().get_max_threads_per_sm(); int warp_reduce_threshold = sm_count * max_threads_per_sm / 32; std::vector shape = {(warp_reduce_threshold + 32) / 2, 2, 10, 256}; @@ -571,9 +574,9 @@ TEST(Operator, Operator_Reduction_Case_Warp_Reduce_Case_1) { } TEST(Operator, Operator_Reduction_Case_Block_Reduce_Case_1) { - int sm_count = common::DefaultNVGPUTarget().get_multi_processor_count(); + int sm_count = cinn::common::DefaultNVGPUTarget().get_multi_processor_count(); int max_threads_per_sm = - common::DefaultNVGPUTarget().get_max_threads_per_sm(); + cinn::common::DefaultNVGPUTarget().get_max_threads_per_sm(); int warp_reduce_threshold = sm_count * max_threads_per_sm / 32; std::vector shape = {(warp_reduce_threshold - 32) / 2, 2, 10, 33}; diff --git a/paddle/cinn/hlir/op/transform.cc b/paddle/cinn/hlir/op/transform.cc index ed22f50c644b09..ce1e29731a974f 100644 --- a/paddle/cinn/hlir/op/transform.cc +++ b/paddle/cinn/hlir/op/transform.cc @@ -31,9 +31,9 @@ namespace cinn { namespace hlir { namespace op { -using common::_CINNValuePack_; -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::_CINNValuePack_; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; using framework::OpStrategy; using framework::shape_t; using framework::StrategyFunction; diff --git a/paddle/cinn/hlir/op/transform_test.cc b/paddle/cinn/hlir/op/transform_test.cc index 2c2612bd1865b5..0e9b6d86d2ece6 100644 --- a/paddle/cinn/hlir/op/transform_test.cc +++ b/paddle/cinn/hlir/op/transform_test.cc @@ -44,9 +44,9 @@ namespace cinn { namespace hlir { namespace framework { -using common::_CINNValuePack_; -using common::CINNValue; -using common::CINNValuePack; +using cinn::common::_CINNValuePack_; +using cinn::common::CINNValue; +using cinn::common::CINNValuePack; using framework::OpStrategy; using framework::shape_t; using framework::StrategyFunction; @@ -75,9 +75,9 @@ TEST(SliceAssign, SliceAssign_Op) { std::vector inputs{input.tensor(), assign.tensor()}; #ifdef CINN_WITH_CUDA - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); #else - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); #endif auto impl = OpStrategy::SelectImpl( 
strategy(attrs, inputs, out_type, {output_shape}, target)); @@ -85,10 +85,10 @@ TEST(SliceAssign, SliceAssign_Op) { std::string func_name = "slice_assign"; std::string out_name = "output"; - common::CINNValuePack cinn_input = - common::CINNValuePack{{common::CINNValue(input.tensor()), - common::CINNValue(assign.tensor()), - common::CINNValue(out_name)}}; + cinn::common::CINNValuePack cinn_input = + cinn::common::CINNValuePack{{cinn::common::CINNValue(input.tensor()), + cinn::common::CINNValue(assign.tensor()), + cinn::common::CINNValue(out_name)}}; std::vector input_output_names{"input", "assign", out_name}; auto funcs = framework::GetFuncFromImpl( diff --git a/paddle/cinn/hlir/pass/alterlayout.cc b/paddle/cinn/hlir/pass/alterlayout.cc index 3c8d775fc9befe..4e7df28e7994a4 100644 --- a/paddle/cinn/hlir/pass/alterlayout.cc +++ b/paddle/cinn/hlir/pass/alterlayout.cc @@ -25,8 +25,8 @@ namespace cinn { namespace hlir { namespace pass { -using common::GraphNode; -using common::Type; +using cinn::common::GraphNode; +using cinn::common::Type; using framework::Graph; using framework::Node; using framework::NodeData; @@ -92,7 +92,7 @@ std::vector UpdateInferInfos( const std::vector& input_shapes, const std::vector& input_types, const std::vector& input_layouts, - const common::Target& target, + const cinn::common::Target& target, const OpValueType& op_infershape, const OpValueType& op_infertype, const OpValueType& op_inferlayout, @@ -219,13 +219,13 @@ void AlterLayoutPass(Graph* graph) { // alter conv2d op to conv2d_NCHWc Node* new_node = new Node(Operator::Get(new_op_type), new_op_type, - common::UniqName(new_op_type)); + cinn::common::UniqName(new_op_type)); new_node->attrs.attr_store = node->attrs.attr_store; std::string new_data_format = "NCHWc"; new_node->attrs.attr_store["data_format"] = new_data_format; const auto& conv_inlinks = node->inlinks_in_order(); - std::vector input_nodes; + std::vector input_nodes; for (auto& link : conv_inlinks) { auto* source = link->source(); input_nodes.push_back(source); @@ -322,8 +322,8 @@ void AlterLayoutPass(Graph* graph) { 0, src_input_layout, dst_input_layout, - common::UniqName(node->op()->name + - "_input_layout_tranform")); + cinn::common::UniqName(node->op()->name + + "_input_layout_tranform")); UpdateInferInfos(input_trans_node, {input_shape}, {input_type}, @@ -370,8 +370,8 @@ void AlterLayoutPass(Graph* graph) { 1, src_kernel_layout, dst_kernel_layout, - common::UniqName(node->op()->name + - "_weight_layout_tranform")); + cinn::common::UniqName(node->op()->name + + "_weight_layout_tranform")); UpdateInferInfos(weight_trans_node, {weight_shape}, {weight_type}, @@ -427,13 +427,14 @@ void AlterLayoutPass(Graph* graph) { count++; } for (int i = 1; i < infershapes.size(); i++) { - auto* new_out = new NodeData( - node_ptr, - i, - 0, - common::UniqName(new_node->id() + "_out_" + std::to_string(i))); + auto* new_out = + new NodeData(node_ptr, + i, + 0, + cinn::common::UniqName(new_node->id() + "_out_" + + std::to_string(i))); graph->RegisterNode(new_out->id(), new_out); - new_node->as()->LinkTo(new_out); + new_node->as()->LinkTo(new_out); } graph->RegisterNode(new_node->id(), new_node); // update conv2d_NCHWc's infershape, infertype, inferlayout and set @@ -513,10 +514,10 @@ void AlterLayoutPass(Graph* graph) { CHECK(input_data); VLOG(3) << source->id() << " do layout_tranform from C to NCHW"; std::string op_type = "broadcast_to"; - auto trans_node = - new Node(Operator::Get(op_type), - op_type, - common::UniqName(source->id() + "_broadcastto")); + auto 
trans_node = new Node( + Operator::Get(op_type), + op_type, + cinn::common::UniqName(source->id() + "_broadcastto")); trans_node->attrs.attr_store["out_shape"] = new_shapes; std::vector broadcast_axes = {1}; trans_node->attrs.attr_store["broadcast_axes"] = broadcast_axes; @@ -551,8 +552,8 @@ void AlterLayoutPass(Graph* graph) { i, new_src_layout, new_input_layouts[i], - common::UniqName(new_input_data->id() + - "_layout_tranform")); + cinn::common::UniqName(new_input_data->id() + + "_layout_tranform")); UpdateInferInfos(new_trans_node, {shape_dict[new_input_data->id()]}, {input_types[i]}, @@ -585,7 +586,8 @@ void AlterLayoutPass(Graph* graph) { i, src_layout, new_input_layouts[i], - common::UniqName(source->id() + "_layout_tranform")); + cinn::common::UniqName(source->id() + + "_layout_tranform")); UpdateInferInfos(trans_node, {input_shapes[i]}, {input_types[i]}, @@ -618,7 +620,8 @@ void AlterLayoutPass(Graph* graph) { i, src_layout, new_input_layouts[i], - common::UniqName(source->id() + "_layout_tranform")); + cinn::common::UniqName(source->id() + + "_layout_tranform")); UpdateInferInfos(trans_node, {input_shapes[i]}, {input_types[i]}, @@ -705,7 +708,8 @@ void AlterLayoutPass(Graph* graph) { 0, src_layout, dst_layout, - common::UniqName(node->op()->name + "_final_layout_tranform")); + cinn::common::UniqName(node->op()->name + + "_final_layout_tranform")); shape_dict[temp_out->id()] = shape; type_dict[temp_out->id()] = type; layout_dict[temp_out->id()] = src_layout; diff --git a/paddle/cinn/hlir/pass/alterlayout_test.cc b/paddle/cinn/hlir/pass/alterlayout_test.cc index 293c0c07ebdde6..0936513b5e7584 100644 --- a/paddle/cinn/hlir/pass/alterlayout_test.cc +++ b/paddle/cinn/hlir/pass/alterlayout_test.cc @@ -65,7 +65,7 @@ TEST(conv, conv) { auto c = program.conv2d(A, B, attrs); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); program.SetInputs({A, B}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -112,7 +112,7 @@ TEST(conv_relu_conv, conv_relu_conv) { auto d = program.relu(c); auto e = program.conv2d(d, D, attrs); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); program.SetInputs({A, B, D}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -162,7 +162,7 @@ TEST(conv_add_conv, conv_add_conv) { auto d = program.elementwise_add(c, C, 1); auto e = program.conv2d(d, D, attrs); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); program.SetInputs({A, B, D}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -219,7 +219,7 @@ TEST(conv_bn_conv, conv_bn_conv) { auto d = program.batchnorm(c, Scale, Bias, Mean, Variance, attrs1); auto e = program.conv2d(d, D, attrs); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); program.SetInputs({A, B, D}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -276,7 +276,7 @@ TEST(conv_pool2d_conv, conv_pool2d_conv) { auto d = program.pool2d(c, attrs2); auto e = program.conv2d(d, D, attrs); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); program.SetInputs({A, B, D}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -328,7 +328,7 @@ TEST(conv_softmax_conv, conv_softmax_conv) { auto d = program.softmax(c, attrs1); auto e = program.conv2d(d, D, attrs); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); 
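// The mechanical common:: -> cinn::common:: rewrite running through this
// patch is easiest to motivate in isolation. A minimal sketch, assuming only
// that the series introduces a top-level ::common namespace (the new home of
// DDim and friends) next to the existing cinn::common; the types below are
// illustrative stand-ins, not the real Paddle headers:
namespace common { struct Type {}; }                       // new top-level home
namespace cinn { namespace common { struct Type {}; } }   // CINN's own common

using namespace cinn;  // common in CINN translation units

int main() {
  // common::Type t;       // would not compile: 'common' is ambiguous here
  cinn::common::Type a;    // always unambiguous -- the form this patch adopts
  ::common::Type b;        // explicit root qualification for the new namespace
  (void)a;
  (void)b;
  return 0;
}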
program.SetInputs({A, B, D}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -377,7 +377,7 @@ TEST(conv_sigmoid_conv, conv_sigmoid_conv) { auto d = program.sigmoid(c); auto e = program.conv2d(d, D, attrs); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); program.SetInputs({A, B, D}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -430,7 +430,7 @@ TEST(conv_mul_conv, conv_mul_conv) { auto d = program.mul(c, C, 1, 1); auto e = program.softmax(d, attrs1); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); program.SetInputs({A, B, D}); program.Validate(); LOG(INFO) << "Program:\n" << program; diff --git a/paddle/cinn/hlir/pass/check_fusion_accuracy_pass.cc b/paddle/cinn/hlir/pass/check_fusion_accuracy_pass.cc index 52a5d128860d21..c0b4f6592bc27d 100644 --- a/paddle/cinn/hlir/pass/check_fusion_accuracy_pass.cc +++ b/paddle/cinn/hlir/pass/check_fusion_accuracy_pass.cc @@ -37,15 +37,15 @@ using framework::NodePtr; using framework::Operator; using framework::OpPatternKind; +using cinn::common::GraphEdge; +using cinn::common::GraphNode; using cinn::hlir::framework::GenerateAccCheckNodeId; -using common::GraphEdge; -using common::GraphNode; using GroupPtr = std::shared_ptr; using GroupList = std::vector; using ShapeDict = absl::flat_hash_map; -using DtypeDict = absl::flat_hash_map; +using DtypeDict = absl::flat_hash_map; namespace utils { class AssertMsg { @@ -325,7 +325,7 @@ std::pair CheckFusionAccuracyPass::CreateIsCloseNode( auto check_out_shape = shape_dict_.at(node_id); shape_dict_.emplace(output_data->id(), std::move(check_out_shape)); - dtype_dict_.emplace(output_data->id(), common::Bool()); + dtype_dict_.emplace(output_data->id(), cinn::common::Bool()); VLOG(4) << "Create node " << node_id << "'s isclose node success, whose id is " << is_close_node_id @@ -356,7 +356,7 @@ std::pair CheckFusionAccuracyPass::CreateAllNode( auto output_data = CreateOutputNode(all_node); shape_dict_.emplace(output_data->id(), framework::shape_t{1}); - dtype_dict_.emplace(output_data->id(), common::Bool()); + dtype_dict_.emplace(output_data->id(), cinn::common::Bool()); VLOG(4) << "Create node " << node_id << "'s all node success, whose id is " << all_node_id << ", whose output is " << DebugNodeData(output_data); @@ -387,7 +387,7 @@ std::pair CheckFusionAccuracyPass::CreateAssertNode( auto output_data = CreateOutputNode(assert_node); shape_dict_.emplace(output_data->id(), framework::shape_t{1}); - dtype_dict_.emplace(output_data->id(), common::Bool()); + dtype_dict_.emplace(output_data->id(), cinn::common::Bool()); VLOG(4) << "Create node " << node_id << "'s assert node success, whose id is " << assert_node_id << ", whose output is " diff --git a/paddle/cinn/hlir/pass/check_fusion_accuracy_pass_test.cc b/paddle/cinn/hlir/pass/check_fusion_accuracy_pass_test.cc index d523fbb6df9f63..10f5c83e6600d9 100644 --- a/paddle/cinn/hlir/pass/check_fusion_accuracy_pass_test.cc +++ b/paddle/cinn/hlir/pass/check_fusion_accuracy_pass_test.cc @@ -80,7 +80,7 @@ TEST(CheckFusionAccuracyPass, ElementWise_Fusion) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); hlir::framework::ApplyPasses(graph.get(), @@ -117,7 +117,7 @@ TEST(CheckFusionAccuracyPass, ElementWise_Fusion_1) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = 
cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); @@ -158,7 +158,7 @@ TEST(CheckFusionAccuracyPass, ElementWise_Fusion_2) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); @@ -199,7 +199,7 @@ TEST(CheckFusionAccuracyPass, ElementWise_Fusion_3) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); @@ -240,7 +240,7 @@ TEST(CheckFusionAccuracyPass, ElementWise_Fusion_4) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); @@ -274,7 +274,7 @@ TEST(CheckFusionAccuracyPass, ElementWise_Fusion_5) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); @@ -311,7 +311,7 @@ TEST(CheckFusionAccuracyPass, Broadcast_Test_0) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); @@ -348,7 +348,7 @@ TEST(CheckFusionAccuracyPass, Broadcast_Test_2) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); @@ -387,7 +387,7 @@ TEST(CheckFusionAccuracyPass, Broadcast_Test_4) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); @@ -426,7 +426,7 @@ TEST(CheckFusionAccuracyPass, Broadcast_Test_5) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); @@ -462,7 +462,7 @@ TEST(CheckFusionAccuracyPass, Reduce_Test_0) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); @@ -497,7 +497,7 @@ TEST(CheckFusionAccuracyPass, Reduce_Test_1) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); @@ -535,7 +535,7 @@ TEST(CheckFusionAccuracyPass, Reduce_Test_2) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); @@ -573,7 +573,7 @@ TEST(CheckFusionAccuracyPass, Reduce_Test_3) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); @@ -612,7 +612,7 @@ TEST(CheckFusionAccuracyPass, Reduce_Test_4) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); @@ -648,7 +648,7 @@ TEST(CheckFusionAccuracyPass, Reduce_Test_5) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); diff --git 
a/paddle/cinn/hlir/pass/common_subexpression_elimination.cc b/paddle/cinn/hlir/pass/common_subexpression_elimination.cc index e595783c7b11b8..d50697583db08c 100644 --- a/paddle/cinn/hlir/pass/common_subexpression_elimination.cc +++ b/paddle/cinn/hlir/pass/common_subexpression_elimination.cc @@ -31,8 +31,8 @@ using framework::Graph; using framework::Node; using framework::NodeData; -using common::GraphEdge; -using common::GraphNode; +using cinn::common::GraphEdge; +using cinn::common::GraphNode; using InputToNodeMap = std::unordered_map>; @@ -99,7 +99,7 @@ bool IsSameSubexpression(Node* op1, bool op1_equal_op2 = std::any_of( op2_in_edges.begin(), op2_in_edges.end(), - [&](common::Shared& edge) { + [&](cinn::common::Shared& edge) { auto* op2_source_node = edge->source()->safe_as(); CHECK(op2_source_node); if (op1_source_node->id() == op2_source_node->id()) { diff --git a/paddle/cinn/hlir/pass/common_subexpression_elimination_test.cc b/paddle/cinn/hlir/pass/common_subexpression_elimination_test.cc index 3e7417a9c29aa2..4f326ee58f1c96 100644 --- a/paddle/cinn/hlir/pass/common_subexpression_elimination_test.cc +++ b/paddle/cinn/hlir/pass/common_subexpression_elimination_test.cc @@ -59,7 +59,7 @@ TEST(common_subexpression_elimination, common_subexpression_elimination_case1) { auto concat = program.concat({t_1, t_2, t_3}); auto max = program.reduce_max(concat, {0}, true); - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); program.SetInputs({A, B}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -104,7 +104,7 @@ TEST(common_subexpression_elimination, common_subexpression_elimination_case2) { auto concat_2 = program.concat({reshape_1, reshape_2}); auto concat_3 = program.concat({reshape_1, reshape_2}, 1); - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); program.SetInputs({A, B}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -168,7 +168,7 @@ TEST(common_subexpression_elimination, common_subexpression_elimination_case3) { fetch_list.insert(out1->id); fetch_list.insert(out2->id); - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); auto graph = std::make_shared(program, fetch_list, target); LOG(INFO) << "graph:\n" << graph->DebugGroupedGraph(fetch_list); diff --git a/paddle/cinn/hlir/pass/const_propagate.cc b/paddle/cinn/hlir/pass/const_propagate.cc index 3db1c174222943..0654cc85520eb6 100644 --- a/paddle/cinn/hlir/pass/const_propagate.cc +++ b/paddle/cinn/hlir/pass/const_propagate.cc @@ -24,7 +24,7 @@ namespace cinn { namespace hlir { namespace pass { -using common::Type; +using cinn::common::Type; using framework::Graph; using framework::Node; using framework::NodeData; diff --git a/paddle/cinn/hlir/pass/const_propagate_test.cc b/paddle/cinn/hlir/pass/const_propagate_test.cc index c1600c81aa5f9d..bf9c2f471e5402 100644 --- a/paddle/cinn/hlir/pass/const_propagate_test.cc +++ b/paddle/cinn/hlir/pass/const_propagate_test.cc @@ -46,7 +46,7 @@ TEST(const_conv, const_conv) { attrs["data_format"] = src_layout; auto c = program.conv2d(A, B, attrs); - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); program.SetInputs({A, B}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -91,7 +91,7 @@ TEST(const_bn, const_bn) { auto a = program.fused_batchnorm_inference(A, Scale, Bias, Mean, Variance, attrs); - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); 
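// For readers skimming the IsSameSubexpression hunk above: the test the pass
// applies is purely structural. A self-contained toy model (these types are
// invented stand-ins for illustration, not the real framework::Node API):
#include <algorithm>
#include <string>
#include <vector>

struct ToyNode {
  std::string op;                    // operator name, e.g. "concat"
  std::string attrs;                 // flattened attribute store
  std::vector<const ToyNode*> srcs;  // input producers, in order
};

// Two ops form a common subexpression when they run the same operator with
// the same attributes over exactly the same producers, in the same order.
bool IsSameSubexpression(const ToyNode& a, const ToyNode& b) {
  return a.op == b.op && a.attrs == b.attrs &&
         std::equal(a.srcs.begin(), a.srcs.end(),
                    b.srcs.begin(), b.srcs.end());
}
// When the condition holds, a CSE pass can reroute the second op's consumers
// to the first op's outputs and drop the duplicate.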
program.SetInputs({A, Scale, Bias, Mean, Variance}); program.Validate(); LOG(INFO) << "Program:\n" << program; diff --git a/paddle/cinn/hlir/pass/constant_folding_pass.cc b/paddle/cinn/hlir/pass/constant_folding_pass.cc index 50a76f54cb312e..0f4493ccf3e776 100644 --- a/paddle/cinn/hlir/pass/constant_folding_pass.cc +++ b/paddle/cinn/hlir/pass/constant_folding_pass.cc @@ -25,8 +25,8 @@ using framework::NodeData; using framework::OpPatternKind; using framework::shape_t; -using common::GraphEdge; -using common::GraphNode; +using cinn::common::GraphEdge; +using cinn::common::GraphNode; using AlterFunction = std::function; diff --git a/paddle/cinn/hlir/pass/constant_folding_pass_test.cc b/paddle/cinn/hlir/pass/constant_folding_pass_test.cc index 5e98a0e2bbcfe9..0cf95ea0a12e55 100644 --- a/paddle/cinn/hlir/pass/constant_folding_pass_test.cc +++ b/paddle/cinn/hlir/pass/constant_folding_pass_test.cc @@ -40,7 +40,7 @@ std::unordered_map> RunModelTest( const std::vector&& passes, const std::unordered_map>& input_data, const std::unordered_set& fetch_ids) { - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, fetch_ids, target); hlir::framework::ApplyPasses(graph.get(), passes); diff --git a/paddle/cinn/hlir/pass/constant_folding_pass_util.cc b/paddle/cinn/hlir/pass/constant_folding_pass_util.cc index 90aab2144065fb..748948f2206fcc 100644 --- a/paddle/cinn/hlir/pass/constant_folding_pass_util.cc +++ b/paddle/cinn/hlir/pass/constant_folding_pass_util.cc @@ -65,8 +65,8 @@ class ConstantFoldingHelper { private: Node* CreateNewNode(const std::string& op_name, const AttributeMap& attrs_map) { - auto* node = - new Node(Operator::Get(op_name), op_name, common::UniqName(op_name)); + auto* node = new Node( + Operator::Get(op_name), op_name, cinn::common::UniqName(op_name)); node->attrs.attr_store = attrs_map; graph_->RegisterNode(node->id(), node); return node; diff --git a/paddle/cinn/hlir/pass/custom_call_pass.cc b/paddle/cinn/hlir/pass/custom_call_pass.cc index 287bda3ba783c3..231d2cc7d44200 100644 --- a/paddle/cinn/hlir/pass/custom_call_pass.cc +++ b/paddle/cinn/hlir/pass/custom_call_pass.cc @@ -38,10 +38,10 @@ class GraphAlterHelper { deny_ops_ = {splited_names.begin(), splited_names.end()}; } } - void TransToCustomCall(const common::Target& target) { + void TransToCustomCall(const cinn::common::Target& target) { // collect candidate nodes auto mark_nodes = graph_->CollectNodes( - [this, &target](const common::GraphNode* graph_node) -> bool { + [this, &target](const cinn::common::GraphNode* graph_node) -> bool { if (graph_node->safe_as()) { auto node = graph_node->safe_as(); auto&& op_name = node->op()->name; @@ -63,7 +63,7 @@ class GraphAlterHelper { // codegen-registered is not consistent with cudnn if ((node->op()->name == "conv2d" || node->op()->name == "depthwise_conv2d") && - target == common::DefaultNVGPUTarget()) { + target == cinn::common::DefaultNVGPUTarget()) { auto out_links = node->outlinks_in_order(); for (int idx = 1; idx < out_links.size(); ++idx) { auto link = out_links[idx]; diff --git a/paddle/cinn/hlir/pass/dce_pass.cc b/paddle/cinn/hlir/pass/dce_pass.cc index fd439c1e97cfc7..b17f8ee4de5d9f 100644 --- a/paddle/cinn/hlir/pass/dce_pass.cc +++ b/paddle/cinn/hlir/pass/dce_pass.cc @@ -27,8 +27,8 @@ using framework::NodeData; using framework::OpPatternKind; using framework::shape_t; -using common::GraphEdge; -using common::GraphNode; +using cinn::common::GraphEdge; +using cinn::common::GraphNode; using GroupPtr = 
std::shared_ptr; using GroupList = std::vector; diff --git a/paddle/cinn/hlir/pass/dce_pass_test.cc b/paddle/cinn/hlir/pass/dce_pass_test.cc index 7f5c3355b00673..bb9c5d7654851f 100644 --- a/paddle/cinn/hlir/pass/dce_pass_test.cc +++ b/paddle/cinn/hlir/pass/dce_pass_test.cc @@ -30,7 +30,7 @@ TEST(DCE, Test_0) { auto fetch_ids = {D->id}; auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, fetch_ids, target); @@ -54,7 +54,7 @@ TEST(DCE, Test_1) { auto fetch_ids = {F->id}; auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, fetch_ids, target); diff --git a/paddle/cinn/hlir/pass/dense_merge_pass.cc b/paddle/cinn/hlir/pass/dense_merge_pass.cc index c8433f3a85fc7f..82341cb8469bf3 100644 --- a/paddle/cinn/hlir/pass/dense_merge_pass.cc +++ b/paddle/cinn/hlir/pass/dense_merge_pass.cc @@ -20,7 +20,7 @@ namespace cinn { namespace hlir { namespace pass { -using common::GraphNode; +using cinn::common::GraphNode; using framework::Graph; using framework::Node; using framework::NodeAttr; @@ -118,7 +118,7 @@ class DenseMergePassHelper : public FusionHelperBase { // create custom call node Node* node_tmp = new Node(Operator::Get("custom_call"), "custom_call", - common::UniqName("custom_call")); + cinn::common::UniqName("custom_call")); graph_->RegisterNode(node_tmp->id(), node_tmp); node_tmp->attrs.attr_store = dense_op.second[0]->attrs.attr_store; node_tmp->attrs.attr_store["side"] = side; diff --git a/paddle/cinn/hlir/pass/dense_merge_pass_test.cc b/paddle/cinn/hlir/pass/dense_merge_pass_test.cc index 23ce990ff6fd8b..b1eab6fadfe342 100644 --- a/paddle/cinn/hlir/pass/dense_merge_pass_test.cc +++ b/paddle/cinn/hlir/pass/dense_merge_pass_test.cc @@ -34,7 +34,7 @@ void RunModelTest(Program& program, // NOLINT &inputs_data.back(), inputs_data.back().size(), 0.0f, 1.0f, 1e-3); } - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); std::unordered_map, std::vector>> outputs; diff --git a/paddle/cinn/hlir/pass/dot_merger.cc b/paddle/cinn/hlir/pass/dot_merger.cc index 8638200180f66e..941cf6b29b66c9 100644 --- a/paddle/cinn/hlir/pass/dot_merger.cc +++ b/paddle/cinn/hlir/pass/dot_merger.cc @@ -22,7 +22,7 @@ namespace hlir { namespace pass { namespace { -using common::GraphNode; +using cinn::common::GraphNode; using framework::Node; using framework::NodeData; using framework::Operator; @@ -33,7 +33,7 @@ using infershape_t = std::function( const std::vector&, const framework::AttrMapType&)>; using inferdtype_t = std::function( const std::vector&, const framework::AttrMapType&)>; -using dtype_dict_t = absl::flat_hash_map; +using dtype_dict_t = absl::flat_hash_map; using shape_dict_t = absl::flat_hash_map; bool accessible(GraphNode* start, GraphNode* end) { @@ -130,7 +130,7 @@ class DotBuilder { const shape_dict_t& shape_dict() const { return shape_dict_; } // Currently the constructor of `NodeData` needs to pass in `Shared`. 
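// As the comment above says, NodeData's constructor takes its producer as a
// Shared handle. cinn::common::Shared is an intrusive reference count; a
// stripped-down sketch of the idea (an assumed shape, not the real
// cinn/common/shared.h) shows why a freshly allocated Node can be adopted
// directly, exactly as DotBuilder does below:
#include <atomic>

struct RefCounted {
  std::atomic<int> refs{0};  // the count lives inside the object (intrusive)
};

template <typename T>
class Shared {
 public:
  explicit Shared(T* p) : p_(p) { if (p_) ++p_->refs; }
  Shared(const Shared& o) : p_(o.p_) { if (p_) ++p_->refs; }
  Shared& operator=(const Shared&) = delete;  // kept minimal for the sketch
  ~Shared() {
    if (p_ && --p_->refs == 0) delete p_;
  }
  T* operator->() const { return p_; }
 private:
  T* p_ = nullptr;
};

struct Node : RefCounted { int id = 0; };

int main() {
  Shared<Node> instr(new Node);  // adopt a raw allocation, as in the builder
  Shared<Node> alias = instr;    // e.g. NodeData keeping its producer alive
  return alias->id;              // object freed when the last Shared drops
}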
- NodeData* Var(common::Shared& producer) { // NOLINT + NodeData* Var(cinn::common::Shared& producer) { // NOLINT auto* res = new NodeData(producer, 0, 0, node_name("var"), false); graph_->RegisterNode(producer->id(), res); graph_->RegisterNode(res->id(), producer.get()); @@ -141,7 +141,7 @@ class DotBuilder { NodeData* Concat(int axis, std::vector inputs) { const std::string type{"concat"}; - auto instr = common::Shared( + auto instr = cinn::common::Shared( new Node(framework::Operator::Get(type), type, node_name(type))); instr->attrs.attr_store["axis"] = axis; for (auto* in : inputs) { @@ -158,7 +158,7 @@ class DotBuilder { NodeData* lhs, NodeData* rhs) { const std::string type{dot_type_}; - auto instr = common::Shared( + auto instr = cinn::common::Shared( new Node(framework::Operator::Get(type), type, node_name(type))); matmul_ = instr.get(); instr->attrs.attr_store["trans_a"] = trans_a; @@ -177,7 +177,7 @@ class DotBuilder { NodeData* input, NodeData* output) { const std::string type{"slice"}; - auto instr = common::Shared( + auto instr = cinn::common::Shared( new Node(framework::Operator::Get(type), type, node_name(type))); instr->attrs.attr_store["axes"] = std::move(axes); instr->attrs.attr_store["starts"] = std::move(starts); diff --git a/paddle/cinn/hlir/pass/dot_merger_test.cc b/paddle/cinn/hlir/pass/dot_merger_test.cc index 2a9bdf9d4f1470..450cd4b5c3f53c 100644 --- a/paddle/cinn/hlir/pass/dot_merger_test.cc +++ b/paddle/cinn/hlir/pass/dot_merger_test.cc @@ -34,7 +34,7 @@ void RunModelTest(Program& program, // NOLINT &inputs_data.back(), inputs_data.back().size(), 0.0f, 1.0f, 1e-3); } - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); std::unordered_map, std::vector>> outputs; diff --git a/paddle/cinn/hlir/pass/fusion_helper_base.h b/paddle/cinn/hlir/pass/fusion_helper_base.h index d3c9e5c075529a..3437b334fa5df7 100644 --- a/paddle/cinn/hlir/pass/fusion_helper_base.h +++ b/paddle/cinn/hlir/pass/fusion_helper_base.h @@ -176,7 +176,8 @@ class FusionHelperBase { for (int idx = axes.back() + 1; idx < inshape.size(); ++idx) { lane = inshape[idx]; } - int max_num_threads = common::DefaultNVGPUTarget().max_num_threads(); + int max_num_threads = + cinn::common::DefaultNVGPUTarget().max_num_threads(); if (lane > max_num_threads / 2) { return 0; } @@ -212,7 +213,7 @@ class FusionHelperBase { return 0; } // target - const common::Target& target_; + const cinn::common::Target& target_; // output node set std::unordered_set output_nodes_set_; // shape dict diff --git a/paddle/cinn/hlir/pass/fusion_merge_pass.cc b/paddle/cinn/hlir/pass/fusion_merge_pass.cc index 8d3233e23c7069..86c0e5360fc0d6 100644 --- a/paddle/cinn/hlir/pass/fusion_merge_pass.cc +++ b/paddle/cinn/hlir/pass/fusion_merge_pass.cc @@ -26,8 +26,8 @@ using framework::NodeData; using framework::OpPatternKind; using framework::shape_t; -using common::GraphEdge; -using common::GraphNode; +using cinn::common::GraphEdge; +using cinn::common::GraphNode; using Comparator = Graph::Group::SharedGroupComparator; using Hasher = Graph::Group::SharedGroupHasher; diff --git a/paddle/cinn/hlir/pass/fusion_merge_pass_test.cc b/paddle/cinn/hlir/pass/fusion_merge_pass_test.cc index f4582a5ce65be7..f6f9ecee97c430 100755 --- a/paddle/cinn/hlir/pass/fusion_merge_pass_test.cc +++ b/paddle/cinn/hlir/pass/fusion_merge_pass_test.cc @@ -34,7 +34,7 @@ TEST(FusionMergePass, ElementWise_Fusion_0) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); 
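// The Concat/Matmul/Slice trio built above is the whole algebra of the
// dot-merger rewrite: two matmuls sharing a left operand equal one matmul
// over the column-concatenated right operands, followed by column slices.
// A self-contained toy check of that identity (plain nested vectors; nothing
// here is the CINN API):
#include <cassert>
#include <vector>

using Mat = std::vector<std::vector<float>>;

Mat MatMul(const Mat& a, const Mat& b) {
  Mat out(a.size(), std::vector<float>(b[0].size(), 0.f));
  for (size_t i = 0; i < a.size(); ++i)
    for (size_t k = 0; k < b.size(); ++k)
      for (size_t j = 0; j < b[0].size(); ++j) out[i][j] += a[i][k] * b[k][j];
  return out;
}

Mat ConcatCols(const Mat& b, const Mat& c) {  // concat along axis 1
  Mat out = b;
  for (size_t k = 0; k < out.size(); ++k)
    out[k].insert(out[k].end(), c[k].begin(), c[k].end());
  return out;
}

Mat SliceCols(const Mat& d, size_t begin, size_t end) {  // slice along axis 1
  Mat out;
  for (const auto& row : d)
    out.push_back(std::vector<float>(row.begin() + begin, row.begin() + end));
  return out;
}

int main() {
  Mat A = {{1, 2}, {3, 4}};                  // shared LHS, shape [2, 2]
  Mat B = {{1, 0}, {0, 1}};                  // RHS of dot #1, shape [2, 2]
  Mat C = {{2}, {5}};                        // RHS of dot #2, shape [2, 1]
  Mat merged = MatMul(A, ConcatCols(B, C));  // one wider GEMM, shape [2, 3]
  assert(SliceCols(merged, 0, 2) == MatMul(A, B));  // recovers dot #1
  assert(SliceCols(merged, 2, 3) == MatMul(A, C));  // recovers dot #2
  return 0;
}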
RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -60,7 +60,7 @@ TEST(FusionMergePass, ElementWise_Fusion_1) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -89,7 +89,7 @@ TEST(FusionMergePass, ElementWise_Fusion_2) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -118,7 +118,7 @@ TEST(FusionMergePass, ElementWise_Fusion_3) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -147,7 +147,7 @@ TEST(FusionMergePass, ElementWise_Fusion_4) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -169,7 +169,7 @@ TEST(FusionMergePass, ElementWise_Fusion_5) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -194,7 +194,7 @@ TEST(FusionMergePass, Broadcast_Test_0) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -219,7 +219,7 @@ TEST(FusionMergePass, Broadcast_Test_1) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -244,7 +244,7 @@ TEST(FusionMergePass, Broadcast_Test_2) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -269,7 +269,7 @@ TEST(FusionMergePass, Broadcast_Test_3) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -296,7 +296,7 @@ TEST(FusionMergePass, Broadcast_Test_4) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -323,7 +323,7 @@ TEST(FusionMergePass, Broadcast_Test_5) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -347,7 +347,7 @@ TEST(FusionMergePass, Reduce_Test_0) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -370,7 +370,7 @@ TEST(FusionMergePass, Reduce_Test_1) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = 
std::make_shared(program, target); @@ -396,7 +396,7 @@ TEST(FusionMergePass, Reduce_Test_2) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -422,7 +422,7 @@ TEST(FusionMergePass, Reduce_Test_3) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -449,7 +449,7 @@ TEST(FusionMergePass, Reduce_Test_4) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -473,7 +473,7 @@ TEST(FusionMergePass, Reduce_Test_5) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); diff --git a/paddle/cinn/hlir/pass/fusion_merge_pass_util.h b/paddle/cinn/hlir/pass/fusion_merge_pass_util.h index 6b6f786cab4a09..bc14748f5f6484 100644 --- a/paddle/cinn/hlir/pass/fusion_merge_pass_util.h +++ b/paddle/cinn/hlir/pass/fusion_merge_pass_util.h @@ -138,7 +138,7 @@ CONDITION_FUNC(honrizontal_elementwise_fuse_reduce) { } CONDITION_FUNC(elementwise_fuse_reduce) { - if (helper->target_ == common::DefaultHostTarget()) { + if (helper->target_ == cinn::common::DefaultHostTarget()) { return true; } // if same shape with horizontal relation @@ -427,7 +427,7 @@ CONDITION_FUNC(reduce_fuse_broadcast) { reduce_size *= reducer_input_shape[idx - 1]; } // Check if the reduce size exceeds the hardware limit - if (helper->target_ == common::DefaultNVGPUTarget() && + if (helper->target_ == cinn::common::DefaultNVGPUTarget() && reduce_size > helper->target_.max_num_threads()) { return false; } diff --git a/paddle/cinn/hlir/pass/general_fusion_merge_pass.cc b/paddle/cinn/hlir/pass/general_fusion_merge_pass.cc index 6ea908ed31f318..cf1b91fcc13573 100644 --- a/paddle/cinn/hlir/pass/general_fusion_merge_pass.cc +++ b/paddle/cinn/hlir/pass/general_fusion_merge_pass.cc @@ -38,8 +38,8 @@ using framework::NodeData; using framework::OpPatternKind; using framework::shape_t; -using common::GraphEdge; -using common::GraphNode; +using cinn::common::GraphEdge; +using cinn::common::GraphNode; using GroupPtr = std::shared_ptr; using GroupList = std::vector; diff --git a/paddle/cinn/hlir/pass/general_fusion_merge_pass/graph_group_fuse_helper.h b/paddle/cinn/hlir/pass/general_fusion_merge_pass/graph_group_fuse_helper.h index cd0ac4b0138422..3859ad88ff0169 100644 --- a/paddle/cinn/hlir/pass/general_fusion_merge_pass/graph_group_fuse_helper.h +++ b/paddle/cinn/hlir/pass/general_fusion_merge_pass/graph_group_fuse_helper.h @@ -97,7 +97,7 @@ class GraphGroupFuseHelper final : public FuseHelper { Visit(node_producer); } }; - common::IsReachablePredicator is_reachable( + cinn::common::IsReachablePredicator is_reachable( MinDepth4Node, MaxDepth4Node, VisitNextNodes); return is_reachable(consumer, producer, [](OpGroupPtr) {}); } @@ -120,7 +120,7 @@ class GraphGroupFuseHelper final : public FuseHelper { Visit(node_producer); } }; - common::IsReachablePredicator is_reachable( + cinn::common::IsReachablePredicator is_reachable( MinDepth4Node, MaxDepth4Node, VisitNextNodes); return is_reachable(consumer, producer, [](OpGroupPtr) {}); } diff --git 
a/paddle/cinn/hlir/pass/general_fusion_merge_pass_utils.h b/paddle/cinn/hlir/pass/general_fusion_merge_pass_utils.h index 168edb3a97a4a0..2195d4a4f947bd 100644 --- a/paddle/cinn/hlir/pass/general_fusion_merge_pass_utils.h +++ b/paddle/cinn/hlir/pass/general_fusion_merge_pass_utils.h @@ -143,7 +143,7 @@ static int GetSharedSize(const api::OpNode& op_node) { for (int idx = axes.back() + 1; idx < inshape.size(); ++idx) { lane = inshape[idx]; } - int max_num_threads = common::DefaultNVGPUTarget().max_num_threads(); + int max_num_threads = cinn::common::DefaultNVGPUTarget().max_num_threads(); if (lane > max_num_threads / 2) { return 0; } diff --git a/paddle/cinn/hlir/pass/infershape.cc b/paddle/cinn/hlir/pass/infershape.cc index b082c98a0fcf69..041a63b42b57c0 100644 --- a/paddle/cinn/hlir/pass/infershape.cc +++ b/paddle/cinn/hlir/pass/infershape.cc @@ -24,7 +24,7 @@ namespace cinn { namespace hlir { namespace pass { -using common::Type; +using cinn::common::Type; using framework::Graph; using framework::Node; using framework::NodeData; @@ -34,7 +34,7 @@ using infershape_t = std::function( const std::vector&, const framework::AttrMapType&)>; using inferdtype_t = std::function( const std::vector&, const framework::AttrMapType&)>; -using dtype_dict_t = absl::flat_hash_map; +using dtype_dict_t = absl::flat_hash_map; using shape_dict_t = absl::flat_hash_map; void InferShape(Node* node, diff --git a/paddle/cinn/hlir/pass/infershape.h b/paddle/cinn/hlir/pass/infershape.h index db9b8e21e70208..12faf7b8de3aea 100644 --- a/paddle/cinn/hlir/pass/infershape.h +++ b/paddle/cinn/hlir/pass/infershape.h @@ -24,7 +24,7 @@ namespace pass { void InferShape( framework::Node* node, - absl::flat_hash_map& dtype_dict, // NOLINT + absl::flat_hash_map& dtype_dict, // NOLINT absl::flat_hash_map& shape_dict); // NOLINT diff --git a/paddle/cinn/hlir/pass/op_fusion_pass.cc b/paddle/cinn/hlir/pass/op_fusion_pass.cc index 84a95dfe277ddd..242b72f77e77f6 100644 --- a/paddle/cinn/hlir/pass/op_fusion_pass.cc +++ b/paddle/cinn/hlir/pass/op_fusion_pass.cc @@ -25,8 +25,8 @@ using framework::NodeData; using framework::OpPatternKind; using framework::shape_t; -using common::GraphEdge; -using common::GraphNode; +using cinn::common::GraphEdge; +using cinn::common::GraphNode; using GroupPtr = std::shared_ptr; using GroupList = std::vector; diff --git a/paddle/cinn/hlir/pass/op_fusion_pass_test.cc b/paddle/cinn/hlir/pass/op_fusion_pass_test.cc index f433cac8ca43dd..885afd929ba87e 100755 --- a/paddle/cinn/hlir/pass/op_fusion_pass_test.cc +++ b/paddle/cinn/hlir/pass/op_fusion_pass_test.cc @@ -34,7 +34,7 @@ TEST(OpFusionPass, ElementWise_Fusion_0) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -58,7 +58,7 @@ TEST(OpFusionPass, ElementWise_Fusion_1) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -81,7 +81,7 @@ TEST(OpFusionPass, Brodcast_Test_0) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -106,7 +106,7 @@ TEST(OpFusionPass, Brodcast_Test_1) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = 
cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -126,7 +126,7 @@ TEST(OpFusionPass, Brodcast_Test_2) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -150,7 +150,7 @@ TEST(OpFusionPass, Reduce_Test_0) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -175,7 +175,7 @@ TEST(OpFusionPass, Reduce_Test_1) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -200,7 +200,7 @@ TEST(OpFusionPass, Reduce_Test_2) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -224,7 +224,7 @@ TEST(OpFusionPass, Injective_Test_0) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -242,7 +242,7 @@ TEST(OP_LOWERING, Injective_Test_1) { auto F = net_builder.Add(D, E); auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -264,7 +264,7 @@ TEST(OpFusionPass, Test_Insert_BroadcastTo) { } auto program = net_builder.Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); hlir::framework::ApplyPass(graph.get(), "OpFusionPass"); diff --git a/paddle/cinn/hlir/pass/op_fusion_pass_util.h b/paddle/cinn/hlir/pass/op_fusion_pass_util.h index 796b6fcb0e10a8..a3a7365024de07 100644 --- a/paddle/cinn/hlir/pass/op_fusion_pass_util.h +++ b/paddle/cinn/hlir/pass/op_fusion_pass_util.h @@ -216,7 +216,7 @@ CONDITION_FUNC(horizontal_or_vertical_reduce_relation) { break; } - return helper->target_ == common::DefaultNVGPUTarget() + return helper->target_ == cinn::common::DefaultNVGPUTarget() ? (succesive_reduce_dimension <= helper->target_.max_num_threads() ? 
true : false) @@ -263,7 +263,7 @@ CONDITION_FUNC(reduce_fuse_broadcast) { return false; } - if (helper->target_ != common::DefaultNVGPUTarget()) { + if (helper->target_ != cinn::common::DefaultNVGPUTarget()) { return true; } diff --git a/paddle/cinn/hlir/pass/opfusion.cc b/paddle/cinn/hlir/pass/opfusion.cc index f95eed9873d959..537b9abb458817 100644 --- a/paddle/cinn/hlir/pass/opfusion.cc +++ b/paddle/cinn/hlir/pass/opfusion.cc @@ -26,8 +26,8 @@ namespace cinn { namespace hlir { namespace pass { -using common::GraphNode; -using common::Type; +using cinn::common::GraphNode; +using cinn::common::Type; using framework::Graph; using framework::Node; using framework::NodeData; @@ -203,8 +203,8 @@ class DomTree { struct GroupNode { GroupNode* parent{nullptr}; OpPatternKind pattern; - common::GraphNode* ref_node{nullptr}; - common::GraphNode* master_node{nullptr}; + cinn::common::GraphNode* ref_node{nullptr}; + cinn::common::GraphNode* master_node{nullptr}; int index{0}; int nodes_count{1}; int op_nodes_count{0}; @@ -518,7 +518,7 @@ class GraphPartition { } } } - void SplitGroups(const std::vector& graph_nodes) { + void SplitGroups(const std::vector& graph_nodes) { // split groups sorted by topo order CHECK_EQ(graph_nodes.size(), group_nodes_.size()); absl::flat_hash_map> group_maps; diff --git a/paddle/cinn/hlir/pass/opfusion_test.cc b/paddle/cinn/hlir/pass/opfusion_test.cc index 0dc87573cceb39..5df145453abd14 100644 --- a/paddle/cinn/hlir/pass/opfusion_test.cc +++ b/paddle/cinn/hlir/pass/opfusion_test.cc @@ -69,7 +69,7 @@ TEST(complex2, complex2) { auto e = program.relu(c); auto f = program.elementwise_add(d, e); - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); program.SetInputs({A, B, C, D, E}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -125,7 +125,7 @@ TEST(complex1, complex1) { auto e = program.relu(c); auto f = program.elementwise_add(d, e); - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); program.SetInputs({A, B, C, D, E}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -163,7 +163,7 @@ TEST(fuse_add_relu, fuse_add_relu) { auto c = program.elementwise_add(A, B, 1); auto d = program.relu(c); - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); program.SetInputs({A, B}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -202,7 +202,7 @@ TEST(fuse_add, fuse_add) { auto c = program.elementwise_add(A, B, 1); auto d = program.elementwise_add(c, C, 1); - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); program.SetInputs({A, B, C}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -261,7 +261,7 @@ TEST(conv_bn_conv, conv_bn_conv) { auto f = program.elementwise_mul(e, D); auto g = program.relu(f); - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); program.SetInputs({A, B, C, D, E}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -313,7 +313,7 @@ TEST(fuse_conv_add, fuse_conv_add) { auto c = program.conv2d(A, B, attrs); auto d = program.elementwise_add(c, C, 1); - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); program.SetInputs({A, B, C}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -372,7 +372,7 @@ TEST(conv_add_mul, conv_add_mul) { auto d = program.elementwise_add(c, Scale); auto e = program.elementwise_mul(d, Bias, 1); - Target target = common::DefaultTarget(); + 
Target target = cinn::common::DefaultTarget(); program.SetInputs({A, B, D}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -421,7 +421,7 @@ TEST(fuse_conv_add1, fuse_conv_add1) { auto c = program.conv2d(A, B, attrs); auto d = program.elementwise_add(c, C); - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); program.SetInputs({A, B, C}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -462,7 +462,7 @@ TEST(transpose_reshape_concat, transpose_reshape_concat) { auto d = program.reshape(b, {4, 32}); auto e = program.concat({c, d}); - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); program.SetInputs({A, B}); program.Validate(); LOG(INFO) << "Program:\n" << program; @@ -515,7 +515,7 @@ TEST(conv_bn, conv_bn) { auto d = program.fused_batchnorm_inference(c, Scale, Bias, Mean, Variance, attrs1); - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); program.SetInputs({A, B, Scale, Bias, Mean, Variance}); program.Validate(); LOG(INFO) << "Program:\n" << program; diff --git a/paddle/cinn/hlir/pass/reduce_split_pass.cc b/paddle/cinn/hlir/pass/reduce_split_pass.cc index bfcfde59ba0426..1f8c500cc9be05 100644 --- a/paddle/cinn/hlir/pass/reduce_split_pass.cc +++ b/paddle/cinn/hlir/pass/reduce_split_pass.cc @@ -24,7 +24,7 @@ namespace hlir { namespace pass { namespace { -using common::GraphNode; +using cinn::common::GraphNode; using framework::Node; using framework::NodeData; using framework::Operator; @@ -73,7 +73,7 @@ class ReduceSplitPass { public: // Find the reduce op with nwhc format and large shape, split it into two ops static int Apply(framework::Graph* graph) { - int MAX_NUM_THREADS = common::DefaultNVGPUTarget().max_num_threads(); + int MAX_NUM_THREADS = cinn::common::DefaultNVGPUTarget().max_num_threads(); constexpr int MAX_ITER_PER_THREAD = 32; // empirical value int cnt = 0; @@ -159,7 +159,7 @@ class ReduceSplitPass { // 1. reshape_loop > split_loop // 2. reshape thread > max_threads. 
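// Illustrative arithmetic for the two conditions above (shapes assumed for
// the example, not taken from this PR): reducing [256, 32, 32, 64] over
// dims {0, 1, 2} walks 256 * 32 * 32 = 262144 elements per output channel
// in one kernel. The split instead produces
//   reshape -> [512, 512, 64] -> reduce {0} -> [512, 64]
//           -> reduce {0} -> [64] -> reshape,
// keeping each kernel's reduce extent near sqrt(262144) = 512 and within
// the thread budget that the check just below enforces.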
           if (shape[0] <= reduce_numel0 &&
-              shape[1] * shape[2] <= common::GetMaxThreads()) {
+              shape[1] * shape[2] <= cinn::common::GetMaxThreads()) {
             VLOG(3) << " Don't Do Reduce Split!";
             continue;
           }
@@ -173,7 +173,7 @@ class ReduceSplitPass {
           // create reshape node0
           Node* reshape0 = new Node(Operator::Get("reshape"),
                                     "reshape",
-                                    common::UniqName("reshape_split"));
+                                    cinn::common::UniqName("reshape_split"));
           reshape0->attrs.attr_store["shape"] = std::vector<int>{
               reduce_numel0, reduce_numel1, in_shape[in_shape.size() - 1]};
           graph->RegisterNode(reshape0->id(), reshape0);
@@ -181,24 +181,24 @@ class ReduceSplitPass {
           in->UnLinkSingleTo(node);
           node->UnLinkSingleTo(out);
           auto reshape0_data = new NodeData(
-              Shared<Node>(reshape0), 0, 0, common::UniqName("var"), false);
+              Shared<Node>(reshape0), 0, 0, cinn::common::UniqName("var"), false);
           graph->RegisterNode(reshape0_data->id(), reshape0_data);
           reshape0->LinkTo(reshape0_data);
           shape_dict[reshape0_data->id()] =
               absl::get<std::vector<int>>(reshape0->attrs.attr_store.at("shape"));
-          dtype_dict[reshape0_data->id()] =
-              common::Str2Type(common::Type2Str(dtype_dict[in->id()]));
+          dtype_dict[reshape0_data->id()] = cinn::common::Str2Type(
+              cinn::common::Type2Str(dtype_dict[in->id()]));
 
           // create reduce node0
           Node* reduce0 = new Node(
-              Operator::Get(name), name, common::UniqName(name + "_split"));
+              Operator::Get(name), name, cinn::common::UniqName(name + "_split"));
           reduce0->attrs.attr_store["dim"] = std::vector<int>{0};
           reduce0->attrs.attr_store["keep_dim"] =
               absl::get<bool>(n->attrs.attr_store.at("keep_dim"));
           graph->RegisterNode(reduce0->id(), reduce0);
           reshape0_data->LinkTo(reduce0);
           auto reduce0_data = new NodeData(
-              Shared<Node>(reduce0), 0, 0, common::UniqName("var"), false);
+              Shared<Node>(reduce0), 0, 0, cinn::common::UniqName("var"), false);
           graph->RegisterNode(reduce0_data->id(), reduce0_data);
           reduce0->LinkTo(reduce0_data);
           shape_dict[reduce0_data->id()] =
@@ -207,12 +207,12 @@ class ReduceSplitPass {
                                in_shape[in_shape.size() - 1]}
                   : std::vector<int>{reduce_numel1, in_shape[in_shape.size() - 1]};
-          dtype_dict[reduce0_data->id()] =
-              common::Str2Type(common::Type2Str(dtype_dict[in->id()]));
+          dtype_dict[reduce0_data->id()] = cinn::common::Str2Type(
+              cinn::common::Type2Str(dtype_dict[in->id()]));
 
           // create reduce node1
           Node* reduce1 = new Node(
-              Operator::Get(name), name, common::UniqName(name + "_split"));
+              Operator::Get(name), name, cinn::common::UniqName(name + "_split"));
           reduce1->attrs.attr_store["dim"] =
               keep_dim ? std::vector<int>{0, 1} : std::vector<int>{0};
           reduce1->attrs.attr_store["keep_dim"] =
@@ -220,24 +220,24 @@ class ReduceSplitPass {
           graph->RegisterNode(reduce1->id(), reduce1);
           reduce0_data->LinkTo(reduce1);
           auto reduce1_data = new NodeData(
-              Shared<Node>(reduce1), 0, 0, common::UniqName("var"), false);
+              Shared<Node>(reduce1), 0, 0, cinn::common::UniqName("var"), false);
           graph->RegisterNode(reduce1_data->id(), reduce1_data);
           reduce1->LinkTo(reduce1_data);
           shape_dict[reduce1_data->id()] =
               keep_dim ? std::vector<int>{1, 1, in_shape[in_shape.size() - 1]}
                        : std::vector<int>{in_shape[in_shape.size() - 1]};
-          dtype_dict[reduce1_data->id()] =
-              common::Str2Type(common::Type2Str(dtype_dict[in->id()]));
+          dtype_dict[reduce1_data->id()] = cinn::common::Str2Type(
+              cinn::common::Type2Str(dtype_dict[in->id()]));
 
           // create reshape node1
           Node* reshape1 = new Node(Operator::Get("reshape"),
                                     "reshape",
-                                    common::UniqName("reshape_split"));
+                                    cinn::common::UniqName("reshape_split"));
           reshape1->attrs.attr_store["shape"] = out_shape;
           graph->RegisterNode(reshape1->id(), reshape1);
           reduce1_data->LinkTo(reshape1);
           reshape1->LinkTo(out);
-          out->source_node = common::Shared<Node>(reshape1);
+          out->source_node = cinn::common::Shared<Node>(reshape1);
 
           // drop old node
           graph->DropNode(node);
diff --git a/paddle/cinn/hlir/pass/reduce_split_pass_test.cc b/paddle/cinn/hlir/pass/reduce_split_pass_test.cc
index 4285c93dd75926..8319c6ae13ec4b 100644
--- a/paddle/cinn/hlir/pass/reduce_split_pass_test.cc
+++ b/paddle/cinn/hlir/pass/reduce_split_pass_test.cc
@@ -24,7 +24,7 @@ std::unordered_map<std::string, std::vector<float>> RunModelTest(
     const std::vector<std::string>&& passes,
     const std::unordered_map<std::string, std::vector<float>>& input_data,
     const std::unordered_set<std::string>& fetch_ids) {
-  auto target = common::DefaultTarget();
+  auto target = cinn::common::DefaultTarget();
   auto graph =
       std::make_shared<hlir::framework::Graph>(program, fetch_ids, target);
   hlir::framework::ApplyPasses(graph.get(), passes);
diff --git a/paddle/cinn/hlir/pass/single_group_optimize_pass.cc b/paddle/cinn/hlir/pass/single_group_optimize_pass.cc
index 1f8982192cddc5..816943b38cee08 100644
--- a/paddle/cinn/hlir/pass/single_group_optimize_pass.cc
+++ b/paddle/cinn/hlir/pass/single_group_optimize_pass.cc
@@ -25,14 +25,14 @@ namespace cinn::hlir::pass {
 
 using framework::Graph;
 using Group = framework::Graph::Group;
-using common::GraphEdge;
-using common::GraphNode;
+using cinn::common::GraphEdge;
+using cinn::common::GraphNode;
 using framework::Node;
 using framework::NodeData;
 
 using ShapeDict = absl::flat_hash_map<std::string, framework::shape_t>;
-using DtypeDict = absl::flat_hash_map<std::string, common::Type>;
+using DtypeDict = absl::flat_hash_map<std::string, cinn::common::Type>;
 
 namespace utils {
 template <typename T>
@@ -179,7 +179,7 @@ bool SingleGroupOptimizePass::CanReplaceToMemcpy(Node* node) const {
 }
 
 void SingleGroupOptimizePassImpl(Graph* graph) {
-  if (graph->target_ != common::DefaultNVGPUTarget()) {
+  if (graph->target_ != cinn::common::DefaultNVGPUTarget()) {
     return;
   }
   graph->fusion_groups = SingleGroupOptimizePass(graph).Apply();
diff --git a/paddle/cinn/hlir/pass/test_dot_merger.cc b/paddle/cinn/hlir/pass/test_dot_merger.cc
index ee4586571ec06e..bb7c832214750a 100644
--- a/paddle/cinn/hlir/pass/test_dot_merger.cc
+++ b/paddle/cinn/hlir/pass/test_dot_merger.cc
@@ -52,7 +52,7 @@ TEST(DotMerger, lhs) {
   auto h1 = builder.Add(e1, h);
   auto p = builder.Build();
-  Target target = common::DefaultNVGPUTarget();
+  Target target = cinn::common::DefaultNVGPUTarget();
   std::vector<std::string> input_ids;
   absl::c_transform(
       std::vector<std::string>{a.id(), b.id(), c.id(), c1.id()},
@@ -92,7 +92,7 @@ TEST(DotMerger, rhs) {
   auto e = builder.Matmul(b, c);
   auto f = builder.Concat({d, e}, axis);
   auto p = builder.Build();
-  Target target = common::DefaultNVGPUTarget();
+  Target target = cinn::common::DefaultNVGPUTarget();
   std::vector<std::string> input_ids;
   absl::c_transform(std::vector<std::string>{a.id(), b.id(), c.id()},
                     std::back_inserter(input_ids),
diff --git a/paddle/cinn/hlir/pass/test_primitive_ops.cc b/paddle/cinn/hlir/pass/test_primitive_ops.cc
index 2b39b5c57f27b8..c44eab12edd2cb 100644
--- a/paddle/cinn/hlir/pass/test_primitive_ops.cc
+++ b/paddle/cinn/hlir/pass/test_primitive_ops.cc
@@ -52,7 +52,7 @@ TEST(batch_norm_meta, batch_norm_meta) {
   auto b = program.fused_batchnorm_inference(A, Scale, Bias, Mean, Variance, attrs);
 
-  Target target = common::DefaultTarget();
+  Target target = cinn::common::DefaultTarget();
   program.SetInputs({A});
   program.Validate();
   LOG(INFO) << "Program:\n" << program;
@@ -91,7 +91,7 @@ TEST(reduction, reduce) {
   auto c = program.reduce_prod(A, axis, keep_dim);
   auto d = program.reduce_sum(A, {0, 1, 2, 3}, keep_dim);
 
-  Target target = common::DefaultTarget();
+  Target target = cinn::common::DefaultTarget();
   program.SetInputs({A});
   program.Validate();
   LOG(INFO) << "Program:\n" << program;
@@ -124,7 +124,7 @@ TEST(Compare, Compare) {
   Program program;
   auto a = program.primitive_equal(A, B);
 
-  Target target = common::DefaultTarget();
+  Target target = cinn::common::DefaultTarget();
   program.SetInputs({A, B});
   program.Validate();
   LOG(INFO) << "Program:\n" << program;
diff --git a/paddle/cinn/hlir/pe/broadcast.cc b/paddle/cinn/hlir/pe/broadcast.cc
index 0378e5fe4be214..0d7824955ade07 100644
--- a/paddle/cinn/hlir/pe/broadcast.cc
+++ b/paddle/cinn/hlir/pe/broadcast.cc
@@ -29,7 +29,7 @@ namespace cinn {
 namespace hlir {
 namespace pe {
 
-using common::make_zero;
+using cinn::common::make_zero;
 using ir::Tensor;
 using lang::Compute;
 
@@ -323,8 +323,8 @@ Tensor Atan2(const Tensor& A,
   auto fn = [&](const Expr& elem_a, const Expr& elem_b) {
     auto atan = lang::Atan(elem_a / elem_b);
-    auto pi = common::make_const(atan->type(), PI);
-    auto half_pi = common::make_const(atan->type(), PI / 2);
+    auto pi = cinn::common::make_const(atan->type(), PI);
+    auto half_pi = cinn::common::make_const(atan->type(), PI / 2);
     auto zero = ir::Zero(atan->type());
     return ir::Select::Make(
         ir::EQ::Make(elem_b, zero),
diff --git a/paddle/cinn/hlir/pe/broadcast.h b/paddle/cinn/hlir/pe/broadcast.h
index bc7a7da0e3d694..adae2fe33a4b63 100644
--- a/paddle/cinn/hlir/pe/broadcast.h
+++ b/paddle/cinn/hlir/pe/broadcast.h
@@ -43,12 +43,12 @@ void GetBroadcastOutShape(const std::vector<int>& input_shape1,
  * shape(A) = (2, 3, 4, 5), shape(B) = (2), with axis=0
  * shape(A) = (2, 3, 4, 5), shape(B) = (2, 1), with axis=0
  */
-#define HLIR_DCL_BC_PE(name__)                                              \
-  ir::Tensor name__(                                                        \
-      const ir::Tensor& A,                                                  \
-      const ir::Tensor& B,                                                  \
-      const std::string& out_name = common::UniqName("T_" #name__ "_out"),  \
-      const Expr& axis = Expr());
+#define HLIR_DCL_BC_PE(name__)                                       \
+  ir::Tensor name__(const ir::Tensor& A,                             \
+                    const ir::Tensor& B,                             \
+                    const std::string& out_name =                    \
+                        cinn::common::UniqName("T_" #name__ "_out"), \
+                    const Expr& axis = Expr());
 
 //! Compute A + B with auto-broadcasting.
 HLIR_DCL_BC_PE(Add);
@@ -107,13 +107,13 @@ ir::Tensor Pow(const ir::Tensor& A,
                const ir::Tensor& B,
                const std::string& output_name,
                const Expr& axis,
-               const common::Target& target);
+               const cinn::common::Target& target);
 
 ir::Tensor BroadcastTo(
     const ir::Tensor& A,
     const std::vector<int>& out_shape,
     const std::vector<int>& broadcast_axes,
-    const std::string& out_name = common::UniqName("T_broadcast_to_out"));
+    const std::string& out_name = cinn::common::UniqName("T_broadcast_to_out"));
 
 // This operator checks if all x and y satisfy the condition: |x - y| <= atol +
 // rtol * |y|
@@ -124,7 +124,7 @@ ir::Tensor IsClose(
     float rtol = 1e-05f,
     float atol = 1e-08f,
     bool equal_nan = false,
-    const std::string& out_name = common::UniqName("IsClose_output"));
+    const std::string& out_name = cinn::common::UniqName("IsClose_output"));
 
 }  // namespace pe
 }  // namespace hlir
diff --git a/paddle/cinn/hlir/pe/elementwise.cc b/paddle/cinn/hlir/pe/elementwise.cc
index 6a147a21b9a084..05455a85299569 100644
--- a/paddle/cinn/hlir/pe/elementwise.cc
+++ b/paddle/cinn/hlir/pe/elementwise.cc
@@ -250,7 +250,7 @@ ir::Tensor Arange(const float start,
         return ir::Cast::Make(
             dtype,
             Expr(start) +
-                Expr(step) * ir::Cast::Make(common::F32(), indices[0]));
+                Expr(step) * ir::Cast::Make(cinn::common::F32(), indices[0]));
       },
       output_name);
   return res;
diff --git a/paddle/cinn/hlir/pe/elementwise.h b/paddle/cinn/hlir/pe/elementwise.h
index 95e93c39d5c27e..e212fa9487a9c2 100644
--- a/paddle/cinn/hlir/pe/elementwise.h
+++ b/paddle/cinn/hlir/pe/elementwise.h
@@ -84,25 +84,26 @@ HLIR_DCL_UNARY_PE(Clz);
 HLIR_DCL_UNARY_PE(Popc);
 
 template <typename T>
-ir::Tensor AssignValue(const std::vector<T>& values,
-                       const common::Type& type = common::type_of<T>(),
-                       const std::string& output_name = "T_assign_value_out") {
+ir::Tensor AssignValue(
+    const std::vector<T>& values,
+    const cinn::common::Type& type = cinn::common::type_of<T>(),
+    const std::string& output_name = "T_assign_value_out") {
   CHECK(!values.empty())
       << "The input of pe::AssignValue should not empty! Please check.";
 
   auto out = lang::Compute(
      {ir::Expr(static_cast<int>(values.size()))},
      [=](const std::vector<ir::Expr>& indice) {
-        auto init_value = (type == common::type_of<T>())
+        auto init_value = (type == cinn::common::type_of<T>())
                               ? ir::Expr(values[0])
-                              : common::cast(ir::Expr(values[0]), type);
+                              : cinn::common::cast(ir::Expr(values[0]), type);
         ir::Expr previous = ir::Select::Make(
             ir::EQ::Make(indice[0], ir::Expr(0)), init_value, lang::Zero(type));
 
         for (int i = 1; i < values.size(); ++i) {
-          auto val = (type == common::type_of<T>())
+          auto val = (type == cinn::common::type_of<T>())
                          ? ir::Expr(values[i])
-                         : common::cast(ir::Expr(values[i]), type);
+                         : cinn::common::cast(ir::Expr(values[i]), type);
           previous = ir::Select::Make(
               ir::EQ::Make(indice[0], ir::Expr(i)), val, previous);
         }
diff --git a/paddle/cinn/hlir/pe/ir_schedule_pe.cc b/paddle/cinn/hlir/pe/ir_schedule_pe.cc
index b8f6d170996b38..2c27c98d5faf90 100644
--- a/paddle/cinn/hlir/pe/ir_schedule_pe.cc
+++ b/paddle/cinn/hlir/pe/ir_schedule_pe.cc
@@ -66,10 +66,10 @@ void SetReduceAxis(ir::Expr loop, ir::Expr block) {
 
 void IRElementwiseSchedule(ir::IRSchedule &ir_sch,  // NOLINT
                            const std::vector<int> &output_shape,
-                           const common::Target &target) {
+                           const cinn::common::Target &target) {
   VLOG(3) << "Before IRElementwiseSchedule, new ir is : "
           << ir_sch.GetModule().GetExprs().at(0);
-  if (target == common::DefaultNVGPUTarget()) {
+  if (target == cinn::common::DefaultNVGPUTarget()) {
     auto blocks = ir_sch.GetAllBlocks();
     std::vector<ir::Expr> loops = ir_sch.GetLoops(blocks[0]);
     ir::Expr loop = ir_sch.Fuse(loops);
@@ -94,10 +94,10 @@ void IRElementwiseSchedule(ir::IRSchedule &ir_sch,  // NOLINT
 
 void IRInjectiveSchedule(ir::IRSchedule &ir_sch,  // NOLINT
                          const std::vector<int> &output_shape,
-                         const common::Target &target) {
+                         const cinn::common::Target &target) {
   VLOG(3) << "Before IRInjectiveSchedule, new ir is : "
          << ir_sch.GetModule().GetExprs().at(0);
-  if (target == common::DefaultNVGPUTarget()) {
+  if (target == cinn::common::DefaultNVGPUTarget()) {
     auto blocks = ir_sch.GetAllBlocks();
     std::vector<ir::Expr> loops = ir_sch.GetLoops(blocks[0]);
     ir::Expr loop = ir_sch.Fuse(loops);
@@ -122,7 +122,7 @@ void IRInjectiveSchedule(ir::IRSchedule &ir_sch,  // NOLINT
 
 void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch,  // NOLINT
                             const std::vector<int> &output_shape,
-                            const common::Target &target,
+                            const cinn::common::Target &target,
                             bool vectorizable) {
   VLOG(3) << "Begin IRScheduleInjectiveCPU"
           << ir_sch.GetModule().GetExprs().at(0);
@@ -159,7 +159,7 @@ void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch,  // NOLINT
 
 void IRCudaScheduleInjective(ir::IRSchedule &ir_sch,  // NOLINT
                              const std::vector<int> &output_shape,
-                             const common::Target &target) {
+                             const cinn::common::Target &target) {
   VLOG(3) << "Begin IRCudaScheduleInjective ";
   auto all_blocks = ir_sch.GetAllBlocks();
   auto loops = ir_sch.GetLoops(all_blocks[0]);
@@ -180,10 +180,10 @@ void IRCudaScheduleInjective(ir::IRSchedule &ir_sch,  // NOLINT
           << ir_sch.GetModule().GetExprs().at(0);
 }
 
-std::vector<common::CINNValue> IRCudaScheduleMatMul(
-    const common::CINNValuePack &arg_pack,
+std::vector<cinn::common::CINNValue> IRCudaScheduleMatMul(
+    const cinn::common::CINNValuePack &arg_pack,
     const std::vector<ir::Expr> &output_shape,
-    const common::Target &target) {
+    const cinn::common::Target &target) {
   if (target.arch == Target::Arch::X86) {
     CINN_NOT_IMPLEMENTED
   }
@@ -230,12 +230,12 @@ std::vector<common::CINNValue> IRCudaScheduleMatMul(
     }
   }
 
-  return {common::CINNValue(ir_sch.GetModule().GetExprs().at(0))};
+  return {cinn::common::CINNValue(ir_sch.GetModule().GetExprs().at(0))};
 }
 
 void IRCudaScheduleMul(ir::IRSchedule &ir_sch,  // NOLINT
                        const std::vector<int> &output_shape,
-                       const common::Target &target) {
+                       const cinn::common::Target &target) {
   auto all_blocks = ir_sch.GetAllBlocks();
   auto loops = ir_sch.GetLoops(all_blocks.back());
   CHECK_GE(loops.size(), 2U);
@@ -248,7 +248,7 @@ void IRCudaScheduleMul(ir::IRSchedule &ir_sch,  // NOLINT
 
 void IRMulScheduleCPU(ir::IRSchedule &ir_sch,  // NOLINT
                       const std::vector<int> &reduce_first_shape,
-                      const common::Target &target) {
+                      const cinn::common::Target &target) {
   ir_sch.MergeExprs();
   auto all_blocks = ir_sch.GetAllBlocks();
   CHECK_EQ(all_blocks.size(),
4U); @@ -266,7 +266,7 @@ void IRMulScheduleCPU(ir::IRSchedule &ir_sch, // NOLINT void IRCudaSplitSchedule(ir::IRSchedule &ir_sch, // NOLINT const std::vector> &output_shapes, int axis, - const common::Target &target) { + const cinn::common::Target &target) { VLOG(3) << "In IRCudaSplitSchedule, Before schedule expr is : " << ir_sch.GetModule().GetExprs().at(0); ir_sch.MergeExprs(); @@ -294,7 +294,7 @@ void IRCudaSplitSchedule(ir::IRSchedule &ir_sch, // NOLINT block_names.push_back(get_block_name(block)); } // if output with same shape. - if (with_same_shape && target == common::DefaultNVGPUTarget()) { + if (with_same_shape && target == cinn::common::DefaultNVGPUTarget()) { // flat loops. { auto tsize = std::accumulate(output_shapes[0].begin(), @@ -326,7 +326,7 @@ void IRCudaSplitSchedule(ir::IRSchedule &ir_sch, // NOLINT master_loops[1]); } } - } else if (target == common::DefaultNVGPUTarget()) { + } else if (target == cinn::common::DefaultNVGPUTarget()) { // flat loops. { for (int idx = 0; idx < block_names.size(); ++idx) { @@ -362,7 +362,7 @@ void IRCudaSplitSchedule(ir::IRSchedule &ir_sch, // NOLINT void IRCudaScheduleReduce(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor output, int last_dimension_num, - const common::Target &target) { + const cinn::common::Target &target) { VLOG(3) << "Before IRCudaScheduleReduce : " << ir_sch.GetModule().GetExprs().at(0); int parallel_thread_num = 1; @@ -418,7 +418,7 @@ void IRCudaScheduleReduce(ir::IRSchedule &ir_sch, // NOLINT void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor tmp_out, ir::Tensor out, - const common::Target &target) { + const cinn::common::Target &target) { VLOG(3) << "Before IRCudaScheduleBlockReduceInternal : " << ir_sch.GetModule().GetExprs().at(0); int fuse_times = ir_sch.GetLoops(tmp_out->name).size() - 2; @@ -443,7 +443,7 @@ void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch, // NOLINT ->schedule_block->as()); // create var - auto var = ir::Var(ir::Expr(0), ir::Expr(1), common::UniqName("i")); + auto var = ir::Var(ir::Expr(0), ir::Expr(1), cinn::common::UniqName("i")); out_block->as()->iter_values.push_back(var); out_block->as() ->schedule_block->as() @@ -517,7 +517,7 @@ void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor reduce_tmp_out, ir::Tensor tmp_out, ir::Tensor out, - const common::Target &target) { + const cinn::common::Target &target) { VLOG(3) << "Before IRCudaScheduleBlockReduce : " << ir_sch.GetModule().GetExprs().at(0); int tmp_put_shape_size_without_reduce = 0; @@ -667,7 +667,7 @@ void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor reshape, ir::Tensor internal, ir::Tensor reduce_out, - const common::Target &target) { + const cinn::common::Target &target) { VLOG(3) << "Before IRCudaScheduleBlockShuffleReduce : " << ir_sch.GetModule().GetExprs().at(0); // reshape compute inline @@ -929,7 +929,7 @@ void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor internal, ir::Tensor tmp_out, ir::Tensor out, - const common::Target &target) { + const cinn::common::Target &target) { VLOG(3) << "Before IRCudaTwoStepReduceSchedule : " << ir_sch.GetModule().GetExprs().at(0); // fuse axis @@ -1065,7 +1065,7 @@ void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis) { // NOLINT } void IRPoolScheduleGPU(ir::IRSchedule &ir_sch, // NOLINT - const common::Target &target, + const cinn::common::Target &target, int arg_pack_size) { VLOG(3) << "Before IRPoolScheduleGPU: " << ir_sch.GetModule().GetExprs().at(0); @@ 
-1083,7 +1083,7 @@ void IRPoolScheduleGPU(ir::IRSchedule &ir_sch, // NOLINT } void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch, // NOLINT - const common::Target &target) { + const cinn::common::Target &target) { VLOG(3) << "Before IRGlobalPoolScheduleGPU: " << ir_sch.GetModule().GetExprs().at(0); auto all_blocks = ir_sch.GetAllBlocks(); @@ -1152,7 +1152,7 @@ void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch, // NOLINT } void IRCudaScheduleConv(ir::IRSchedule &ir_sch, // NOLINT - const common::Target &target) { + const cinn::common::Target &target) { VLOG(3) << "Begin IRCudaScheduleConv with expr: " << ir_sch.GetModule().GetExprs().at(0); auto &res = ScheduleParam::get_cuda_instance().GetParam(); @@ -1297,7 +1297,7 @@ void IRCudaScheduleConv2(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor &input_pad, // NOLINT ir::Tensor &weights, // NOLINT ir::Tensor &output, // NOLINT - const common::Target &target, + const cinn::common::Target &target, const std::string &key) { auto &res = ScheduleParam::get_cuda_instance().GetParam(); diff --git a/paddle/cinn/hlir/pe/ir_schedule_pe.h b/paddle/cinn/hlir/pe/ir_schedule_pe.h index 5a7e32197220f1..4deb4a22277b9a 100644 --- a/paddle/cinn/hlir/pe/ir_schedule_pe.h +++ b/paddle/cinn/hlir/pe/ir_schedule_pe.h @@ -33,89 +33,89 @@ namespace pe { void IRElementwiseSchedule(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, - const common::Target &target); + const cinn::common::Target &target); void IRInjectiveSchedule(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, - const common::Target &target); + const cinn::common::Target &target); void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, - const common::Target &target, + const cinn::common::Target &target, bool vectorizable = true); void IRCudaScheduleInjective(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, - const common::Target &target); + const cinn::common::Target &target); -std::vector IRCudaScheduleMatMul( - const common::CINNValuePack &arg_pack, +std::vector IRCudaScheduleMatMul( + const cinn::common::CINNValuePack &arg_pack, const std::vector &output_shape, - const common::Target &target); + const cinn::common::Target &target); void IRCudaScheduleMul(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, - const common::Target &target); + const cinn::common::Target &target); void IRMulScheduleCPU(ir::IRSchedule &ir_sch, // NOLINT const std::vector &reduce_first_shape, - const common::Target &target); + const cinn::common::Target &target); void IRCudaSplitSchedule(ir::IRSchedule &ir_sch, // NOLINT const std::vector> &output_shapes, int axis, - const common::Target &target); + const cinn::common::Target &target); void IRCudaScheduleReduce(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor out, int last_dimension_num, - const common::Target &target); + const cinn::common::Target &target); void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor reduce_tmp_out, ir::Tensor tmp_out, ir::Tensor out, - const common::Target &target); + const cinn::common::Target &target); void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor tmp_out, ir::Tensor out, - const common::Target &target); + const cinn::common::Target &target); void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor reshape, ir::Tensor internal, ir::Tensor out, - const common::Target &target); + const cinn::common::Target &target); void 
IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor reshape, ir::Tensor internal, ir::Tensor tmp_out, ir::Tensor out, - const common::Target &target); + const cinn::common::Target &target); void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis = -1); // NOLINT void IRPoolScheduleGPU(ir::IRSchedule &ir_sch, // NOLINT - const common::Target &target, + const cinn::common::Target &target, int arg_pack_size = 3); void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch, // NOLINT const std::vector &tensors); void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch, // NOLINT - const common::Target &target); + const cinn::common::Target &target); void IRCudaScheduleConv2(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor &input_pad, // NOLINT ir::Tensor &weights, // NOLINT ir::Tensor &output, // NOLINT - const common::Target &target, + const cinn::common::Target &target, const std::string &key); void IRCudaScheduleConv(ir::IRSchedule &ir_sch, // NOLINT - const common::Target &target); + const cinn::common::Target &target); } // namespace pe } // namespace hlir diff --git a/paddle/cinn/hlir/pe/load_params_test.cc b/paddle/cinn/hlir/pe/load_params_test.cc index 897e8186db4eba..cc76519472b27e 100644 --- a/paddle/cinn/hlir/pe/load_params_test.cc +++ b/paddle/cinn/hlir/pe/load_params_test.cc @@ -29,7 +29,7 @@ TEST(load_x86_params, load_x86_params) { ASSERT_EQ(res.count(key), 1); absl::flat_hash_map conv2d_factors; - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); std::vector shape_input = {1, 64, 56, 56}; std::vector shape_weights = {64, 64, 3, 3}; std::vector strides = {1, 1}; diff --git a/paddle/cinn/hlir/pe/map_expr_to_ir.cc b/paddle/cinn/hlir/pe/map_expr_to_ir.cc index 84dd3e7d302eee..4385f543ccda5d 100644 --- a/paddle/cinn/hlir/pe/map_expr_to_ir.cc +++ b/paddle/cinn/hlir/pe/map_expr_to_ir.cc @@ -51,7 +51,7 @@ class MapExprToIrTranslator { public: explicit MapExprToIrTranslator(const MapExpr& map_expr, const Node2LoweredFuncs& node2lowered_funcs, - const common::Target& target) + const cinn::common::Target& target) : map_expr_(map_expr), node2lowered_funcs_(&node2lowered_funcs), target_(target) { @@ -744,7 +744,7 @@ class MapExprToIrTranslator { MapExpr map_expr_; const Node2LoweredFuncs* node2lowered_funcs_; - const common::Target target_; + const cinn::common::Target target_; TensorIteratorExpr4TensorT TensorIteratorExpr4Tensor; LoopDescriptor4LoopIteratorT LoopDescriptor4LoopIterator; }; @@ -752,7 +752,7 @@ class MapExprToIrTranslator { } // namespace ir::Expr MapExprToIr(const MapExprCtx& map_expr_ctx, - const common::Target& target) { + const cinn::common::Target& target) { const auto& expr = MapExprToIrTranslator( map_expr_ctx.map_expr(), map_expr_ctx.node2lowered_funcs(), target) diff --git a/paddle/cinn/hlir/pe/map_expr_to_ir.h b/paddle/cinn/hlir/pe/map_expr_to_ir.h index f4d37ad05c3527..32dd771cf5e2ae 100644 --- a/paddle/cinn/hlir/pe/map_expr_to_ir.h +++ b/paddle/cinn/hlir/pe/map_expr_to_ir.h @@ -25,6 +25,6 @@ struct Target; namespace cinn::adt { ir::Expr MapExprToIr(const MapExprCtx& map_expr_ctx, - const common::Target& target); + const cinn::common::Target& target); } diff --git a/paddle/cinn/hlir/pe/nn.cc b/paddle/cinn/hlir/pe/nn.cc index a3bae2149297f8..9c10e1ad137c24 100644 --- a/paddle/cinn/hlir/pe/nn.cc +++ b/paddle/cinn/hlir/pe/nn.cc @@ -43,7 +43,7 @@ using ir::Min; using ir::Select; using ir::Tensor; -std::string Type2StrForNN(common::Type type) { +std::string Type2StrForNN(cinn::common::Type type) { std::string 
suffix; if (type.is_float(64)) { return "fp64"; @@ -139,7 +139,7 @@ std::vector Conv2d_winograd_NCHW(const ir::Tensor &input, return ir::Select::Make( cond, weights(nn, cc, (yy / dilation_h), (xx / dilation_w)), - common::make_const(weights->type(), 0)); + cinn::common::make_const(weights->type(), 0)); }, UniqName("weights_dilation")); @@ -184,12 +184,12 @@ std::vector Conv2d_winograd_NCHW(const ir::Tensor &input, output_shape = { input->shape[0], // B weights->shape[0], // O - common::AutoSimplify( + cinn::common::AutoSimplify( (input->shape[2] - ((weights_dilation->shape[2] - 1) * dilation_h + 1) + 2 * pad_h) / stride_h + 1), // H - common::AutoSimplify( + cinn::common::AutoSimplify( (input->shape[3] - ((weights_dilation->shape[3] - 1) * dilation_w + 1) + 2 * pad_w) / stride_w + @@ -202,8 +202,8 @@ std::vector Conv2d_winograd_NCHW(const ir::Tensor &input, ir::Tensor B = winograd_transform[1]; ir::Tensor G = winograd_transform[2]; - int nH = (common::AutoSimplify(output_shape[2]).as_int32() + m - 1) / m; - int nW = (common::AutoSimplify(output_shape[3]).as_int32() + m - 1) / m; + int nH = (cinn::common::AutoSimplify(output_shape[2]).as_int32() + m - 1) / m; + int nW = (cinn::common::AutoSimplify(output_shape[3]).as_int32() + m - 1) / m; int P = input->shape[0].as_int32() * nH * nW; @@ -431,7 +431,7 @@ std::vector Conv2d_NCHW_5D(const ir::Tensor &input, int dilation_w, std::string key, const std::string &output_name, - const common::Target &target) { + const cinn::common::Target &target) { // input: 4D to 5D, NCHW->NCHWc // [batch, in_channel, in_height, in_width] -> // [batch, in_channel_chunk, in_height, in_width, in_channel_block] @@ -440,9 +440,9 @@ std::vector Conv2d_NCHW_5D(const ir::Tensor &input, std::vector shape_weights = weights->shape; CHECK_EQ(shape_input.size(), 4U) << "input's shape size should be 4"; CHECK_EQ(shape_weights.size(), 4U) << "weight's shape size should be 4"; - Expr c_in = common::AutoSimplify(shape_input[1]); - Expr c_filter = common::AutoSimplify(shape_weights[1]); - Expr c_out = common::AutoSimplify(shape_weights[0]); + Expr c_in = cinn::common::AutoSimplify(shape_input[1]); + Expr c_filter = cinn::common::AutoSimplify(shape_weights[1]); + Expr c_out = cinn::common::AutoSimplify(shape_weights[0]); absl::flat_hash_map conv2d_factors; int oc = c_out.as_int32(); int ic = c_in.as_int32(); @@ -507,12 +507,12 @@ std::vector Conv2d_NCHW_5D(const ir::Tensor &input, std::vector output_shape = { batch, // B c_out, // O - common::AutoSimplify((h_in - ((h_f - 1) * dilation_h + 1) + 2 * pad_h) / - stride_h + - 1), // H - common::AutoSimplify((w_in - ((w_f - 1) * dilation_w + 1) + 2 * pad_w) / - stride_w + - 1) // W + cinn::common::AutoSimplify( + (h_in - ((h_f - 1) * dilation_h + 1) + 2 * pad_h) / stride_h + + 1), // H + cinn::common::AutoSimplify( + (w_in - ((w_f - 1) * dilation_w + 1) + 2 * pad_w) / stride_w + + 1) // W }; auto res = Compute( output_shape, @@ -532,7 +532,7 @@ std::vector Conv2d_NCHWc(const ir::Tensor &input, int dilation_h, int dilation_w, const std::string &output_name, - const common::Target &target) { + const cinn::common::Target &target) { // input: [N, c_in_outer, H, W, c_in_inner] // weight: [c_out_outer, c_filter_outer, filter_h, filter_w, c_filter_inner, // c_out_inner] @@ -545,33 +545,33 @@ std::vector Conv2d_NCHWc(const ir::Tensor &input, << "Conv2d_NCHWc weight's shape size should be 6"; Expr batch = shape_input[0]; - Expr c_in_outer = common::AutoSimplify(shape_input[1]); + Expr c_in_outer = cinn::common::AutoSimplify(shape_input[1]); Expr 
h_in = shape_input[2]; Expr w_in = shape_input[3]; - Expr c_in_inner = common::AutoSimplify(shape_input[4]); + Expr c_in_inner = cinn::common::AutoSimplify(shape_input[4]); Expr c_out_outer = shape_weights[0]; - Expr c_filter_outer = common::AutoSimplify(shape_weights[1]); + Expr c_filter_outer = cinn::common::AutoSimplify(shape_weights[1]); Expr h_f = shape_weights[2]; Expr w_f = shape_weights[3]; - Expr c_filter_inner = common::AutoSimplify(shape_weights[4]); - Expr c_out_inner = common::AutoSimplify(shape_weights[5]); + Expr c_filter_inner = cinn::common::AutoSimplify(shape_weights[4]); + Expr c_out_inner = cinn::common::AutoSimplify(shape_weights[5]); - Expr c_filter = common::AutoSimplify(c_filter_outer * c_filter_inner); - Expr c_out = common::AutoSimplify(c_out_outer * c_out_inner); - Expr c_in = common::AutoSimplify(c_in_outer * c_in_inner); + Expr c_filter = cinn::common::AutoSimplify(c_filter_outer * c_filter_inner); + Expr c_out = cinn::common::AutoSimplify(c_out_outer * c_out_inner); + Expr c_in = cinn::common::AutoSimplify(c_in_outer * c_in_inner); Var fc(c_filter, UniqName("fc")); Var fy(h_f, UniqName("fy")); Var fx(w_f, UniqName("fx")); std::vector output_shape = { batch, // B c_out_outer, // O - common::AutoSimplify((h_in - ((h_f - 1) * dilation_h + 1) + 2 * pad_h) / - stride_h + - 1), // H - common::AutoSimplify((w_in - ((w_f - 1) * dilation_w + 1) + 2 * pad_w) / - stride_w + - 1), // W + cinn::common::AutoSimplify( + (h_in - ((h_f - 1) * dilation_h + 1) + 2 * pad_h) / stride_h + + 1), // H + cinn::common::AutoSimplify( + (w_in - ((w_f - 1) * dilation_w + 1) + 2 * pad_w) / stride_w + + 1), // W c_out_inner}; ir::Tensor input_pad; @@ -583,18 +583,18 @@ std::vector Conv2d_NCHWc(const ir::Tensor &input, }, UniqName("input_pad")); } else { - auto pad_h_bound = common::AutoSimplify((output_shape[2] - 1) * stride_h + - (h_f - 1) * dilation_h + 1); - auto pad_w_bound = common::AutoSimplify((output_shape[3] - 1) * stride_w + - (w_f - 1) * dilation_w + 1); + auto pad_h_bound = cinn::common::AutoSimplify( + (output_shape[2] - 1) * stride_h + (h_f - 1) * dilation_h + 1); + auto pad_w_bound = cinn::common::AutoSimplify( + (output_shape[3] - 1) * stride_w + (w_f - 1) * dilation_w + 1); auto pad_out_h = std::min(pad_h_bound.as_int32(), - common::AutoSimplify(h_in + 2 * pad_h).as_int32()); + cinn::common::AutoSimplify(h_in + 2 * pad_h).as_int32()); auto pad_out_w = std::min(pad_w_bound.as_int32(), - common::AutoSimplify(w_in + 2 * pad_w).as_int32()); - auto h_in_pad = common::AutoSimplify(h_in + pad_h); - auto w_in_pad = common::AutoSimplify(w_in + pad_w); + cinn::common::AutoSimplify(w_in + 2 * pad_w).as_int32()); + auto h_in_pad = cinn::common::AutoSimplify(h_in + pad_h); + auto w_in_pad = cinn::common::AutoSimplify(w_in + pad_w); input_pad = Compute( {batch, c_in_outer, Expr(pad_out_h), Expr(pad_out_w), c_in_inner}, [=](Expr n, Expr icc, Expr yy, Expr xx, Expr icb) { @@ -614,20 +614,23 @@ std::vector Conv2d_NCHWc(const ir::Tensor &input, auto packed_out = Compute( output_shape, [=](Expr n, Expr oc_chunk, Expr oh, Expr ow, Expr oc_block) { - Expr c_out_per_group = common::AutoSimplify(c_out * c_filter / c_in); + Expr c_out_per_group = + cinn::common::AutoSimplify(c_out * c_filter / c_in); Expr ic_outer, ic_inner; if (c_in == c_filter) { - ic_outer = common::AutoSimplify(fc / c_in_inner); - ic_inner = common::AutoSimplify(fc % c_in_inner); + ic_outer = cinn::common::AutoSimplify(fc / c_in_inner); + ic_inner = cinn::common::AutoSimplify(fc % c_in_inner); } else { - ic_outer = 
common::AutoSimplify(((oc_chunk * c_out_inner + oc_block) / - c_out_per_group * c_filter + - fc) / - c_in_inner); - ic_inner = common::AutoSimplify(((oc_chunk * c_out_inner + oc_block) / - c_out_per_group * c_filter + - fc) % - c_in_inner); + ic_outer = + cinn::common::AutoSimplify(((oc_chunk * c_out_inner + oc_block) / + c_out_per_group * c_filter + + fc) / + c_in_inner); + ic_inner = + cinn::common::AutoSimplify(((oc_chunk * c_out_inner + oc_block) / + c_out_per_group * c_filter + + fc) % + c_in_inner); } return lang::ReduceSum(input_pad(n, ic_outer, @@ -754,7 +757,7 @@ std::vector Conv2d_NHWC(const ir::Tensor &input, return ir::Select::Make( cond, weights(nn, cc, yy / dilation_h, xx / dilation_w), - common::make_const(weights->type(), 0)); + cinn::common::make_const(weights->type(), 0)); }, UniqName("weights_dilation")); @@ -923,8 +926,8 @@ ir::Tensor BatchNorm_NCHW(const ir::Tensor &input, input->shape, [=](Expr n, Expr c, Expr h, Expr w) { return (input(n, c, h, w) - mean(c)) * scale(c) / - lang::Sqrt(variance(c) + - common::make_const(input->type(), epsilon)) + + lang::Sqrt(variance(c) + cinn::common::make_const( + input->type(), epsilon)) + bias(c); }, UniqName(output_name)); @@ -954,8 +957,8 @@ ir::Tensor BatchNorm_NCHWc(const ir::Tensor &input, [=](Expr n, Expr icc, Expr h, Expr w, Expr icb) { Expr new_c = icc * ic_bn + icb; return (input(n, icc, h, w, icb) - mean(new_c)) * scale(new_c) / - lang::Sqrt(variance(new_c) + - common::make_const(input->type(), epsilon)) + + lang::Sqrt(variance(new_c) + cinn::common::make_const( + input->type(), epsilon)) + bias(new_c); }, UniqName(output_name)); @@ -1101,8 +1104,8 @@ Tensor Pad(const Tensor &tensor, if (i >= pad_before.size()) { output_shape.push_back(tensor->shape[i]); } else { - auto shape = - common::AutoSimplify(tensor->shape[i] + pad_before[i] + pad_after[i]); + auto shape = cinn::common::AutoSimplify(tensor->shape[i] + pad_before[i] + + pad_after[i]); output_shape.push_back(shape); } } @@ -1128,8 +1131,8 @@ Tensor Pad(const Tensor &tensor, } Expr sel_after; if (!MathEqual(pad_after[i], Expr(0))) { - sel_after = - common::AutoSimplify(ovars[i] < pad_before[i] + tensor->shape[i]); + sel_after = cinn::common::AutoSimplify(ovars[i] < pad_before[i] + + tensor->shape[i]); sel.push_back(sel_after); } if (pad_mode == "edge") { @@ -1229,7 +1232,7 @@ std::vector PoolImpl(const Tensor &tensor, do_pad = (do_pad) ? 
do_pad : (padding_size[i] || padding_size[i + k_size]); if (ceil_mode) { - pad_tail[i] = common::AutoSimplify(pad_tail[i] + stride[i] - 1); + pad_tail[i] = cinn::common::AutoSimplify(pad_tail[i] + stride[i] - 1); } daxis.emplace_back(Var(kernel[i], UniqName("kernel_idx"))); @@ -1237,7 +1240,7 @@ std::vector PoolImpl(const Tensor &tensor, pad_before[ii] = pad_head[i]; pad_after[ii] = pad_tail[i]; - auto out_dim = common::AutoSimplify( + auto out_dim = cinn::common::AutoSimplify( (tensor->shape[ii] - kernel[i] + pad_head[i] + pad_tail[i]) / stride[i] + 1); @@ -1292,13 +1295,13 @@ std::vector PoolImpl(const Tensor &tensor, auto temp_factor = make_const(Int(32), 1); for (int i = 0; i < k_size; i++) { int ii = axis[i]; - start[i] = - common::AutoSimplify(output[ii] * stride[i] - pad_head[i]); + start[i] = cinn::common::AutoSimplify(output[ii] * stride[i] - + pad_head[i]); end[i] = Min::Make(start[i] + kernel[i], tensor->shape[ii]); start[i] = Max::Make(start[i], make_const(Int(32), 0)); temp_factor = temp_factor * (end[i] - start[i]); } - common::AutoSimplify(temp_factor); + cinn::common::AutoSimplify(temp_factor); Expr divide_factor = Max::Make(temp_factor, make_const(Int(32), 1)); return lang::ReduceSum( ir::Div::Make(temp(indices), @@ -1309,7 +1312,7 @@ std::vector PoolImpl(const Tensor &tensor, for (int i = 0; i < k_size; i++) { temp_factor = temp_factor * kernel[i]; } - common::AutoSimplify(temp_factor); + cinn::common::AutoSimplify(temp_factor); return lang::ReduceSum( ir::Div::Make(temp(indices), ir::Cast::Make(temp->type(), temp_factor)), @@ -1363,7 +1366,7 @@ std::vector PoolImpl(const Tensor &tensor, Expr(static_cast(tensor->shape[axis[i]].get_constant()) / kernel_size[i]); } - common::AutoSimplify(temp_factor); + cinn::common::AutoSimplify(temp_factor); Expr divide_factor = Max::Make(temp_factor, make_const(Int(32), 1)); return lang::ReduceSum( ir::Div::Make(temp(indices), @@ -1421,8 +1424,8 @@ std::vector GlobalPool2d(const Tensor &tensor, auto temp = Compute( {tensor->shape[0], tensor->shape[1], Expr(32)}, [=](Expr n, Expr c, Expr k) -> Expr { - Expr offset = common::IndiceToAbsOffset(tensor->shape, - {n, c, Expr(0), Expr(0)}); + Expr offset = cinn::common::IndiceToAbsOffset( + tensor->shape, {n, c, Expr(0), Expr(0)}); return lang::CallExtern( "cinn_warp_reduce_max_" + Type2StrForNN(tensor->type()), {tensor, offset, extend}); @@ -1440,8 +1443,8 @@ std::vector GlobalPool2d(const Tensor &tensor, auto temp = Compute( {tensor->shape[0], tensor->shape[1], Expr(32)}, [=](Expr n, Expr c, Expr k) -> Expr { - Expr offset = common::IndiceToAbsOffset(tensor->shape, - {n, c, Expr(0), Expr(0)}); + Expr offset = cinn::common::IndiceToAbsOffset( + tensor->shape, {n, c, Expr(0), Expr(0)}); return lang::CallExtern( "cinn_warp_reduce_avg_" + Type2StrForNN(tensor->type()), {tensor, offset, extend}); @@ -1547,7 +1550,7 @@ Tensor DropoutInfer(const ir::Tensor &tensor, tensor->shape, [=](const std::vector &indice) { return tensor(indice) * - common::make_const(tensor->type(), 1 - dropout_prob); + cinn::common::make_const(tensor->type(), 1 - dropout_prob); }, output_name); } else if (dropout_implementation == "upscale_in_train") { @@ -1572,7 +1575,7 @@ ir::Tensor Select(const ir::Tensor &condition, return lang::Compute( condition->shape, [=](const std::vector &indice) { - return common::select( + return cinn::common::select( condition(indice), true_value(indice), false_value(indice)); }, output_name); diff --git a/paddle/cinn/hlir/pe/nn.h b/paddle/cinn/hlir/pe/nn.h index 609bb9ade329f4..32e2db2dc38f71 
100755
--- a/paddle/cinn/hlir/pe/nn.h
+++ b/paddle/cinn/hlir/pe/nn.h
@@ -147,7 +147,7 @@ std::vector<ir::Tensor> Conv2d_NCHW_5D(
     int dilation_w,
     std::string key,
     const std::string &output_name = UniqName("T_Conv2d_NCHW_5D_out"),
-    const common::Target &target = common::DefaultHostTarget());
+    const cinn::common::Target &target = cinn::common::DefaultHostTarget());
 
 /**
  * @brief Perform a 2-D convolution with an NCHWc-layout.
@@ -176,7 +176,7 @@ std::vector<ir::Tensor> Conv2d_NCHWc(
     int dilation_h,
     int dilation_w,
     const std::string &output_name = UniqName("T_Conv2d_NCHWc_out"),
-    const common::Target &target = common::DefaultHostTarget());
+    const cinn::common::Target &target = cinn::common::DefaultHostTarget());
 
 #ifdef CINN_WITH_DNNL
 std::vector<ir::Tensor> Conv2d_NCHW_MKLDNN(
diff --git a/paddle/cinn/hlir/pe/nn_util.cc b/paddle/cinn/hlir/pe/nn_util.cc
index 22ae26d03aea7a..8ea958c5a499eb 100644
--- a/paddle/cinn/hlir/pe/nn_util.cc
+++ b/paddle/cinn/hlir/pe/nn_util.cc
@@ -387,13 +387,14 @@ ir::Tensor const_matrix(const std::vector<std::vector<float>>& input,
         auto now = cinn::common::make_const(1.0f);
         for (int ii = 0; ii < row; ii++) {
           for (int jj = 0; jj < col; jj++) {
-            // if (common::is_zero(Expr(ii)-yy) && common::is_zero(Expr(jj)-xx))
+            // if (cinn::common::is_zero(Expr(ii)-yy) &&
+            // cinn::common::is_zero(Expr(jj)-xx))
             // {
             //   now = cinn::common::make_const(input[ii][jj]);
             // }
             auto cond =
-                common::and_all({Expr(ii) - yy == 0, Expr(jj) - xx == 0});
-            now = common::select(
+                cinn::common::and_all({Expr(ii) - yy == 0, Expr(jj) - xx == 0});
+            now = cinn::common::select(
                 cond, cinn::common::make_const(input[ii][jj]), now);
           }
         }
@@ -461,7 +462,8 @@ std::vector<int> GetFirstStepReduceShape(const std::vector<int>& shape,
   // post parallel size
   int post_parallel_size = GetPostParallelSize(shape, axes);
   // the size to unfold las reduce axis
-  int unfold_size = common::GetMaxThreads() / GetParallelSize(shape, axes);
+  int unfold_size =
+      cinn::common::GetMaxThreads() / GetParallelSize(shape, axes);
   CHECK_GT(unfold_size, 1);
   // fuse reduce axis.
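Every hunk in this patch applies the same mechanical rewrite: an unqualified common:: inside CINN code becomes the fully qualified cinn::common::. The sketch below is a minimal, self-contained illustration, with hypothetical Where() helpers rather than the real Paddle headers, of why the full qualification matters once this change introduces a top-level common component alongside CINN's own cinn::common namespace: which namespace an unqualified common:: binds to depends on the scope it appears in, while the fully qualified spelling reads the same everywhere.

// Minimal sketch, hypothetical names only (not the real Paddle/CINN headers):
// shows that what an unqualified common:: names depends on the enclosing scope.
#include <iostream>

namespace common {  // stand-in for the new top-level common component
inline const char* Where() { return "::common"; }
}  // namespace common

namespace cinn {
namespace common {  // stand-in for CINN's pre-existing cinn::common
inline const char* Where() { return "cinn::common"; }
}  // namespace common

namespace hlir {
inline const char* Unqualified() {
  return common::Where();  // inside cinn::hlir this binds to cinn::common
}
inline const char* FullyQualified() {
  return cinn::common::Where();  // the spelling this patch uses everywhere
}
}  // namespace hlir
}  // namespace cinn

int main() {
  std::cout << cinn::hlir::Unqualified() << "\n";     // prints "cinn::common"
  std::cout << common::Where() << "\n";               // prints "::common"
  std::cout << cinn::hlir::FullyQualified() << "\n";  // prints "cinn::common"
}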
diff --git a/paddle/cinn/hlir/pe/pe_broadcast_test.cc b/paddle/cinn/hlir/pe/pe_broadcast_test.cc index 533c3ebdd97068..865f0f0bb475bd 100644 --- a/paddle/cinn/hlir/pe/pe_broadcast_test.cc +++ b/paddle/cinn/hlir/pe/pe_broadcast_test.cc @@ -43,7 +43,7 @@ void TestBroadcastPE(const std::string &fn_name, auto stages = CreateStages({C}); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Module::Builder builder("module0", target); auto func = Lower("fn", stages, {A, B, C}); builder.AddFunction(func); @@ -60,23 +60,24 @@ void TestBroadcastPE(const std::string &fn_name, cinn_buffer_t *A_buf; cinn_buffer_t *B_buf; if (set_value != 0) { - A_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + A_buf = cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) .set_val(set_value) .Build(); - B_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + B_buf = cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) .set_val(set_value) .Build(); } else { - A_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + A_buf = cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) .set_random() .Build(); - B_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + B_buf = cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) .set_random() .Build(); } - auto *C_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_zero() - .Build(); + auto *C_buf = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_zero() + .Build(); cinn_pod_value_t a_arg(A_buf), b_arg(B_buf), c_arg(C_buf); cinn_pod_value_t args[] = {a_arg, b_arg, c_arg}; @@ -102,7 +103,7 @@ void TestBroadcastPE1(const std::string &fn_name, Placeholder B("B", {N}); auto C = func_op(A.tensor(), B.tensor(), "C", Expr(1)); auto stages = CreateStages({C}); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Module::Builder builder("module0", target); auto func = Lower("fn", stages, {A, B, C}); builder.AddFunction(func); @@ -116,22 +117,23 @@ void TestBroadcastPE1(const std::string &fn_name, cinn_buffer_t *A_buf; cinn_buffer_t *B_buf; if (set_value != 0) { - A_buf = common::BufferBuilder(Float(32), - {M.as_int32(), N.as_int32(), K.as_int32()}) + A_buf = cinn::common::BufferBuilder( + Float(32), {M.as_int32(), N.as_int32(), K.as_int32()}) .set_val(set_value) .Build(); - B_buf = common::BufferBuilder(Float(32), {N.as_int32()}) + B_buf = cinn::common::BufferBuilder(Float(32), {N.as_int32()}) .set_val(set_value) .Build(); } else { - A_buf = common::BufferBuilder(Float(32), - {M.as_int32(), N.as_int32(), K.as_int32()}) + A_buf = cinn::common::BufferBuilder( + Float(32), {M.as_int32(), N.as_int32(), K.as_int32()}) + .set_random() + .Build(); + B_buf = cinn::common::BufferBuilder(Float(32), {N.as_int32()}) .set_random() .Build(); - B_buf = - common::BufferBuilder(Float(32), {N.as_int32()}).set_random().Build(); } - auto *C_buf = common::BufferBuilder( + auto *C_buf = cinn::common::BufferBuilder( Float(32), {M.as_int32(), N.as_int32(), K.as_int32()}) .set_zero() .Build(); @@ -163,7 +165,7 @@ void TestBroadcastPE2(const std::string &fn_name, Placeholder B("B", {N, K}); auto C = func_op(A.tensor(), B.tensor(), "C", Expr(1)); auto stages = CreateStages({C}); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Module::Builder builder("module0", target); auto func = 
Lower("fn", stages, {A, B, C}); builder.AddFunction(func); @@ -178,25 +180,25 @@ void TestBroadcastPE2(const std::string &fn_name, cinn_buffer_t *B_buf; if (set_value != 0) { A_buf = - common::BufferBuilder( + cinn::common::BufferBuilder( Float(32), {M.as_int32(), N.as_int32(), K.as_int32(), R.as_int32()}) .set_val(set_value) .Build(); - B_buf = common::BufferBuilder(Float(32), {N.as_int32(), K.as_int32()}) + B_buf = cinn::common::BufferBuilder(Float(32), {N.as_int32(), K.as_int32()}) .set_val(set_value) .Build(); } else { A_buf = - common::BufferBuilder( + cinn::common::BufferBuilder( Float(32), {M.as_int32(), N.as_int32(), K.as_int32(), R.as_int32()}) .set_random() .Build(); - B_buf = common::BufferBuilder(Float(32), {N.as_int32(), K.as_int32()}) + B_buf = cinn::common::BufferBuilder(Float(32), {N.as_int32(), K.as_int32()}) .set_random() .Build(); } auto *C_buf = - common::BufferBuilder( + cinn::common::BufferBuilder( Float(32), {M.as_int32(), N.as_int32(), K.as_int32(), R.as_int32()}) .set_zero() .Build(); diff --git a/paddle/cinn/hlir/pe/pe_elementwise_test.cc b/paddle/cinn/hlir/pe/pe_elementwise_test.cc index c96a28a19762b1..806c340d791e2f 100644 --- a/paddle/cinn/hlir/pe/pe_elementwise_test.cc +++ b/paddle/cinn/hlir/pe/pe_elementwise_test.cc @@ -51,7 +51,7 @@ void TestElementwisePE(const std::string &fn_name, stages[A_out[0]]->Parallel(0); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Module::Builder builder("module0", target); for (auto &tensor : A_out) { stages->InsertLazily(tensor); @@ -70,15 +70,15 @@ void TestElementwisePE(const std::string &fn_name, cinn_buffer_t *A_buf; if (set_value != 0) { - A_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + A_buf = cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) .set_val(set_value) .Build(); } else { - A_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + A_buf = cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) .set_random() .Build(); } - auto *B_buf = common::BufferBuilder(type, {M.as_int32(), N.as_int32()}) + auto *B_buf = cinn::common::BufferBuilder(type, {M.as_int32(), N.as_int32()}) .set_align(type.bits()) .Build(); diff --git a/paddle/cinn/hlir/pe/pe_transform_test.cc b/paddle/cinn/hlir/pe/pe_transform_test.cc index b69b48b4b85bf2..852cc26211298e 100644 --- a/paddle/cinn/hlir/pe/pe_transform_test.cc +++ b/paddle/cinn/hlir/pe/pe_transform_test.cc @@ -52,7 +52,7 @@ TEST(MatmulPE, MatmulCase1) { tensor_args.push_back(C[i]); stages->InsertLazily(C[i]); } - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Module::Builder builder("module0", target); auto func = Lower("fn", stages, tensor_args); builder.AddFunction(func); @@ -66,9 +66,9 @@ TEST(MatmulPE, MatmulCase1) { CHECK(fn); auto fn_ = reinterpret_cast(fn); cinn_buffer_t *A_buf = - common::BufferBuilder(Float(32), {m, k}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {m, k}).set_random().Build(); cinn_buffer_t *B_buf = - common::BufferBuilder(Float(32), {k, n}).set_random().Build(); + cinn::common::BufferBuilder(Float(32), {k, n}).set_random().Build(); cinn_pod_value_t a_arg(A_buf), b_arg(B_buf); std::vector args = {a_arg, b_arg}; std::vector C_buf; @@ -77,7 +77,8 @@ TEST(MatmulPE, MatmulCase1) { for (auto &shape : C[i]->shape) { shapes.push_back(shape.as_int32()); } - auto *buffer = common::BufferBuilder(Float(32), shapes).set_zero().Build(); + auto *buffer = + 
cinn::common::BufferBuilder(Float(32), shapes).set_zero().Build(); CHECK(buffer); C_buf.push_back(buffer); cinn_pod_value_t arg(buffer); @@ -115,9 +116,9 @@ TEST(ScatterAssign, ScatterAssign) { int axis = 0; #ifdef CINN_WITH_CUDA - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); #else - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); #endif auto output = hlir::pe::ScatterAssign( @@ -170,7 +171,7 @@ TEST(SliceAssign, SliceAssign) { LOG(INFO) << "func:\n" << func; #ifdef CINN_WITH_CUDA - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); Module::Builder builder("SliceAssign_Builder", target); builder.AddFunction(func); @@ -211,7 +212,7 @@ TEST(Concat, ConcatCase0) { LOG(INFO) << "func:\n" << func; #ifdef CINN_WITH_CUDA - auto target = common::DefaultNVGPUTarget(); + auto target = cinn::common::DefaultNVGPUTarget(); Module::Builder builder("Concat_Builder", target); builder.AddFunction(func); diff --git a/paddle/cinn/hlir/pe/reduction.cc b/paddle/cinn/hlir/pe/reduction.cc index f809efbd13e67d..e4850e96dabcd7 100644 --- a/paddle/cinn/hlir/pe/reduction.cc +++ b/paddle/cinn/hlir/pe/reduction.cc @@ -73,7 +73,7 @@ void GetRealAxes(int ndim, } } -std::string Type2StrForReduce(common::Type type) { +std::string Type2StrForReduce(cinn::common::Type type) { std::string suffix; if (type.is_int(32)) { return "_int32"; @@ -114,7 +114,7 @@ void GetOutputShape(const std::vector& real_axes, if (keep_dims) { for (size_t i = 0; i < ndim; ++i) { if (std::find(real_axes.begin(), real_axes.end(), i) != real_axes.end()) { - output_shape->push_back(common::make_one()); + output_shape->push_back(cinn::common::make_one()); } else { output_shape->push_back(tensor->shape[i]); } @@ -127,7 +127,7 @@ void GetOutputShape(const std::vector& real_axes, } } if (output_shape->empty()) { - output_shape->push_back(common::make_one()); + output_shape->push_back(cinn::common::make_one()); } } @@ -300,7 +300,7 @@ std::vector WarpReduce(const ir::Tensor& A, tmp_indexs.push_back(Expr(0)); } CHECK_EQ(A->shape.size(), tmp_indexs.size()); - Expr offset = common::IndiceToAbsOffset(A->shape, tmp_indexs); + Expr offset = cinn::common::IndiceToAbsOffset(A->shape, tmp_indexs); return lang::CallExtern(reduce_type, {A, offset, reduce_width}); }, UniqName(output_name + "_" + reduce_type)); @@ -530,7 +530,7 @@ std::vector BlockReduce(const ir::Tensor& A, // checkout input shape size equals tmp indexs size. CHECK_EQ(A->shape.size(), tmp_indexs.size()); // compute offset. - Expr offset = common::IndiceToAbsOffset(A->shape, tmp_indexs); + Expr offset = cinn::common::IndiceToAbsOffset(A->shape, tmp_indexs); // call block reduce sum return lang::CallExtern(reduce_type, {A, offset, reduce_width}); }, @@ -753,7 +753,7 @@ std::vector ReduceInternal(const ir::Tensor& A, const std::vector& axes, \ const bool keep_dim, \ const std::string& output_name) { \ - if (common::GetMaxThreads() / GetParallelSize(A, axes) <= 1) { \ + if (cinn::common::GetMaxThreads() / GetParallelSize(A, axes) <= 1) { \ return {Reduce##name(A, axes, keep_dim, output_name)}; \ } else { \ auto rs = ReduceInternal( \ @@ -824,7 +824,7 @@ std::vector TwoStepBlockReduceInternal( // If the number of current device SM is smaller than the number of SM // required by Warp Reduce, the performance of Warp Reduce is better. // Otherwise, use Block Reduce. 
- auto max_num_threads = common::DefaultNVGPUTarget().max_num_threads(); + auto max_num_threads = cinn::common::DefaultNVGPUTarget().max_num_threads(); int need_reduce_last_count = 1; for (int i = 0; i < A->shape.size(); i++) { if (find(axes.begin(), axes.end(), i) == axes.end()) { @@ -834,9 +834,9 @@ std::vector TwoStepBlockReduceInternal( int warp_reduce_need_sm_count = ceil((need_reduce_last_count * 32) / static_cast( - common::DefaultNVGPUTarget().get_max_threads_per_sm())); + cinn::common::DefaultNVGPUTarget().get_max_threads_per_sm())); // Set Num_max_threads to 32 is Warp Reduce - if (common::DefaultNVGPUTarget().get_multi_processor_count() < + if (cinn::common::DefaultNVGPUTarget().get_multi_processor_count() < warp_reduce_need_sm_count) { max_num_threads = 32; } diff --git a/paddle/cinn/hlir/pe/reduction.h b/paddle/cinn/hlir/pe/reduction.h index a3a5f02915ef9f..4779007daba426 100644 --- a/paddle/cinn/hlir/pe/reduction.h +++ b/paddle/cinn/hlir/pe/reduction.h @@ -471,7 +471,7 @@ std::vector TwoStepBlockReduceAny( std::string CrossThreadReduceExternalFuncName(const ir::Expr& op, const ir::Expr& tensor); -std::string Type2StrForReduce(common::Type type); +std::string Type2StrForReduce(cinn::common::Type type); } // namespace pe } // namespace hlir } // namespace cinn diff --git a/paddle/cinn/hlir/pe/schedule.cc b/paddle/cinn/hlir/pe/schedule.cc index 6e9cfe6d887566..c75f9aefccf29c 100644 --- a/paddle/cinn/hlir/pe/schedule.cc +++ b/paddle/cinn/hlir/pe/schedule.cc @@ -36,13 +36,13 @@ namespace cinn { namespace hlir { namespace pe { -ScheduleParam::ScheduleParam(common::Target::Arch arch) { +ScheduleParam::ScheduleParam(cinn::common::Target::Arch arch) { switch (arch) { - case common::Target::Arch::X86: { + case cinn::common::Target::Arch::X86: { param_data = CreateX86Params(); break; } - case common::Target::Arch::NVGPU: { + case cinn::common::Target::Arch::NVGPU: { param_data = CreateCudaParams(); break; } @@ -85,7 +85,7 @@ int SplitEven(int origin) { return res; } -int GetBasicFactor(const Type &type, const common::Target &target) { +int GetBasicFactor(const Type &type, const cinn::common::Target &target) { int target_native_vector_bits = target.get_target_bits() * 8; int type_bits = type.bits(); return target_native_vector_bits / type_bits; @@ -114,7 +114,7 @@ int GetVectorizeFactor(int shape, int split_factor) { void ScheduleInjectiveCPU(poly::Stage *stage, const std::vector &output_shape, - const common::Target &target, + const cinn::common::Target &target, bool vectorizable) { int dims = stage->n_out_dims(); int factor = GetBasicFactor(stage->tensor()->type(), target); @@ -142,7 +142,7 @@ void ScheduleInjectiveCPU(poly::Stage *stage, void ScheduleInjectiveCPU1(poly::Stage *stage, const std::vector &output_shape, - const common::Target &target, + const cinn::common::Target &target, bool vectorizable) { int dims = stage->n_out_dims(); if (dims > 1) { @@ -187,7 +187,7 @@ void ScheduleInjectiveCPU1(poly::Stage *stage, int GetArrayPackingFactor(int shape, const Type &type, - const common::Target &target) { + const cinn::common::Target &target) { int split_base = GetBasicFactor(type, target); int split_factor = 1; // temporily use shape-1 instead of shape for isl wrong for1 elimination @@ -203,7 +203,7 @@ int GetArrayPackingFactor(int shape, void MatmulScheduleCUDA(poly::StageMap stages, const ir::Tensor &output, - const common::Target &target) { + const cinn::common::Target &target) { stages[output]->Split(1, 2); stages[output]->Bind(0, "blockIdx.x"); stages[output]->Bind(1, 
"threadIdx.x"); @@ -212,7 +212,7 @@ void MatmulScheduleCUDA(poly::StageMap stages, void MatmulScheduleCPU(poly::StageMap stages, const ir::Tensor &output, const ir::Tensor &packedB, - const common::Target &target) { + const cinn::common::Target &target) { CHECK_EQ(output->type(), packedB->type()); int basic_split_factor = GetBasicFactor(packedB->type(), target); // packedB @@ -324,7 +324,7 @@ void MatmulScheduleCPU(poly::StageMap stages, void MulScheduleCPU(poly::StageMap stages, const ir::Tensor &output, const ir::Tensor &reduce_first, - const common::Target &target) { + const cinn::common::Target &target) { int split_factor = GetBasicFactor(output->type(), target); auto out_reduce_axis = output->reduce_axis; std::vector reduce_first_shape = reduce_first->shape; @@ -384,7 +384,7 @@ int GetBlockBindAxis(const std::vector &shape, void CudaReduceSchedule(poly::StageMap stages, ir::Tensor output, int last_dimension_num, - const common::Target &target) { + const cinn::common::Target &target) { int parallel_thread_num = 1; for (int idx = output->shape.size() - 1; idx >= static_cast(output->shape.size()) - last_dimension_num; @@ -419,7 +419,7 @@ void CudaReduceSchedule(poly::StageMap stages, void CudaWarpReduceSchedule(poly::StageMap stages, ir::Tensor tmp_out, ir::Tensor out, - const common::Target &target) { + const cinn::common::Target &target) { int sum_out_dim = 1; for (int idx = 0; idx < static_cast(tmp_out->shape.size()) - 2; ++idx) { stages[out]->Fuse(0, 1); @@ -456,7 +456,7 @@ void CudaWarpReduceSchedule(poly::StageMap stages, void CudaBlockReduceInternalSchedule(poly::StageMap stages, ir::Tensor tmp_out, ir::Tensor out, - const common::Target &target) { + const cinn::common::Target &target) { for (int idx = 0; idx < static_cast(tmp_out->shape.size()) - 2; ++idx) { stages[tmp_out]->Fuse(0, 1); stages[out]->Fuse(0, 1); @@ -479,7 +479,7 @@ void CudaBlockReduceSchedule(poly::StageMap stages, ir::Tensor reduce_tmp_out, ir::Tensor tmp_out, ir::Tensor out, - const common::Target &target) { + const cinn::common::Target &target) { int output_shape_size_without_reduce = tmp_out->shape.size() - 1; // fuse last parallel dimension for (int idx = 0; idx < reduce_tmp_out->shape.size() - tmp_out->shape.size(); @@ -518,7 +518,7 @@ void CudaBlockShuffleReduceSchedule(poly::StageMap stages, ir::Tensor reshape, ir::Tensor internal, ir::Tensor out, - const common::Target &target) { + const cinn::common::Target &target) { int fuse_times = internal->shape.size() - 2; for (int idx = 0; idx < fuse_times; ++idx) { stages[internal]->Fuse(0, 1); @@ -557,7 +557,7 @@ void CudaTwoStepReduceSchedule(poly::StageMap stages, ir::Tensor internal, ir::Tensor tmp_out, ir::Tensor out, - const common::Target &target) { + const cinn::common::Target &target) { // fuse axis for (int idx = 0; idx < static_cast(internal->shape.size()) - 2; ++idx) { stages[internal]->Fuse(0, 1); @@ -604,7 +604,7 @@ void SoftmaxScheduleCPU(poly::StageMap stage, void GlobalPoolScheduleGPU(poly::StageMap stages, const std::vector &output, - const common::Target &target) { + const cinn::common::Target &target) { auto &out = output[0]; auto &reduce = output[1]; stages[out]->Fuse(0, 1); @@ -617,7 +617,7 @@ void GlobalPoolScheduleGPU(poly::StageMap stages, } void PoolScheduleCPU(poly::StageMap stages, const ir::Tensor &output, - const common::Target &target) { + const cinn::common::Target &target) { CHECK_GE(stages[output]->n_out_dims(), 2); stages[output]->Fuse({0, 1}); stages[output]->Parallel(0); @@ -625,7 +625,7 @@ void PoolScheduleCPU(poly::StageMap 
stages, void PoolScheduleGPU(poly::StageMap stages, const ir::Tensor &output, - const common::Target &target) { + const cinn::common::Target &target) { CHECK_GE(stages[output]->axis_names().size(), 4); stages[output]->Fuse({0, 1, 2, 3}); stages[output]->Split(0, 1024); @@ -640,7 +640,7 @@ void GetConv2dFactors(absl::flat_hash_map *factors, int oh, int ow, const Type &type, - const common::Target &target, + const cinn::common::Target &target, const std::string &key, bool import_params) { if (import_params) { @@ -742,7 +742,7 @@ void GetConv2d1x1Factors(absl::flat_hash_map *factors, int oh, int ow, const Type &type, - const common::Target &target) { + const cinn::common::Target &target) { int bn_base = GetBasicFactor(type, target); int oc_bn = 1; for (int i = bn_base; i > 1; i--) { @@ -870,7 +870,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages, const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, - const common::Target &target, + const cinn::common::Target &target, const std::string &key, bool do_padding) { CHECK(target.arch == Target::Arch::X86) @@ -881,8 +881,8 @@ void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages, absl::flat_hash_map conv2d_factors; CHECK_EQ(packed_out->shape.size(), 5U) << "packed_out's shape size should be 5"; - Expr h_out = common::AutoSimplify(packed_out->shape[2]); - Expr w_out = common::AutoSimplify(packed_out->shape[3]); + Expr h_out = cinn::common::AutoSimplify(packed_out->shape[2]); + Expr w_out = cinn::common::AutoSimplify(packed_out->shape[3]); int oh = h_out.as_int32(); int ow = w_out.as_int32(); int basic_split_factor = GetBasicFactor(type, target); @@ -892,8 +892,8 @@ void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages, auto input_shape = input_pad->shape; CHECK_EQ(input_shape.size(), 5U) << "input shape size should be 5"; - Expr oc_bn = common::AutoSimplify(packed_out->shape.back()); - Expr ic_bn = common::AutoSimplify(input_shape.back()); + Expr oc_bn = cinn::common::AutoSimplify(packed_out->shape.back()); + Expr ic_bn = cinn::common::AutoSimplify(input_shape.back()); int oc_bn_size = oc_bn.as_int32(); int ic_bn_size = ic_bn.as_int32(); VLOG(3) << "oh_bn_size " << oh_bn_size; @@ -1021,7 +1021,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages, const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, - const common::Target &target) { + const cinn::common::Target &target) { CHECK(target.arch == Target::Arch::X86) << "Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse schedule only used in x86"; CHECK(packed_out.defined()); @@ -1030,8 +1030,8 @@ void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages, absl::flat_hash_map conv2d_factors; CHECK_EQ(packed_out->shape.size(), 5U) << "packed_out's shape size should be 5"; - Expr h_out = common::AutoSimplify(packed_out->shape[2]); - Expr w_out = common::AutoSimplify(packed_out->shape[3]); + Expr h_out = cinn::common::AutoSimplify(packed_out->shape[2]); + Expr w_out = cinn::common::AutoSimplify(packed_out->shape[3]); int oh = h_out.as_int32(); int ow = w_out.as_int32(); int basic_split_factor = GetBasicFactor(type, target); @@ -1042,8 +1042,8 @@ void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages, auto input_shape = input_pad->shape; int shape_size = input_shape.size(); CHECK_EQ(shape_size, 5U) << "input shape size should be 5"; - Expr oc_bn = common::AutoSimplify(packed_out->shape.back()); - Expr ic_bn = common::AutoSimplify(input_shape.back()); + Expr oc_bn = 
cinn::common::AutoSimplify(packed_out->shape.back()); + Expr ic_bn = cinn::common::AutoSimplify(input_shape.back()); int oc_bn_size = oc_bn.as_int32(); int ic_bn_size = ic_bn.as_int32(); VLOG(3) << "ow_bn_size" << ow_bn_size; @@ -1143,7 +1143,7 @@ void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages, const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, - const common::Target &target) { + const cinn::common::Target &target) { CHECK(target.arch == Target::Arch::X86) << "Conv2d_NCHWc_Schedule_CPU_Nofuse schedule only used in x86"; CHECK(packed_out.defined()); @@ -1152,7 +1152,7 @@ void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages, absl::flat_hash_map conv2d_factors; CHECK_EQ(packed_out->shape.size(), 5U) << "packed_out's shape size should be 5"; - Expr w_out = common::AutoSimplify(packed_out->shape[3]); + Expr w_out = cinn::common::AutoSimplify(packed_out->shape[3]); int ow = w_out.as_int32(); int basic_split_factor = GetBasicFactor(type, target); GetConv2dFactors(&conv2d_factors, -1, -1, -1, -1, ow, type, target); @@ -1161,8 +1161,8 @@ void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages, auto input_shape = input_pad->shape; int shape_size = input_shape.size(); CHECK_EQ(shape_size, 5U) << "input shape size should be 5"; - Expr oc_bn = common::AutoSimplify(packed_out->shape.back()); - Expr ic_bn = common::AutoSimplify(input_shape.back()); + Expr oc_bn = cinn::common::AutoSimplify(packed_out->shape.back()); + Expr ic_bn = cinn::common::AutoSimplify(input_shape.back()); int oc_bn_size = oc_bn.as_int32(); int ic_bn_size = ic_bn.as_int32(); VLOG(3) << "ow_bn_size " << ow_bn_size; @@ -1248,7 +1248,7 @@ void Conv2d_NCHWc_Schedule_CPU(poly::StageMap stages, const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, - const common::Target &target, + const cinn::common::Target &target, const std::string &key, bool do_padding) { CHECK(target.arch == Target::Arch::X86) @@ -1258,13 +1258,13 @@ void Conv2d_NCHWc_Schedule_CPU(poly::StageMap stages, auto type = packed_out->type(); CHECK_EQ(packed_out->shape.size(), 5U) << "packed_out's shape size should be 5"; - Expr w_out = common::AutoSimplify(packed_out->shape[3]); + Expr w_out = cinn::common::AutoSimplify(packed_out->shape[3]); int ow = w_out.as_int32(); auto input_shape = input_pad->shape; int shape_size = input_shape.size(); CHECK_EQ(shape_size, 5U) << "input shape size should be 5"; - Expr oc_bn = common::AutoSimplify(packed_out->shape.back()); - Expr ic_bn = common::AutoSimplify(input_shape.back()); + Expr oc_bn = cinn::common::AutoSimplify(packed_out->shape.back()); + Expr ic_bn = cinn::common::AutoSimplify(input_shape.back()); int oc_bn_size = oc_bn.as_int32(); int ic_bn_size = ic_bn.as_int32(); @@ -1381,7 +1381,7 @@ void Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse( const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, - const common::Target &target, + const cinn::common::Target &target, bool do_padding) { CHECK(target.arch == Target::Arch::X86) << "Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse schedule only used in x86"; @@ -1391,7 +1391,7 @@ void Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse( absl::flat_hash_map conv2d_factors; CHECK_EQ(packed_out->shape.size(), 5U) << "packed_out's shape size should be 5"; - Expr w_out = common::AutoSimplify(packed_out->shape[3]); + Expr w_out = cinn::common::AutoSimplify(packed_out->shape[3]); int ow = w_out.as_int32(); int basic_split_factor = GetBasicFactor(type, target); 
GetConv2dFactors(&conv2d_factors, -1, -1, -1, -1, ow, type, target); @@ -1400,8 +1400,8 @@ void Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse( auto input_shape = input_pad->shape; int shape_size = input_shape.size(); CHECK_EQ(shape_size, 5U) << "input shape size should be 5"; - Expr oc_bn = common::AutoSimplify(packed_out->shape.back()); - Expr ic_bn = common::AutoSimplify(input_shape.back()); + Expr oc_bn = cinn::common::AutoSimplify(packed_out->shape.back()); + Expr ic_bn = cinn::common::AutoSimplify(input_shape.back()); int oc_bn_size = oc_bn.as_int32(); int ic_bn_size = ic_bn.as_int32(); VLOG(3) << "ow_bn_size " << ow_bn_size; @@ -1482,7 +1482,7 @@ void Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse( void CudaScheduleMul(poly::StageMap stages, ir::Tensor output, const std::vector &output_shape, - const common::Target &target) { + const cinn::common::Target &target) { stages[output]->Split(1, 2); stages[output]->Bind(0, "blockIdx.x"); stages[output]->Bind(1, "threadIdx.x"); @@ -2301,7 +2301,7 @@ void SaveSerialData( void CudaScheduleDepthwiseConv(poly::StageMap stages, ir::Tensor &output, // NOLINT - const common::Target &target) { + const cinn::common::Target &target) { auto OL = stages[output]->CacheWrite("local", stages, output); stages[output]->Bind(0, "blockIdx.x"); stages[output]->Bind(1, "blockIdx.y"); @@ -2316,7 +2316,7 @@ void CudaScheduleConv(poly::StageMap stages, ir::Tensor &input_pad, // NOLINT ir::Tensor &weights, // NOLINT ir::Tensor &output, // NOLINT - const common::Target &target) { + const cinn::common::Target &target) { auto &res = ScheduleParam::get_cuda_instance().GetParam(); int n = output->shape[0].as_int32(); int c = output->shape[1].as_int32(); @@ -2385,7 +2385,7 @@ void CudaScheduleConv2(poly::StageMap stages, ir::Tensor &input_pad, // NOLINT ir::Tensor &weights, // NOLINT ir::Tensor &output, // NOLINT - const common::Target &target, + const cinn::common::Target &target, const std::string &key) { auto &res = ScheduleParam::get_cuda_instance().GetParam(); stages[input_pad]->ComputeInline(); @@ -2517,7 +2517,7 @@ void CudaScheduleConv2(poly::StageMap stages, void CudaScheduleWinogradConv(poly::StageMap wino_stages, std::vector &all_tensors, // NOLINT - const common::Target &target) { + const cinn::common::Target &target) { auto &res = ScheduleParam::get_cuda_instance().GetParam(); auto &wino_weights_dilation = all_tensors[0]; auto &wino_input_pad = all_tensors[1]; @@ -2673,7 +2673,7 @@ int MaxFactorLessThan(int a, int b) { void CudaScheduleInjectiveWithVectorize(poly::Stage *stage, const std::vector &output_shape, - const common::Target &target) { + const cinn::common::Target &target) { int dims = stage->n_out_dims(); int prod_size = std::accumulate( output_shape.begin(), output_shape.end(), 1, std::multiplies()); @@ -2745,7 +2745,7 @@ void CudaScheduleInjectiveWithVectorize(poly::Stage *stage, void CudaScheduleInjective(poly::Stage *stage, const std::vector &output_shape, - const common::Target &target) { + const cinn::common::Target &target) { CHECK_EQ(stage->n_out_dims(), stage->n_in_dims()) << "The dims of op are not equal"; if (FLAGS_cinn_use_cuda_vectorize) { @@ -2777,10 +2777,10 @@ void CudaScheduleInjective(poly::Stage *stage, stage->Bind(1, "threadIdx.x"); } -void CudaSplitSchedule(common::CINNValuePack *arg_pack, +void CudaSplitSchedule(cinn::common::CINNValuePack *arg_pack, const std::vector> &output_shapes, int axis, - const common::Target &target) { + const cinn::common::Target &target) { poly::StageMap stages = arg_pack->back(); std::vector out_tensors; 
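// NOTE: a minimal sketch of why this patch spells out cinn::common everywhere.
// Once DDim etc. live in a new top-level ::common namespace, an unqualified
// `common::` can become ambiguous in translation units that pull cinn into
// scope. The namespace contents below are hypothetical; only the shape of the
// clash is real:
//
//   namespace common { struct DDim {}; }                       // new top-level common
//   namespace cinn { namespace common { struct Target {}; } }  // CINN's own common
//   using namespace cinn;
//   // common::Target t;     // error: `common` is ambiguous (::common vs cinn::common)
//   cinn::common::Target t;  // fully qualified -- the form this patch adopts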
int dims = output_shapes[0].size(); diff --git a/paddle/cinn/hlir/pe/schedule.h b/paddle/cinn/hlir/pe/schedule.h index cd3262cf3ad18e..8e863c50e5b6c2 100644 --- a/paddle/cinn/hlir/pe/schedule.h +++ b/paddle/cinn/hlir/pe/schedule.h @@ -35,11 +35,11 @@ class ScheduleParam { ScheduleParam(const ScheduleParam &) = delete; ScheduleParam &operator=(const ScheduleParam &) = delete; static ScheduleParam &get_cuda_instance() { - static ScheduleParam instance{common::Target::Arch::NVGPU}; + static ScheduleParam instance{cinn::common::Target::Arch::NVGPU}; return instance; } static ScheduleParam &get_x86_instance() { - static ScheduleParam instance{common::Target::Arch::X86}; + static ScheduleParam instance{cinn::common::Target::Arch::X86}; return instance; } absl::flat_hash_map>> param_data; @@ -66,37 +66,37 @@ int GetVectorizeFactor(int shape, int split_factor); int SplitEven(int origin); -int GetBasicFactor(const Type &type, const common::Target &target); +int GetBasicFactor(const Type &type, const cinn::common::Target &target); int GetBetterSplitFactor(int shape, int split_factor); int GetArrayPackingFactor(int shape, const Type &type, - const common::Target &target); + const cinn::common::Target &target); void ScheduleInjectiveCPU(poly::Stage *stage, const std::vector &output_shape, - const common::Target &target, + const cinn::common::Target &target, bool vectorizable = true); // to deprecate void ScheduleInjectiveCPU1(poly::Stage *stage, const std::vector &output_shape, - const common::Target &target, + const cinn::common::Target &target, bool vectorizable = true); void MatmulScheduleCUDA(poly::StageMap stages, const ir::Tensor &output, - const common::Target &target); + const cinn::common::Target &target); void MatmulScheduleCPU(poly::StageMap stage, const ir::Tensor &output, const ir::Tensor &packedB, - const common::Target &target); + const cinn::common::Target &target); void MulScheduleCPU(poly::StageMap stage, const ir::Tensor &output, const ir::Tensor &input_tensor, - const common::Target &target); + const cinn::common::Target &target); void SoftmaxScheduleCPU(poly::StageMap stage, const ir::Tensor &output, @@ -110,7 +110,7 @@ void GetConv2dFactors(absl::flat_hash_map *factors, int oh, int ow, const Type &type, - const common::Target &target, + const cinn::common::Target &target, const std::string &key = "", bool import_params = true); @@ -120,7 +120,7 @@ void GetConv2d1x1Factors(absl::flat_hash_map *factors, int oh, int ow, const Type &type, - const common::Target &target); + const cinn::common::Target &target); void Conv2d_NCHWc_Schedule_CPU(poly::StageMap stages, const ir::Tensor &res, @@ -128,18 +128,18 @@ void Conv2d_NCHWc_Schedule_CPU(poly::StageMap stages, const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, - const common::Target &target, + const cinn::common::Target &target, const std::string &key, bool do_padding); void GlobalPoolScheduleGPU(poly::StageMap stages, const std::vector &output, - const common::Target &target); + const cinn::common::Target &target); void PoolScheduleCPU(poly::StageMap stages, const ir::Tensor &output, - const common::Target &target); + const cinn::common::Target &target); void PoolScheduleGPU(poly::StageMap stages, const ir::Tensor &output, - const common::Target &target); + const cinn::common::Target &target); void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages, const ir::Tensor &res, @@ -147,7 +147,7 @@ void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages, const ir::Tensor &input_pad, const 
ir::Tensor &weights_dilation, const ir::Tensor &data, - const common::Target &target); + const cinn::common::Target &target); void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages, const ir::Tensor &res, @@ -155,7 +155,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages, const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, - const common::Target &target, + const cinn::common::Target &target, const std::string &key, bool do_padding); @@ -165,7 +165,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages, const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, - const common::Target &target); + const cinn::common::Target &target); void Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse( poly::StageMap stages, @@ -174,78 +174,78 @@ void Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse( const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, - const common::Target &target, + const cinn::common::Target &target, bool do_padding); void CudaScheduleMul(poly::StageMap stages, ir::Tensor output, const std::vector &output_shape, - const common::Target &target); + const cinn::common::Target &target); // reduce shedules. void CudaReduceSchedule(poly::StageMap stages, ir::Tensor output, int last_dimension_num, - const common::Target &target); + const cinn::common::Target &target); void CudaWarpReduceSchedule(poly::StageMap stages, ir::Tensor tmp_out, ir::Tensor out, - const common::Target &target); + const cinn::common::Target &target); void CudaBlockReduceInternalSchedule(poly::StageMap stages, ir::Tensor tmp_out, ir::Tensor out, - const common::Target &target); + const cinn::common::Target &target); void CudaBlockReduceSchedule(poly::StageMap stages, ir::Tensor reduce_tmp_out, ir::Tensor tmp_out, ir::Tensor out, - const common::Target &target); + const cinn::common::Target &target); void CudaBlockShuffleReduceSchedule(poly::StageMap stages, ir::Tensor reduce_reshape, ir::Tensor reduce_internal, ir::Tensor reduce_out, - const common::Target &target); + const cinn::common::Target &target); void CudaTwoStepReduceSchedule(poly::StageMap stages, ir::Tensor reshape, ir::Tensor internal, ir::Tensor tmp_out, ir::Tensor out, - const common::Target &target); + const cinn::common::Target &target); void CudaScheduleDepthwiseConv(poly::StageMap stages, ir::Tensor &output, // NOLINT - const common::Target &target); + const cinn::common::Target &target); void CudaScheduleConv(poly::StageMap stages, ir::Tensor &input_pad, // NOLINT ir::Tensor &weights, // NOLINT ir::Tensor &output, // NOLINT - const common::Target &target); + const cinn::common::Target &target); void CudaScheduleWinogradConv(poly::StageMap wino_stages, std::vector &all_tensors, // NOLINT - const common::Target &target); + const cinn::common::Target &target); void CudaScheduleConv2(poly::StageMap stages, ir::Tensor &input_pad, // NOLINT ir::Tensor &weights, // NOLINT ir::Tensor &output, // NOLINT - const common::Target &target, + const cinn::common::Target &target, const std::string &key); void CudaScheduleInjective(poly::Stage *stage, const std::vector &output_shape, - const common::Target &target); + const cinn::common::Target &target); -void CudaSplitSchedule(common::CINNValuePack *arg_pack, +void CudaSplitSchedule(cinn::common::CINNValuePack *arg_pack, const std::vector> &output_shapes, int axis, - const common::Target &target); + const cinn::common::Target &target); void CreateCudaSerialData(const std::string &file_name = 
"default_serial.log"); diff --git a/paddle/cinn/hlir/pe/transform.cc b/paddle/cinn/hlir/pe/transform.cc index 5c02b4a8493135..81df619097eeff 100644 --- a/paddle/cinn/hlir/pe/transform.cc +++ b/paddle/cinn/hlir/pe/transform.cc @@ -391,7 +391,7 @@ std::vector Split( out_shape[i], [=](const std::vector& indice) { auto temp = indice; - temp[axis] = common::AutoSimplify(temp[axis] + Expr(start[i])); + temp[axis] = cinn::common::AutoSimplify(temp[axis] + Expr(start[i])); return A(temp); }, names[i]); @@ -410,7 +410,7 @@ ir::Tensor Concat(const ir::Tensor& A, std::vector output_shape = A->shape; Expr pivot = A->shape[axis]; output_shape[axis] = - common::AutoSimplify(output_shape[axis] + B->shape[axis]); + cinn::common::AutoSimplify(output_shape[axis] + B->shape[axis]); auto res = Compute( output_shape, [=](const std::vector& indice) { @@ -438,8 +438,8 @@ ir::Tensor Concat(const std::vector& input_tensors, CHECK_EQ(input_tensors[i]->shape.size(), input_dim) << "Dimensions of inputs tensors in Concat should be equal! Please " "check."; - output_shape[axis] = common::AutoSimplify(output_shape[axis] + - input_tensors[i]->shape[axis]); + output_shape[axis] = cinn::common::AutoSimplify( + output_shape[axis] + input_tensors[i]->shape[axis]); } auto res = Compute( @@ -448,7 +448,7 @@ ir::Tensor Concat(const std::vector& input_tensors, auto ret = input_tensors[0](indice); Expr accumulate_shape = Expr(0); for (int i = 0; i < input_size - 1; i++) { - accumulate_shape = common::AutoSimplify( + accumulate_shape = cinn::common::AutoSimplify( accumulate_shape + input_tensors[i]->shape[axis]); std::vector new_indice = indice; new_indice[axis] = indice[axis] - accumulate_shape; @@ -468,7 +468,7 @@ std::vector MatmulV2(const Tensor& A, bool trans_b, float alpha, const std::string& name, - const common::Target& target) { + const cinn::common::Target& target) { std::vector shape_A = A->shape; std::vector shape_B = B->shape; int a_dim = shape_A.size(); @@ -564,7 +564,7 @@ std::vector MatmulMKL(const Tensor& A, bool trans_b, float alpha, const std::string& name, - const common::Target& target) { + const cinn::common::Target& target) { CHECK(target.arch == Target::Arch::X86) << "mkl should be used in the cpu environment"; std::vector shape_A = A->shape; @@ -597,18 +597,18 @@ std::vector MatmulMKL(const Tensor& A, [=]() -> Expr { return lang::CallExtern("cinn_cpu_mkl_gemm_fp32", { - Expr(alpha), // alpha - M, // M - N, // N - x_width, // K - common::make_bool(trans_a), // ta - common::make_bool(trans_b), // tb - shape_A.back(), // lda - shape_B.back(), // ldb - N, // ldc - common::make_zero(), // beta - A, // A - B, // B + Expr(alpha), // alpha + M, // M + N, // N + x_width, // K + cinn::common::make_bool(trans_a), // ta + cinn::common::make_bool(trans_b), // tb + shape_A.back(), // lda + shape_B.back(), // ldb + N, // ldc + cinn::common::make_zero(), // beta + A, // A + B, // B }); }, UniqName("matmul_mkl_out")); @@ -619,22 +619,22 @@ std::vector MatmulMKL(const Tensor& A, [=]() -> Expr { return lang::CallExtern("cinn_cpu_mkl_gemm_batch_fp32", { - Expr(alpha), // alpha - shape_A.front(), // batch - M, // M - N, // N - x_width, // K - common::make_bool(trans_a), // ta - common::make_bool(trans_b), // tb - shape_A.back(), // lda - shape_B.back(), // ldb - N, // ldc - M * x_width, // a_stride - N * x_width, // b_stride - M * N, // c_stride - common::make_zero(), // beta - A, // A - B, // B + Expr(alpha), // alpha + shape_A.front(), // batch + M, // M + N, // N + x_width, // K + cinn::common::make_bool(trans_a), // ta + 
cinn::common::make_bool(trans_b), // tb + shape_A.back(), // lda + shape_B.back(), // ldb + N, // ldc + M * x_width, // a_stride + N * x_width, // b_stride + M * N, // c_stride + cinn::common::make_zero(), // beta + A, // A + B, // B }); }, UniqName("batch_matmul_mkl_out")); @@ -644,7 +644,9 @@ std::vector MatmulMKL(const Tensor& A, return {out, call}; } -int GetMulFactor(int shape, const Type& type, const common::Target& target) { +int GetMulFactor(int shape, + const Type& type, + const cinn::common::Target& target) { int split_base = GetBasicFactor(type, target); int split_factor = 1; for (size_t i = split_base; i >= 1; --i) { @@ -659,7 +661,7 @@ int GetMulFactor(int shape, const Type& type, const common::Target& target) { std::vector MulBase(const Tensor& A, const Tensor& B, const std::string& name, - const common::Target& target) { + const cinn::common::Target& target) { std::vector output_shape; CHECK_EQ(A->shape.size(), 2U) << "tensor_A's shape size should be two while current shape size is " @@ -748,7 +750,7 @@ std::vector Mul(const Tensor& A, std::vector MulMKL(const Tensor& A, const Tensor& B, const std::string& name, - const common::Target& target) { + const cinn::common::Target& target) { CHECK(target.arch == Target::Arch::X86) << "mkl should be used in the cpu environment"; std::vector shape_A = A->shape; @@ -776,18 +778,18 @@ std::vector MulMKL(const Tensor& A, [=]() -> Expr { return lang::CallExtern("cinn_cpu_mkl_gemm_fp32", { - Expr(1.0f), // alpha - M, // M - N, // N - x_width, // K - common::make_bool(false), // ta - common::make_bool(true), // tb - shape_A.back(), // lda - shape_B.back(), // ldb - N, // ldc - common::make_zero(), // beta - A, // A - B, // B + Expr(1.0f), // alpha + M, // M + N, // N + x_width, // K + cinn::common::make_bool(false), // ta + cinn::common::make_bool(true), // tb + shape_A.back(), // lda + shape_B.back(), // ldb + N, // ldc + cinn::common::make_zero(), // beta + A, // A + B, // B }); }, UniqName("mul_mkl_out")); @@ -847,7 +849,7 @@ std::vector InferShapeLayoutTransform( int dst_prim_index = (*split_index_map)[i][0]; int dst_sub_index = (*split_index_map)[i][1]; int factor = (*split_index_map)[i][2]; - Expr chunk_shape = common::AutoSimplify(input_shapes[i] / factor); + Expr chunk_shape = cinn::common::AutoSimplify(input_shapes[i] / factor); Expr block_shape = Expr(factor); output_shape[dst_prim_index] = chunk_shape; output_shape[dst_sub_index] = block_shape; @@ -867,7 +869,7 @@ std::vector InferShapeLayoutTransform( CHECK_GE(input_shapes.size(), src_sub_index); CHECK_EQ(input_shapes[src_sub_index].as_int32(), factor); output_shape[i] = - common::AutoSimplify(input_shapes[src_prim_index] * factor); + cinn::common::AutoSimplify(input_shapes[src_prim_index] * factor); } else if ((*split_index_map)[i].size() == 1) { int src_prim_index = (*split_index_map)[i][0]; output_shape[i] = input_shapes[src_prim_index]; @@ -915,11 +917,13 @@ ir::Tensor LayoutTransform(const Tensor& input, int sub_index = split_infos[1]; int factor = split_infos[2]; if (dst_dim > src_dim) { - new_indice[i] = common::AutoSimplify(indice[prim_index] * factor + - indice[sub_index]); + new_indice[i] = cinn::common::AutoSimplify( + indice[prim_index] * factor + indice[sub_index]); } else { - new_indice[prim_index] = common::AutoSimplify(indice[i] / factor); - new_indice[sub_index] = common::AutoSimplify(indice[i] % factor); + new_indice[prim_index] = + cinn::common::AutoSimplify(indice[i] / factor); + new_indice[sub_index] = + cinn::common::AutoSimplify(indice[i] % factor); } } 
else if (split_infos.size() == 1) { @@ -1186,7 +1190,7 @@ ir::Tensor Gather(const ir::Tensor& x, // to int32 in CINN. See the below link for more details: // https://github.com/PaddlePaddle/CINN/blob/85ab4981a38926dc5c1dbf672762cec335d2b857/cinn/ir/ir.cc#L477 transformed_indice[axis] = - ir::Cast::Make(common::Int(32), index(indice)); + ir::Cast::Make(cinn::common::Int(32), index(indice)); return x(transformed_indice); }, name); @@ -1196,15 +1200,15 @@ ir::Tensor Gather(const ir::Tensor& x, ir::Tensor ScatterAssign(const ir::Tensor& input, const ir::Tensor& updates, const ir::Tensor& index, - const common::Target& target, + const cinn::common::Target& target, const int axis, const std::string& output_name) { - CHECK_EQ(index->type(), common::Int(32)) + CHECK_EQ(index->type(), cinn::common::Int(32)) << "Param [Index] of ScatterAssign only support int32 ! Please Check.\n"; std::string extern_fun_name; - if (target.arch == common::Target::Arch::NVGPU) { + if (target.arch == cinn::common::Target::Arch::NVGPU) { extern_fun_name.assign("cinn_cuda_find_int"); - } else if (target.arch == common::Target::Arch::X86) { + } else if (target.arch == cinn::common::Target::Arch::X86) { extern_fun_name.assign("cinn_host_find_int"); } else { LOG(FATAL) << "ScatterAssign only support X86 and NVGPU ! Please Check.\n"; @@ -1236,13 +1240,13 @@ ir::Tensor ScatterAssign(const ir::Tensor& input, ir::Tensor ScatterAdd(const ir::Tensor& input, const ir::Tensor& updates, const ir::Tensor& index, - const common::Target& target, + const cinn::common::Target& target, const int axis, const std::string& output_name) { - CHECK_EQ(target.arch, common::Target::Arch::NVGPU) + CHECK_EQ(target.arch, cinn::common::Target::Arch::NVGPU) << "Op IndexAdd only support NVGPU now ! Please Check.\n"; - CHECK_EQ(index->type(), common::Int(32)) + CHECK_EQ(index->type(), cinn::common::Int(32)) << "Param [index] of IndexAdd only support int32 ! Please Check.\n"; CHECK_EQ(index->shape.size(), 1) << "The dimension of param [index] of " "IndexAdd should be 1 ! 
Please Check.\n"; diff --git a/paddle/cinn/hlir/pe/transform.h b/paddle/cinn/hlir/pe/transform.h index e6dffa42e803b2..dfc090b0cad444 100644 --- a/paddle/cinn/hlir/pe/transform.h +++ b/paddle/cinn/hlir/pe/transform.h @@ -83,7 +83,7 @@ std::vector MatmulV2( bool trans_b = false, float alpha = 1, const std::string& name = UniqName("T_Transform_MatmulV2_out"), - const common::Target& target = common::DefaultHostTarget()); + const cinn::common::Target& target = cinn::common::DefaultHostTarget()); std::vector MatmulMKL( const ir::Tensor& A, @@ -92,9 +92,11 @@ std::vector MatmulMKL( bool trans_b = false, float alpha = 1, const std::string& name = UniqName("T_Transform_MatmulMKL_out"), - const common::Target& target = common::DefaultHostTarget()); + const cinn::common::Target& target = cinn::common::DefaultHostTarget()); -int GetMulFactor(int shape, const Type& type, const common::Target& target); +int GetMulFactor(int shape, + const Type& type, + const cinn::common::Target& target); /** * @brief basic PE that calculates a matrix multiplication @@ -112,7 +114,7 @@ std::vector MulBase( const ir::Tensor& A, const ir::Tensor& B, const std::string& name = UniqName("T_Transform_MulBase_out"), - const common::Target& target = common::DefaultHostTarget()); + const cinn::common::Target& target = cinn::common::DefaultHostTarget()); std::vector Mul(const ir::Tensor& A, const ir::Tensor& B, @@ -125,7 +127,7 @@ std::vector MulMKL( const ir::Tensor& A, const ir::Tensor& B, const std::string& name = UniqName("T_Transform_MulMKL_out"), - const common::Target& target = common::DefaultHostTarget()); + const cinn::common::Target& target = cinn::common::DefaultHostTarget()); ir::Tensor LayoutTransform( const ir::Tensor& input, @@ -223,7 +225,7 @@ ir::Tensor ScatterAssign( const ir::Tensor& input, const ir::Tensor& updates, const ir::Tensor& index, - const common::Target& target, + const cinn::common::Target& target, const int axis = 0, const std::string& output_name = UniqName("T_Transform_ScatterAssign_out")); @@ -237,7 +239,7 @@ ir::Tensor ScatterAssign( ir::Tensor ScatterAdd(const ir::Tensor& input, const ir::Tensor& updates, const ir::Tensor& index, - const common::Target& target, + const cinn::common::Target& target, const int axis, const std::string& output_name); diff --git a/paddle/cinn/ir/buffer.cc b/paddle/cinn/ir/buffer.cc index ee816d2b0bd716..ada0d4487b7f02 100644 --- a/paddle/cinn/ir/buffer.cc +++ b/paddle/cinn/ir/buffer.cc @@ -55,7 +55,7 @@ Buffer _Buffer_::Make(Var data, CHECK(dtype.valid()); CHECK(!dtype.is_unk()); CHECK(!dtype.is_void()); - auto *node = common::make_shared<_Buffer_>(); + auto *node = cinn::common::make_shared<_Buffer_>(); node->shape = shape; node->strides = strides; node->elem_offset = elem_offset; @@ -69,7 +69,7 @@ Buffer _Buffer_::Make(Var data, } Buffer _Buffer_::Make(const std::string &name, const std::vector &shape) { - auto *node = common::make_shared<_Buffer_>(); + auto *node = cinn::common::make_shared<_Buffer_>(); node->name = name; node->shape = shape; node->dtype = Void(); @@ -77,7 +77,7 @@ Buffer _Buffer_::Make(const std::string &name, const std::vector &shape) { } Buffer _Buffer_::Make() { - auto *node = common::make_shared<_Buffer_>(); + auto *node = cinn::common::make_shared<_Buffer_>(); node->dtype = Void(); return Buffer(node); } diff --git a/paddle/cinn/ir/dim.cc b/paddle/cinn/ir/dim.cc index fe5b288850d4e2..0b488e641161cd 100644 --- a/paddle/cinn/ir/dim.cc +++ b/paddle/cinn/ir/dim.cc @@ -38,7 +38,7 @@ Dim _Dim_::Make(const std::string& name, const 
SymbolicDimOp& sym_dim) { n->name = name; n->sym_dim = sym_dim; if (sym_dim.IsDynamic()) { - n->dim_expr = Expr(Var(sym_dim.GetSymName(), common::Int(32))); + n->dim_expr = Expr(Var(sym_dim.GetSymName(), cinn::common::Int(32))); } else { n->dim_expr = Expr(static_cast(sym_dim.GetDimSize())); } diff --git a/paddle/cinn/ir/group_schedule/base_group_scheduler.cc b/paddle/cinn/ir/group_schedule/base_group_scheduler.cc index ab215ee952b8f4..687122741aa2e1 100644 --- a/paddle/cinn/ir/group_schedule/base_group_scheduler.cc +++ b/paddle/cinn/ir/group_schedule/base_group_scheduler.cc @@ -22,7 +22,7 @@ namespace ir { std::unique_ptr GroupScheduler::Make( ir::IRSchedule* ir_sch, const std::unordered_set& output_tensor_names, - const common::Target& target, + const cinn::common::Target& target, bool is_dy_shape) { if (is_dy_shape) { return std::make_unique( diff --git a/paddle/cinn/ir/group_schedule/base_group_scheduler.h b/paddle/cinn/ir/group_schedule/base_group_scheduler.h index f941d13e30f149..6a277f01d43bf4 100644 --- a/paddle/cinn/ir/group_schedule/base_group_scheduler.h +++ b/paddle/cinn/ir/group_schedule/base_group_scheduler.h @@ -29,7 +29,7 @@ class GroupScheduler { public: GroupScheduler(ir::IRSchedule* ir_sch, const std::unordered_set& output_tensor_names, - const common::Target& target) + const cinn::common::Target& target) : ir_sch_(ir_sch), output_tensor_names_(output_tensor_names), target_(target) { @@ -39,7 +39,7 @@ class GroupScheduler { static std::unique_ptr Make( ir::IRSchedule* ir_sch, const std::unordered_set& output_tensor_names, - const common::Target& target, + const cinn::common::Target& target, bool is_dy_shape = false); virtual ~GroupScheduler() = default; @@ -51,7 +51,7 @@ class GroupScheduler { protected: ir::IRSchedule* ir_sch_; const std::unordered_set& output_tensor_names_; - const common::Target& target_; + const cinn::common::Target& target_; // Graph in units of ScheduleBlockNode, each node corresponds to a // ScheduleBlock in IR. 
std::unique_ptr schedule_block_graph_; diff --git a/paddle/cinn/ir/group_schedule/dy_shape_group_scheduler.h b/paddle/cinn/ir/group_schedule/dy_shape_group_scheduler.h index 1026ee095425df..6b89a0eff00033 100644 --- a/paddle/cinn/ir/group_schedule/dy_shape_group_scheduler.h +++ b/paddle/cinn/ir/group_schedule/dy_shape_group_scheduler.h @@ -27,7 +27,7 @@ class DynamicShapeGroupScheduler : public GroupScheduler { DynamicShapeGroupScheduler( ir::IRSchedule* ir_sch, const std::unordered_set& output_tensor_names, - const common::Target& target) + const cinn::common::Target& target) : GroupScheduler(ir_sch, output_tensor_names, target) {} void Schedule() override; diff --git a/paddle/cinn/ir/group_schedule/st_shape_group_scheduler.cc b/paddle/cinn/ir/group_schedule/st_shape_group_scheduler.cc index 92c674ccd9e132..bced79128ca497 100644 --- a/paddle/cinn/ir/group_schedule/st_shape_group_scheduler.cc +++ b/paddle/cinn/ir/group_schedule/st_shape_group_scheduler.cc @@ -672,7 +672,7 @@ void StaticShapeGroupScheduler::AllocateStorage() { int extent = 1; for (int idx = tensor->shape.size() - 1; idx >= 0; --idx) { strides.insert(strides.begin(), extent); - tensor->shape[idx] = common::AutoSimplify(tensor->shape[idx]); + tensor->shape[idx] = cinn::common::AutoSimplify(tensor->shape[idx]); CHECK(tensor->shape[idx].is_constant()) << "Shape of tensor: " << tensor << " is not constant"; extent *= tensor->shape[idx].get_constant(); @@ -681,12 +681,12 @@ void StaticShapeGroupScheduler::AllocateStorage() { for (int idx = 0; idx < indices.size(); ++idx) { flatten_indice = flatten_indice + ir::Expr(strides[idx]) * indices[idx]; } - flatten_indice = common::AutoSimplify(flatten_indice); + flatten_indice = cinn::common::AutoSimplify(flatten_indice); for (int idx = 0; idx < iter_vars.size(); ++idx) { optim::ReplaceVarWithExpr( &flatten_indice, iter_vars[idx], iter_values[idx]); } - flatten_indice = common::AutoSimplify(flatten_indice); + flatten_indice = cinn::common::AutoSimplify(flatten_indice); VLOG(6) << "flatten_indice of " << load_or_store << " : " << flatten_indice; return flatten_indice; }; @@ -781,12 +781,12 @@ void StaticShapeGroupScheduler::AllocateStorage() { } VLOG(6) << "lower_bound before simplify of " << indice_value << " = " << copy_for_lower_bound; - copy_for_lower_bound = - common::AutoSimplify(common::AutoSimplify(copy_for_lower_bound)); + copy_for_lower_bound = cinn::common::AutoSimplify( + cinn::common::AutoSimplify(copy_for_lower_bound)); VLOG(6) << "upper_bound before simplify of " << indice_value << " = " << copy_for_upper_bound; - copy_for_upper_bound = - common::AutoSimplify(common::AutoSimplify(copy_for_upper_bound)); + copy_for_upper_bound = cinn::common::AutoSimplify( + cinn::common::AutoSimplify(copy_for_upper_bound)); VLOG(6) << "lower_bound of " << indice_value << " = " << copy_for_lower_bound; VLOG(6) << "upper_bound of " << indice_value << " = " @@ -839,7 +839,7 @@ void StaticShapeGroupScheduler::AllocateStorage() { << indice_value << " = " << indice_copies[i] << ", range = (" << coef_and_ranges[i].second.min << ", " << coef_and_ranges[i].second.max << ")"; - indice_copies[i] = common::AutoSimplify(indice_copies[i]); + indice_copies[i] = cinn::common::AutoSimplify(indice_copies[i]); VLOG(6) << "after simplify [" << i << "], the coefficient of " << indice_value << " = " << indice_copies << ", range = (" << coef_and_ranges[i].second.min << ", " diff --git a/paddle/cinn/ir/group_schedule/st_shape_group_scheduler.h b/paddle/cinn/ir/group_schedule/st_shape_group_scheduler.h index 
0187d171b06e7c..be27d59b18f0bb 100644 --- a/paddle/cinn/ir/group_schedule/st_shape_group_scheduler.h +++ b/paddle/cinn/ir/group_schedule/st_shape_group_scheduler.h @@ -46,7 +46,7 @@ class StaticShapeGroupScheduler : public GroupScheduler { StaticShapeGroupScheduler( ir::IRSchedule* ir_sch, const std::unordered_set& output_tensor_names, - const common::Target& target) + const cinn::common::Target& target) : GroupScheduler(ir_sch, output_tensor_names, target) {} void Schedule() override; diff --git a/paddle/cinn/ir/ir.cc b/paddle/cinn/ir/ir.cc index 4b4979bc86169f..b556dad00cb324 100644 --- a/paddle/cinn/ir/ir.cc +++ b/paddle/cinn/ir/ir.cc @@ -29,7 +29,7 @@ namespace cinn { namespace ir { -using common::make_shared; +using cinn::common::make_shared; Expr Cast::Make(Type t, Expr v) { CHECK(!t.is_unk()); @@ -389,7 +389,7 @@ Expr Store::index() const { if (indices.size() == 1) { return indices[0]; } - Expr res = common::IndiceToAbsOffset(tensor_n->shape, indices); + Expr res = cinn::common::IndiceToAbsOffset(tensor_n->shape, indices); optim::Simplify(&res); return res; } @@ -477,7 +477,7 @@ Expr Call::Make(Type type, CHECK(read_args[i].defined()); } - auto node = common::make_shared(type); + auto node = cinn::common::make_shared(type); node->name = name; node->read_args = read_args; node->write_args = write_args; @@ -623,7 +623,7 @@ Expr Load::index() const { if (indices.size() == 1) { return indices[0]; } - Expr res = common::IndiceToAbsOffset(tensor_n->shape, indices); + Expr res = cinn::common::IndiceToAbsOffset(tensor_n->shape, indices); VLOG(3) << "Begin Load::index Simplify"; optim::Simplify(&res); return res; @@ -747,7 +747,7 @@ Expr Reduce::Make(Reduce::ReduceType reduce_type, const std::vector &reduce_aixs) { CHECK(body.defined()); CHECK(init.defined()); - auto n = common::make_shared(); + auto n = cinn::common::make_shared(); n->init = init; n->body = body; n->reduce_type = reduce_type; diff --git a/paddle/cinn/ir/ir.h b/paddle/cinn/ir/ir.h index 9a40d3fb32f0c7..4b510a3b156fb7 100644 --- a/paddle/cinn/ir/ir.h +++ b/paddle/cinn/ir/ir.h @@ -44,8 +44,8 @@ class BufferRange; struct LoweredFunc; class Module; -using common::Object; -using common::Shared; +using cinn::common::Object; +using cinn::common::Shared; // NOTE attr_t only support POD, can not contain Expr or other IR nodes, or the // IRVisitor or IRCopy on PrimitiveNode will result in undefined behavior. 
using attr_t = absl::variant; diff --git a/paddle/cinn/ir/ir_analyzer/ir_analyzer.cc b/paddle/cinn/ir/ir_analyzer/ir_analyzer.cc index 29c503255c1876..724cca3e6279ce 100644 --- a/paddle/cinn/ir/ir_analyzer/ir_analyzer.cc +++ b/paddle/cinn/ir/ir_analyzer/ir_analyzer.cc @@ -173,7 +173,7 @@ Expr AddUnitLoop(const std::vector& exprs, const Expr& block) { ->schedule_block.As() ->name == block_name) { auto block = ir::Block::Make({GetBlock(exprs, block_name)}); - auto loop = ir::For::Make(ir::Var(common::UniqName("ix")), + auto loop = ir::For::Make(ir::Var(cinn::common::UniqName("ix")), ir::Expr(0), ir::Expr(1), ir::ForType::Serial, @@ -186,7 +186,7 @@ Expr AddUnitLoop(const std::vector& exprs, const Expr& block) { } } else if (visitor.target_->As()) { auto block = ir::Block::Make({visitor.target_->As()->body}); - auto loop = ir::For::Make(ir::Var(common::UniqName("ix")), + auto loop = ir::For::Make(ir::Var(cinn::common::UniqName("ix")), ir::Expr(0), ir::Expr(1), ir::ForType::Serial, @@ -197,7 +197,7 @@ Expr AddUnitLoop(const std::vector& exprs, const Expr& block) { } else if (visitor.target_->As()) { auto block = ir::Block::Make({visitor.target_->As()->body}); - auto loop = ir::For::Make(ir::Var(common::UniqName("ix")), + auto loop = ir::For::Make(ir::Var(cinn::common::UniqName("ix")), ir::Expr(0), ir::Expr(1), ir::ForType::Serial, diff --git a/paddle/cinn/ir/ir_base.h b/paddle/cinn/ir/ir_base.h index b6a94259fbb85f..c333448d029ae0 100644 --- a/paddle/cinn/ir/ir_base.h +++ b/paddle/cinn/ir/ir_base.h @@ -29,12 +29,12 @@ namespace cinn { namespace ir { -using common::BFloat16; -using common::Float; -using common::Float16; -using common::Int; -using common::Type; -using common::type_of; +using cinn::common::BFloat16; +using cinn::common::Float; +using cinn::common::Float16; +using cinn::common::Int; +using cinn::common::Type; +using cinn::common::type_of; class Module; class IRVisitor; @@ -144,7 +144,7 @@ struct Expr; /** * The base of all the nodes in the IR. */ -class IrNode : public common::Object { +class IrNode : public cinn::common::Object { public: //! The operands of this operator. std::vector operands; @@ -177,7 +177,7 @@ class IrNode : public common::Object { /** * A handle to store any IRNode. 
*/ -class IrNodeRef : public common::Shared { +class IrNodeRef : public cinn::common::Shared { public: IrNodeRef() = default; IrNodeRef(const IrNodeRef& other) : Shared(other.p_) {} diff --git a/paddle/cinn/ir/ir_printer.cc b/paddle/cinn/ir/ir_printer.cc index 25ac1daca49e70..b8676cb078960a 100644 --- a/paddle/cinn/ir/ir_printer.cc +++ b/paddle/cinn/ir/ir_printer.cc @@ -28,8 +28,8 @@ namespace cinn { namespace ir { -using common::bfloat16; -using common::float16; +using cinn::common::bfloat16; +using cinn::common::float16; void IrPrinter::Print(const Expr &e) { IRVisitorRequireReImpl::Visit(&e); diff --git a/paddle/cinn/ir/lowered_func.cc b/paddle/cinn/ir/lowered_func.cc index 13f0fe9eaabc9b..129fc5d6e32782 100644 --- a/paddle/cinn/ir/lowered_func.cc +++ b/paddle/cinn/ir/lowered_func.cc @@ -33,8 +33,8 @@ namespace cinn { namespace ir { -using common::bfloat16; -using common::float16; +using cinn::common::bfloat16; +using cinn::common::float16; const _LoweredFunc_* LoweredFunc::operator->() const { return As<_LoweredFunc_>(); @@ -171,7 +171,7 @@ std::vector _LoweredFunc_::PrepareCreateTempBufferExprs() const { auto expr = ir::intrinsics::BufferCreate::Make(temp_buf); auto buffer_ptr_type = Type() - .set_customized_type(common::customized_type::kbuffer_t) + .set_customized_type(cinn::common::customized_type::kbuffer_t) .set_cpp_handle(); Var variable = ir::_Var_::Make(temp_buf->name, buffer_ptr_type); expr = ir::Let::Make(variable, expr); @@ -301,7 +301,7 @@ void _LoweredFunc_::PrepareArgumentExprs() { // type of `cinn_buffer_t*` auto buffer_ptr_type = Type() - .set_customized_type(common::customized_type::kbuffer_t) + .set_customized_type(cinn::common::customized_type::kbuffer_t) .set_cpp_handle(); // type of `const cinn_buffer_t*` auto const_buffer_ptr_type = buffer_ptr_type.with_cpp_const(); @@ -309,13 +309,13 @@ void _LoweredFunc_::PrepareArgumentExprs() { Var args_passed_in("_args", type_of()); auto pod_value_ptr = - common::CastIfNeeded(args_passed_in, type_of()); + cinn::common::CastIfNeeded(args_passed_in, type_of()); if (FLAGS_cinn_runtime_display_debug_info) { argument_prepare_exprs.push_back(runtime::IntrinsicCall( Void(), runtime::intrinsic::print_debug_args_repr, - {pod_value_ptr, common::make_const(Int(32), args.size())})); + {pod_value_ptr, cinn::common::make_const(Int(32), args.size())})); } /* @@ -333,7 +333,7 @@ void _LoweredFunc_::PrepareArgumentExprs() { // cast arg to cinn_pod_value_t* // something like `_args[0]` - Expr load_expr = Load::Make(pod_value_ptr, {common::make_const(i)}); + Expr load_expr = Load::Make(pod_value_ptr, {cinn::common::make_const(i)}); CHECK_EQ(load_expr.type(), type_of()); load_expr = ir::intrinsics::GetAddr::Make(load_expr); diff --git a/paddle/cinn/ir/module.h b/paddle/cinn/ir/module.h index 6d122a2b8d764d..fad8377e6b0158 100644 --- a/paddle/cinn/ir/module.h +++ b/paddle/cinn/ir/module.h @@ -36,7 +36,7 @@ class Module : public ir::IrNodeRef { public: struct Builder { Builder(const std::string& name, const Target& target) - : module_(common::make_shared()) { + : module_(cinn::common::make_shared()) { module_->name = name; module_->target = target; } diff --git a/paddle/cinn/ir/op/ir_operators.cc b/paddle/cinn/ir/op/ir_operators.cc index 69bdea2378a747..fcb0e19a6bb95a 100644 --- a/paddle/cinn/ir/op/ir_operators.cc +++ b/paddle/cinn/ir/op/ir_operators.cc @@ -82,9 +82,9 @@ Expr operator|(Expr a, Expr b) { } } auto target = cinn::runtime::CurrentTarget::GetCurrentTarget(); - if (target.arch == common::Target::Arch::X86) { + if (target.arch == 
cinn::common::Target::Arch::X86) { return lang::CallExtern("bitwise_or", {a, b}, {{"vectorizable", false}}); - } else if (target.arch == common::Target::Arch::NVGPU) { + } else if (target.arch == cinn::common::Target::Arch::NVGPU) { auto func_name = hlir::GetExternFuncName(target, t_a, "bitwise_or"); return lang::CallExtern(func_name, {a, b}, {{"vectorizable", false}}); } else { @@ -105,9 +105,9 @@ Expr operator&(Expr a, Expr b) { } } auto target = cinn::runtime::CurrentTarget::GetCurrentTarget(); - if (target.arch == common::Target::Arch::X86) { + if (target.arch == cinn::common::Target::Arch::X86) { return lang::CallExtern("bitwise_and", {a, b}, {{"vectorizable", false}}); - } else if (target.arch == common::Target::Arch::NVGPU) { + } else if (target.arch == cinn::common::Target::Arch::NVGPU) { auto func_name = hlir::GetExternFuncName(target, t_a, "bitwise_and"); return lang::CallExtern(func_name, {a, b}, {{"vectorizable", false}}); } else { @@ -129,9 +129,9 @@ Expr operator^(Expr a, Expr b) { } } auto target = cinn::runtime::CurrentTarget::GetCurrentTarget(); - if (target.arch == common::Target::Arch::X86) { + if (target.arch == cinn::common::Target::Arch::X86) { return lang::CallExtern("bitwise_xor", {a, b}, {{"vectorizable", false}}); - } else if (target.arch == common::Target::Arch::NVGPU) { + } else if (target.arch == cinn::common::Target::Arch::NVGPU) { auto func_name = hlir::GetExternFuncName(target, t_a, "bitwise_xor"); return lang::CallExtern(func_name, {a, b}, {{"vectorizable", false}}); } else { @@ -143,9 +143,9 @@ Expr operator^(Expr a, Expr b) { Expr operator~(Expr a) { CHECK(a.type().is_int() || a.type().is_uint()); auto target = cinn::runtime::CurrentTarget::GetCurrentTarget(); - if (target.arch == common::Target::Arch::X86) { + if (target.arch == cinn::common::Target::Arch::X86) { return lang::CallExtern("bitwise_not", {a}, {{"vectorizable", false}}); - } else if (target.arch == common::Target::Arch::NVGPU) { + } else if (target.arch == cinn::common::Target::Arch::NVGPU) { auto func_name = hlir::GetExternFuncName(target, a->type(), "bitwise_not"); return lang::CallExtern(func_name, {a}, {{"vectorizable", false}}); } else { diff --git a/paddle/cinn/ir/operation.cc b/paddle/cinn/ir/operation.cc index 6a6b6a3107c7d5..f6ceb45964ba81 100644 --- a/paddle/cinn/ir/operation.cc +++ b/paddle/cinn/ir/operation.cc @@ -62,7 +62,7 @@ Operation ComputeOp::Make(const std::string &name, n->reduce_axis = reduce_axis; n->tag = tag; n->attrs = attrs; - n->axis = common::GenDefaultAxis(domain.size()); + n->axis = cinn::common::GenDefaultAxis(domain.size()); std::vector tmp_axis; for (auto &x : n->axis) { tmp_axis.push_back(x); diff --git a/paddle/cinn/ir/schedule/factorize_reduction.h b/paddle/cinn/ir/schedule/factorize_reduction.h index 4075feb93599e0..82ad269e0750ae 100644 --- a/paddle/cinn/ir/schedule/factorize_reduction.h +++ b/paddle/cinn/ir/schedule/factorize_reduction.h @@ -33,7 +33,7 @@ namespace ir { Tensor CreateRFTensor(const Tensor& original_tensor, const Expr& rf_loop, int rf_axis) { - std::string name = common::UniqName(original_tensor->name + "_rf"); + std::string name = cinn::common::UniqName(original_tensor->name + "_rf"); std::vector new_shape = original_tensor->shape; new_shape.insert(new_shape.begin() + rf_axis, rf_loop.As()->extent); Tensor rf_tensor = _Tensor_::Make(name, diff --git a/paddle/cinn/ir/schedule/impl/base.cc b/paddle/cinn/ir/schedule/impl/base.cc index f8cdd8f7279148..63f7d252ab2ab1 100644 --- a/paddle/cinn/ir/schedule/impl/base.cc +++ 
b/paddle/cinn/ir/schedule/impl/base.cc @@ -363,7 +363,7 @@ std::vector StScheduleImpl::SamplePerfectTile( CHECK_GE(n, 2) << "The number of tile factors should be at least 2"; CHECK_GE(max_innermost_factor, 1) << "The max innermost factor should be at least 1"; - CHECK(common::is_zero(loop.As()->min)) + CHECK(cinn::common::is_zero(loop.As()->min)) << "The For loop should start from 0"; int loop_extent = GetLoopExtent(loop); std::vector innermost_factors; diff --git a/paddle/cinn/ir/schedule/impl/for_type.cc b/paddle/cinn/ir/schedule/impl/for_type.cc index 0e5bf07d9fb880..63ad5b888d0ea6 100644 --- a/paddle/cinn/ir/schedule/impl/for_type.cc +++ b/paddle/cinn/ir/schedule/impl/for_type.cc @@ -90,7 +90,8 @@ void StScheduleImpl::Bind(const Expr& loop, const std::string& thread_axis) { << "thread_axis " << thread_axis << " is not supported"; int offset = thread_axis.back() - 'x'; auto cur_dev_info = - common::DevInfoMgr::GetDevInfo(0); + cinn::common::DevInfoMgr::GetDevInfo( + 0); const std::array kMaxBlockDims = cur_dev_info->GetMaxBlockDims(); const std::array kMaxGridDims = cur_dev_info->GetMaxGridDims(); auto check_offset = [&](const char& c) -> bool { diff --git a/paddle/cinn/ir/schedule/impl/loop_transformation.cc b/paddle/cinn/ir/schedule/impl/loop_transformation.cc index c628382c1f7009..f49ffb46b73acc 100644 --- a/paddle/cinn/ir/schedule/impl/loop_transformation.cc +++ b/paddle/cinn/ir/schedule/impl/loop_transformation.cc @@ -83,7 +83,7 @@ std::vector StScheduleImpl::Split(const Expr& loop, CHECK(loop.As()) << "Expr param of Split must be For node! Please check."; auto* for_node = loop.As(); - CHECK(common::is_zero(for_node->min)) + CHECK(cinn::common::is_zero(for_node->min)) << "The For node must start with 0! Please check."; CHECK(for_node->extent.is_constant()) << "The For node's extent must be constant! 
Please check."; @@ -105,12 +105,12 @@ std::vector StScheduleImpl::Split(const Expr& loop, std::vector new_loop_vars; Expr substitute_value(0); for (int i = 0; i < processed_factors.size(); ++i) { - Var temp_var(common::UniqName(for_node->loop_var->name)); + Var temp_var(cinn::common::UniqName(for_node->loop_var->name)); substitute_value = Expr(temp_var) + substitute_value * Expr(processed_factors[i]); new_loop_vars.push_back(temp_var); } - substitute_value = common::AutoSimplify(substitute_value); + substitute_value = cinn::common::AutoSimplify(substitute_value); Expr new_node = ir::ir_utils::IRCopy(for_node->body); ReplaceExpr(&new_node, {for_node->loop_var}, {substitute_value}); std::vector splited_loops; @@ -181,7 +181,7 @@ Expr StScheduleImpl::Fuse(const std::vector& loops) { for (int i = 0; i < loops_number; ++i) { fused_extent = fused_extent * for_nodes[i]->extent; } - fused_extent = common::AutoSimplify(fused_extent); + fused_extent = cinn::common::AutoSimplify(fused_extent); if (!fused_body.As()) fused_body = Block::Make({fused_body}); Expr new_stmt = For::Make(fused_var, diff --git a/paddle/cinn/ir/schedule/ir_schedule_util.cc b/paddle/cinn/ir/schedule/ir_schedule_util.cc index db378eba741945..ac9f609e0c20fc 100644 --- a/paddle/cinn/ir/schedule/ir_schedule_util.cc +++ b/paddle/cinn/ir/schedule/ir_schedule_util.cc @@ -72,7 +72,7 @@ Tensor GetReadTensor(const Expr& block, int index) { int GetLoopExtent(const Expr& loop) { CHECK(loop.As()); - CHECK(common::is_zero(loop.As()->min)); + CHECK(cinn::common::is_zero(loop.As()->min)); CHECK(loop.As()->extent.is_constant()); return static_cast(loop.As()->extent.get_constant()); } @@ -92,7 +92,7 @@ void SetCudaAxisInfo(Expr* lowered_func) { auto bind_info = x->As()->bind_info(); info.set_valid(true); if (bind_info.for_type == ForType::GPUThread) { - CHECK(common::is_zero(x->As()->min)); + CHECK(cinn::common::is_zero(x->As()->min)); CHECK(x->As()->extent.is_constant()); int range = x->As()->extent.get_constant(); range = range > info.block_dim(bind_info.offset) @@ -102,7 +102,7 @@ void SetCudaAxisInfo(Expr* lowered_func) { << range; info.set_block_dim(bind_info.offset, range); } else if (bind_info.for_type == ForType::GPUBlock) { - CHECK(common::is_zero(x->As()->min)); + CHECK(cinn::common::is_zero(x->As()->min)); CHECK(x->As()->extent.is_constant()); int range = x->As()->extent.get_constant(); range = range > info.grid_dim(bind_info.offset) @@ -362,8 +362,8 @@ IterRange GetAccessedRange(const Expr& index, ReplaceExpr(&indice_min, iter_vars, var_mins); ReplaceExpr(&indice_max, iter_vars, var_maxs); // simplify expression - indice_min = common::AutoSimplify(indice_min); - indice_max = common::AutoSimplify(indice_max); + indice_min = cinn::common::AutoSimplify(indice_min); + indice_max = cinn::common::AutoSimplify(indice_max); Expr indice_extent; Expr mod_extent(0); @@ -371,7 +371,7 @@ IterRange GetAccessedRange(const Expr& index, Expr mod_right_min = indice_min.As()->a(); Expr mod_right_max = indice_max.As()->a(); Expr mod_right_extent = - common::AutoSimplify(mod_right_max - mod_right_min + 1); + cinn::common::AutoSimplify(mod_right_max - mod_right_min + 1); mod_extent = indice_min.As()->b(); if (mod_right_extent.get_constant() < mod_extent.get_constant()) { mod_extent = mod_right_extent; @@ -379,15 +379,16 @@ IterRange GetAccessedRange(const Expr& index, } if (indice_min == indice_max) { - if (common::is_zero(mod_extent)) { + if (cinn::common::is_zero(mod_extent)) { // If a index keeps constant, its extent should be 1. 
indice_extent = Expr(1); } else { indice_extent = mod_extent; } } else { - indice_extent = common::AutoSimplify(common::AutoSimplify(indice_max) - - common::AutoSimplify(indice_min) + 1); + indice_extent = + cinn::common::AutoSimplify(cinn::common::AutoSimplify(indice_max) - + cinn::common::AutoSimplify(indice_min) + 1); } if (indice_extent.is_constant() && indice_extent.get_constant() < 0) { @@ -500,10 +501,10 @@ Expr MakeCacheBlock(const std::vector& buffer_ranges, // Create loop vars and block vars' binding_value for (const auto& range : buffer_ranges) { Var loop_var( - common::UniqName("cache_ax" + std::to_string(loop_vars.size()))); + cinn::common::UniqName("cache_ax" + std::to_string(loop_vars.size()))); // Var loop_var("ax" + std::to_string(loop_vars.size())); loop_vars.push_back(loop_var); - iter_values.push_back(common::AutoSimplify(range.min + loop_var)); + iter_values.push_back(cinn::common::AutoSimplify(range.min + loop_var)); } // block variables std::vector block_vars; @@ -516,7 +517,7 @@ Expr MakeCacheBlock(const std::vector& buffer_ranges, } auto body = new_tensor->tensor_store_expanded_body(); std::vector axis_vars = - common::GenDefaultAxis(new_tensor->domain.size()); + cinn::common::GenDefaultAxis(new_tensor->domain.size()); axis_vars.insert(axis_vars.end(), new_tensor->reduce_axis.begin(), new_tensor->reduce_axis.end()); @@ -531,7 +532,7 @@ Expr MakeCacheBlock(const std::vector& buffer_ranges, for (int i = static_cast(loop_vars.size()) - 1; i >= 0; i--) { new_body = For::Make(loop_vars[i], Expr(0), - common::AutoSimplify(buffer_ranges[i].extent), + cinn::common::AutoSimplify(buffer_ranges[i].extent), ir::ForType::Serial, device_api, ir::Block::Make({new_body})); @@ -1031,9 +1032,9 @@ void InsertBlock(Expr& for_loop, const Expr& insertion, int index) { // NOLINT } IterRange RangeUnion(const IterRange& range1, const IterRange& range2) { - Expr new_min = common::AutoSimplify(Min::Make(range1.min, range2.min)); - Expr new_extent = common::AutoSimplify( - common::AutoSimplify( + Expr new_min = cinn::common::AutoSimplify(Min::Make(range1.min, range2.min)); + Expr new_extent = cinn::common::AutoSimplify( + cinn::common::AutoSimplify( Max::Make(range1.min + range1.extent, range2.min + range2.extent)) - new_min); return IterRange(new_min, new_extent); diff --git a/paddle/cinn/ir/schedule/ir_schedule_util.h b/paddle/cinn/ir/schedule/ir_schedule_util.h index 9d9b416ae6c159..ecb269ca61d991 100644 --- a/paddle/cinn/ir/schedule/ir_schedule_util.h +++ b/paddle/cinn/ir/schedule/ir_schedule_util.h @@ -476,7 +476,7 @@ struct RfMutator : public ir::IRMutator<> { auto* rf_for = rf_loop_.As(); CHECK(rf_for); CHECK(is_zero(rf_for->min)) << "rfactor loop's min should be zero"; - auto extent = common::AutoSimplify(rf_for->extent); + auto extent = cinn::common::AutoSimplify(rf_for->extent); auto& shape = tensor->shape; auto& domain = tensor->domain; CHECK_LE(rf_axis_, shape.size()) @@ -578,14 +578,14 @@ struct LoopReconstructor : public ir::IRMutator<> { const auto& range = iter_ranges[i]; if (keep_unit_loops || range.extent != Expr(1)) { std::string var_name = - common::UniqName("ax" + std::to_string(loop_vars.size())); + cinn::common::UniqName("ax" + std::to_string(loop_vars.size())); new_var_names.push_back(var_name); Var var(var_name, Int(32)); loop_vars.push_back(var); loop_extents.push_back(range.extent); - iter_values.push_back(common::AutoSimplify(range.min) + var); + iter_values.push_back(cinn::common::AutoSimplify(range.min) + var); } else { - 
iter_values.push_back(common::AutoSimplify(range.min)); + iter_values.push_back(cinn::common::AutoSimplify(range.min)); } } auto schedule_block_node = diff --git a/paddle/cinn/ir/schedule_block_graph.cc b/paddle/cinn/ir/schedule_block_graph.cc index fc8b73104110d2..e879a15776675b 100644 --- a/paddle/cinn/ir/schedule_block_graph.cc +++ b/paddle/cinn/ir/schedule_block_graph.cc @@ -36,15 +36,15 @@ std::vector ScheduleBlockNode::ControlStmts() const { return ir_sch_.GetLoops(id_); } -bool EdgeCompare(const common::Shared& a, - const common::Shared& b) { +bool EdgeCompare(const cinn::common::Shared& a, + const cinn::common::Shared& b) { CHECK_NOTNULL(a.get()); CHECK_NOTNULL(b.get()); return a->index() < b->index(); } -std::vector> +std::vector> ScheduleBlockNode::OrderedInLinks() const { - std::vector> ordered_links; + std::vector> ordered_links; for (auto& in_edge : this->inlinks()) { ordered_links.push_back(in_edge); CHECK_GE(in_edge->index(), 0) @@ -55,9 +55,9 @@ ScheduleBlockNode::OrderedInLinks() const { return ordered_links; } -std::vector> +std::vector> ScheduleBlockNode::OrderedOutLinks() const { - std::vector> ordered_links; + std::vector> ordered_links; for (auto& out_edge : this->outlinks()) { ordered_links.push_back(out_edge); CHECK_GE(out_edge->index(), 0) @@ -132,7 +132,7 @@ void ScheduleBlockGraph::Update(const IRSchedule& ir_sch) { std::vector ScheduleBlockGraph::StartPoints() { std::vector res; - for (common::GraphNode* node : nodes()) { + for (cinn::common::GraphNode* node : nodes()) { if (node->inlinks().empty()) { res.push_back(dynamic_cast(node)); } @@ -142,7 +142,7 @@ std::vector ScheduleBlockGraph::StartPoints() { std::vector ScheduleBlockGraph::EndPoints() { std::vector res; - for (common::GraphNode* node : nodes()) { + for (cinn::common::GraphNode* node : nodes()) { if (node->outlinks().empty()) { res.push_back(dynamic_cast(node)); } @@ -151,7 +151,7 @@ std::vector ScheduleBlockGraph::EndPoints() { } void ScheduleBlockGraph::NodesWalk(const NodeHandlerType& NodeHandler) { - for (common::GraphNode* node : nodes()) { + for (cinn::common::GraphNode* node : nodes()) { ScheduleBlockNode* cur_node = dynamic_cast(node); NodeHandler(cur_node); } @@ -175,8 +175,8 @@ void ScheduleBlockGraph::DFSTopoWalk(const NodeHandlerType& NodeHandler, NextNodeHandler(next_node); } }; - common::DfsTopoWalker walker(VisitPreNodes, - VisitNextNodes); + cinn::common::DfsTopoWalker walker(VisitPreNodes, + VisitNextNodes); std::vector starts = is_reverse ? EndPoints() : StartPoints(); walker(starts.begin(), starts.end(), NodeHandler); diff --git a/paddle/cinn/ir/schedule_block_graph.h b/paddle/cinn/ir/schedule_block_graph.h index 2ccced20457f19..1cad28951926b3 100644 --- a/paddle/cinn/ir/schedule_block_graph.h +++ b/paddle/cinn/ir/schedule_block_graph.h @@ -28,7 +28,7 @@ namespace cinn { namespace ir { // Node in units of ScheduleBlock. -class ScheduleBlockNode : public common::GraphNode { +class ScheduleBlockNode : public cinn::common::GraphNode { public: ScheduleBlockNode(Expr block, const IRSchedule& ir_sch); @@ -66,8 +66,10 @@ class ScheduleBlockNode : public common::GraphNode { } private: - std::vector> OrderedInLinks() const; - std::vector> OrderedOutLinks() const; + std::vector> OrderedInLinks() + const; + std::vector> OrderedOutLinks() + const; private: std::string id_; @@ -78,7 +80,7 @@ class ScheduleBlockNode : public common::GraphNode { // Graph in units of ScheduleBlockNode, each node corresponds to a ScheduleBlock // in IR. 
-class ScheduleBlockGraph : public common::Graph {
+class ScheduleBlockGraph : public cinn::common::Graph {
  public:
   explicit ScheduleBlockGraph(const IRSchedule& ir_sch);
@@ -88,7 +90,8 @@ class ScheduleBlockGraph {
   // Retrieve a node in the graph by id, the id is same as the name of
   // ScheduleBlock.
   ScheduleBlockNode* RetrieveNode(const std::string& id) {
-    return dynamic_cast(common::Graph::RetrieveNode(id));
+    return dynamic_cast(
+        cinn::common::Graph::RetrieveNode(id));
   }
 
   // Get all block name in order,
diff --git a/paddle/cinn/ir/tensor.cc b/paddle/cinn/ir/tensor.cc
index 1576564fcb1232..261db949b997bc 100644
--- a/paddle/cinn/ir/tensor.cc
+++ b/paddle/cinn/ir/tensor.cc
@@ -208,7 +208,7 @@ PlaceholderOp *_Tensor_::get_placeholder_op() const {
 
 void _Tensor_::InitAxis() const {
   // CHECK(!domain_without_reduce_axis().empty());
-  axis_ = common::GenDefaultAxis(domain_without_reduce_axis().size());
+  axis_ = cinn::common::GenDefaultAxis(domain_without_reduce_axis().size());
 }
 
 bool _Tensor_::has_expression() const {
@@ -232,7 +232,7 @@ isl::set _Tensor_::GenerateIslDomain() const {
     } else {
       dims.emplace_back(_axis_with_reduce[i]->name,
                         Expr(0),
-                        Sub::Make(dim, common::make_const(1)));
+                        Sub::Make(dim, cinn::common::make_const(1)));
     }
   }
 }
@@ -410,7 +410,7 @@ Expr _Tensor_::tensor_store_expanded_body() {
   Expr final_body = body();
   if (shape.empty()) return final_body;
 
-  std::vector g_axis = common::GenDefaultAxisAsExpr(shape.size());
+  std::vector g_axis = cinn::common::GenDefaultAxisAsExpr(shape.size());
   if (!new_indices.empty()) {
     g_axis = new_indices;
   }
@@ -472,7 +472,7 @@ void _Tensor_::Bind(const Buffer &buffer) {
 void _Tensor_::WithBuffer(const Type &type) {
   Type buf_type = type.is_void() ? type_ : type;
   lang::Buffer buf(buf_type);
-  buf->target = common::DefaultHostTarget();
+  buf->target = cinn::common::DefaultHostTarget();
   Bind(buf);
 }
 
@@ -494,7 +494,7 @@ void _Tensor_::WithBuffer(const std::string &memory_type,
     }
   } else {
     lang::Buffer buf(buf_type, buffer_name);
-    buf->target = common::DefaultHostTarget();
+    buf->target = cinn::common::DefaultHostTarget();
     Bind(buf);
 
     if (memory_type == "shared") {
@@ -513,8 +513,8 @@ bool _Tensor_::HasSameShapeWith(const Tensor &other) const {
   if (shape.size() != other->shape.size()) return false;
 
   for (int i = 0; i < shape.size(); i++) {
-    Expr dim0 = common::AutoSimplify(shape[i]);
-    Expr dim1 = common::AutoSimplify(other->shape[i]);
+    Expr dim0 = cinn::common::AutoSimplify(shape[i]);
+    Expr dim1 = cinn::common::AutoSimplify(other->shape[i]);
     if (dim0 != dim1) return false;
   }
 
diff --git a/paddle/cinn/ir/tensor.h b/paddle/cinn/ir/tensor.h
index c0cd53ec2349d7..d9047e01cee9c2 100644
--- a/paddle/cinn/ir/tensor.h
+++ b/paddle/cinn/ir/tensor.h
@@ -306,7 +306,7 @@ class _Tensor_ : public ExprNode<_Tensor_> {
                        const Type& type = Void());
   Tensor GetInitTensor(
       poly::StageMap stages,
-      const Target& target = common::DefaultHostTarget()) const;
+      const Target& target = cinn::common::DefaultHostTarget()) const;
 
   /**
    * Create the initialization tensor.
@@ -316,7 +316,7 @@
    */
   ir::Tensor InitReduction(
       poly::StageMap stages,
-      const Target& target = common::DefaultHostTarget()) const;
+      const Target& target = cinn::common::DefaultHostTarget()) const;
 
  private:
   //! Initialize the axis field after the shape field is assigned.
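Note: the test-file hunks that follow keep choosing their Target exactly as
before; only the namespace qualification changes. A self-contained sketch of
that selection idiom, assuming the default-target helpers are declared in
paddle/cinn/common/target.h (both helpers appear throughout this patch); the
helper name PickTestTarget is hypothetical:

#include "paddle/cinn/common/target.h"

// Mirrors the #ifdef CINN_WITH_CUDA idiom used in schedule_block_graph_test.cc
// below: prefer the NVGPU target when CUDA is compiled in, else the host.
cinn::common::Target PickTestTarget() {
#ifdef CINN_WITH_CUDA
  return cinn::common::DefaultNVGPUTarget();
#else
  return cinn::common::DefaultHostTarget();
#endif
}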
diff --git a/paddle/cinn/ir/test/ir_compare_test.cc b/paddle/cinn/ir/test/ir_compare_test.cc index cc9ce438221a2e..bb1c6eb46866cd 100644 --- a/paddle/cinn/ir/test/ir_compare_test.cc +++ b/paddle/cinn/ir/test/ir_compare_test.cc @@ -25,7 +25,7 @@ namespace cinn { namespace ir { namespace ir_utils { TEST(TestIrCompare, SingleFunction) { - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); ir::Expr M(32); ir::Expr N(32); diff --git a/paddle/cinn/ir/test/schedule_block_graph_test.cc b/paddle/cinn/ir/test/schedule_block_graph_test.cc index 78c809dc117d46..3190cec2bc2f10 100644 --- a/paddle/cinn/ir/test/schedule_block_graph_test.cc +++ b/paddle/cinn/ir/test/schedule_block_graph_test.cc @@ -27,17 +27,16 @@ namespace ir { IRSchedule MakeIRSchedule(frontend::Program* program) { #ifdef CINN_WITH_CUDA - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); #else - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); #endif std::unordered_set fetch_ids; auto graph = frontend::Optimize(program, fetch_ids, target); LOG_IF(WARNING, graph->fusion_groups.size() > 1) << "Test Graph has more than 1 group"; - auto& dtype_dict = - graph->GetMutableAttrs>( - "inferdtype"); + auto& dtype_dict = graph->GetMutableAttrs< + absl::flat_hash_map>("inferdtype"); auto& shape_dict = graph->GetMutableAttrs< absl::flat_hash_map>("infershape"); auto op_lowerer = diff --git a/paddle/cinn/ir/test/schedule_desc_test.cc b/paddle/cinn/ir/test/schedule_desc_test.cc index b360f021d6b2c5..dcd8b90ef120dd 100644 --- a/paddle/cinn/ir/test/schedule_desc_test.cc +++ b/paddle/cinn/ir/test/schedule_desc_test.cc @@ -121,7 +121,7 @@ std::string SourceCodeGen(const ModuleExpr& module_expr, class TestScheduleDesc : public ::testing::Test { public: - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); std::vector lowered_funcs; ScheduleDesc trace; void SetUp() override { Context::Global().ResetNameId(); } diff --git a/paddle/cinn/ir/test/st_shape_group_scheduler_test.cc b/paddle/cinn/ir/test/st_shape_group_scheduler_test.cc index 22f64849a8f7a2..37f084c436543f 100644 --- a/paddle/cinn/ir/test/st_shape_group_scheduler_test.cc +++ b/paddle/cinn/ir/test/st_shape_group_scheduler_test.cc @@ -30,7 +30,7 @@ using frontend::RunDecomposer; void Compile(NetBuilder* net_builder) { auto program = net_builder->Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); RunDecomposer(&program, target); auto graph = std::make_shared(program, target); @@ -68,7 +68,7 @@ void CheckAccuracy(NetBuilder* net_builder, const std::vector& input_names) { FLAGS_cinn_new_group_scheduler = true; auto program = net_builder->Build(); - auto target = common::DefaultTarget(); + auto target = cinn::common::DefaultTarget(); auto graph = std::make_shared(program, target); hlir::framework::ApplyPasses(graph.get(), diff --git a/paddle/cinn/ir/test/tensor_test.cc b/paddle/cinn/ir/test/tensor_test.cc index 26e3ce6a851fc0..cea1263f2aba38 100644 --- a/paddle/cinn/ir/test/tensor_test.cc +++ b/paddle/cinn/ir/test/tensor_test.cc @@ -95,10 +95,10 @@ TEST(Tensor, Reshape) { auto func = lang::Lower("fn", stages, {A, B}); - ir::Module::Builder builder("some_modue", common::DefaultHostTarget()); + ir::Module::Builder builder("some_modue", cinn::common::DefaultHostTarget()); builder.AddFunction(func); - backends::CodeGenC codegenc(common::DefaultHostTarget()); + 
backends::CodeGenC codegenc(cinn::common::DefaultHostTarget()); codegenc.SetInlineBuiltinCodes(false); auto source = codegenc.Compile(builder.Build(), CodeGenC::OutputKind::CImpl); LOG(INFO) << "source:\n" << source; @@ -144,10 +144,10 @@ TEST(Tensor, ReshapeCopied) { stages->InsertLazily(B); - ir::Module::Builder builder("some_modue", common::DefaultHostTarget()); + ir::Module::Builder builder("some_modue", cinn::common::DefaultHostTarget()); auto func = lang::Lower("fn", stages, {A, B}, {}, {}, &builder); - backends::CodeGenC codegenc(common::DefaultHostTarget()); + backends::CodeGenC codegenc(cinn::common::DefaultHostTarget()); codegenc.SetInlineBuiltinCodes(false); auto source = codegenc.Compile(builder.Build(), CodeGenC::OutputKind::CImpl); LOG(INFO) << "source:\n" << source; diff --git a/paddle/cinn/ir/utils/ir_copy.cc b/paddle/cinn/ir/utils/ir_copy.cc index a47150d6ab2aaf..08dc2bc1e628cd 100644 --- a/paddle/cinn/ir/utils/ir_copy.cc +++ b/paddle/cinn/ir/utils/ir_copy.cc @@ -52,7 +52,7 @@ struct IRCopyVisitor : public ir::IRVisitorRequireReImpl { return Expr(make_shared(op->type(), op->value)); } Expr Visit(const ir::StringImm* op) override { - return Expr(common::make_shared(op->value)); + return Expr(cinn::common::make_shared(op->value)); } Expr Visit(const ir::Cast* op) override { @@ -367,7 +367,7 @@ struct IRCopyVisitor : public ir::IRVisitorRequireReImpl { arguments.push_back(Visit(args)); } - auto n = common::make_shared(); + auto n = cinn::common::make_shared(); n->name = op->name; n->attrs = op->attrs; // attrs are PODs n->arguments = arguments; diff --git a/paddle/cinn/lang/buffer.cc b/paddle/cinn/lang/buffer.cc index 864adfb165cade..83bdfef63217ec 100644 --- a/paddle/cinn/lang/buffer.cc +++ b/paddle/cinn/lang/buffer.cc @@ -29,7 +29,7 @@ Buffer::Buffer(Type type, const std::string& name) { if (!name.empty()) { buffer_->name = name; } - buffer_->target = common::DefaultHostTarget(); + buffer_->target = cinn::common::DefaultHostTarget(); } } // namespace lang diff --git a/paddle/cinn/lang/builtin.cc b/paddle/cinn/lang/builtin.cc index 3e7ef7390cf7e8..b50a49096847b5 100644 --- a/paddle/cinn/lang/builtin.cc +++ b/paddle/cinn/lang/builtin.cc @@ -127,10 +127,10 @@ Expr FloorDivide(Expr a, Expr b) { } else { auto div = a / b; auto mod = a % b; - auto ret = - ir::Select::Make(ir::EQ::Make(mod, common::make_const(a.type(), 0)), - div, - div - common::make_const(a.type(), 1)); + auto ret = ir::Select::Make( + ir::EQ::Make(mod, cinn::common::make_const(a.type(), 0)), + div, + div - cinn::common::make_const(a.type(), 1)); return ir::Select::Make((a > 0 && b > 0) || (a < 0 && b < 0), div, ret); } } @@ -227,11 +227,11 @@ Expr Abs(Expr e) { Expr IsNan(Expr e) { Type type = e->type(); if (type.is_int() || type.is_uint()) { - return common::make_bool(false, type.lanes()); + return cinn::common::make_bool(false, type.lanes()); } else if (type.is_float()) { auto* node = e.As(); if (node) { - return common::make_bool(std::isnan(node->value), type.lanes()); + return cinn::common::make_bool(std::isnan(node->value), type.lanes()); } return CallExtern("isnan", {e}, {{"vectorizable", false}}); } else { @@ -258,11 +258,11 @@ Expr Infinity(const Type& type) { Expr IsInf(Expr e) { Type type = e->type(); if (type.is_int() || type.is_uint()) { - return common::make_bool(false, type.lanes()); + return cinn::common::make_bool(false, type.lanes()); } else if (type.is_float()) { auto* node = e.As(); if (node) { - return common::make_bool(std::isinf(node->value), type.lanes()); + return 
cinn::common::make_bool(std::isinf(node->value), type.lanes()); } return CallExtern("isinf", {e}, {{"vectorizable", false}}); } else { diff --git a/paddle/cinn/lang/compute.cc b/paddle/cinn/lang/compute.cc index a81ea059cc3fa7..4828eaac64e13c 100644 --- a/paddle/cinn/lang/compute.cc +++ b/paddle/cinn/lang/compute.cc @@ -128,7 +128,7 @@ ir::Tensor Compute(const std::vector<Expr> &domain, std::function<Expr(const std::vector<Expr> &)> fn, const std::string &name, const std::vector<Expr> &shape) { - auto axises = common::GenDefaultAxis(domain.size()); + auto axises = cinn::common::GenDefaultAxis(domain.size()); std::vector<Expr> _axis; for (auto &x : axises) _axis.push_back(x); Expr fn_body = fn(_axis); @@ -172,7 +172,7 @@ ir::Tensor Compute(const std::vector<Expr> &domain, // check reduce_axis not include the reserved axis name for (auto &ra : reduce_axis) { - CHECK(!common::IsAxisNameReserved(ra->name)) + CHECK(!cinn::common::IsAxisNameReserved(ra->name)) << "reduce axis [" << ra->name << "]'s name is reserved"; } diff --git a/paddle/cinn/lang/lower.cc b/paddle/cinn/lang/lower.cc index c509a1977555f2..d53a9e4d5dbe19 100644 --- a/paddle/cinn/lang/lower.cc +++ b/paddle/cinn/lang/lower.cc @@ -307,7 +307,7 @@ std::vector<ir::LoweredFunc> LowerToAstVec( target); std::vector<ir::LoweredFunc> result = lower_instance(); for (auto& res : result) { - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { res->device_api = ir::DeviceAPI::GPU; } } @@ -352,7 +352,7 @@ ir::LoweredFunc Lower(const std::string& name, break; } } - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { res->device_api = ir::DeviceAPI::GPU; } } @@ -406,7 +406,7 @@ std::vector<ir::LoweredFunc> LowerVec(const std::string& name, } } - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { res->device_api = ir::DeviceAPI::GPU; } } diff --git a/paddle/cinn/lang/lower.h b/paddle/cinn/lang/lower.h index b3f27129778b9c..d4793fb27ca977 100644 --- a/paddle/cinn/lang/lower.h +++ b/paddle/cinn/lang/lower.h @@ -50,7 +50,7 @@ ir::LoweredFunc Lower(const std::string &name, const std::vector &scalar_args = {}, const std::vector &temp_tensors = {}, ir::Module::Builder *b = nullptr, - const Target &target = common::DefaultHostTarget(), + const Target &target = cinn::common::DefaultHostTarget(), bool support_ir_schedule = false); /** @@ -71,19 +71,20 @@ std::vector<ir::LoweredFunc> LowerVec( const std::vector &scalar_args = {}, const std::vector &temp_tensors = {}, ir::Module::Builder *b = nullptr, - const Target &target = common::DefaultHostTarget(), + const Target &target = cinn::common::DefaultHostTarget(), bool support_ir_schedule = false); -ir::LoweredFunc LowerToAst(const std::string &name, - const std::vector &tensor_args, - ast_gen_ius::TensorGroup *tensor_group, - const Target &target = common::DefaultHostTarget()); +ir::LoweredFunc LowerToAst( + const std::string &name, + const std::vector &tensor_args, + ast_gen_ius::TensorGroup *tensor_group, + const Target &target = cinn::common::DefaultHostTarget()); std::vector<ir::LoweredFunc> LowerToAstVec( const std::string &name, const std::vector &tensor_args, ast_gen_ius::TensorGroup *tensor_group, - const Target &target = common::DefaultHostTarget()); + const Target &target = cinn::common::DefaultHostTarget()); std::vector GetTempBuffers( const std::vector &tensor_args, diff --git a/paddle/cinn/lang/lower_impl.cc b/paddle/cinn/lang/lower_impl.cc index 9b3b6d7ebb746b..f19ff767cece62 100644 --- a/paddle/cinn/lang/lower_impl.cc +++ b/paddle/cinn/lang/lower_impl.cc @@ -208,12 +208,12 @@
std::string CompuGraphNode::id() const { * @param t The tensor. * @param stages The stage map. */ -void CreateCompGraphWithInlineTensors(common::Graph* graph, +void CreateCompGraphWithInlineTensors(cinn::common::Graph* graph, const ir::Tensor& t, StageMap stages, std::set* visited) { if (visited->count(t)) return; - common::GraphNode* t_node = graph->RetrieveNode(t->name); + cinn::common::GraphNode* t_node = graph->RetrieveNode(t->name); if (!t_node) { t_node = graph->RegisterNode(t->name, new CompuGraphNode(t)); } @@ -239,10 +239,10 @@ void CreateCompGraphWithInlineTensors(common::Graph* graph, } } -std::unique_ptr CreateCompGraphWithInlineTensorHidden( +std::unique_ptr CreateCompGraphWithInlineTensorHidden( const std::vector& tensors, StageMap stages) { // create a graph with inline tensor first. - std::unique_ptr graph(new common::Graph); + std::unique_ptr graph(new cinn::common::Graph); std::set visited; for (auto& t : tensors) { CreateCompGraphWithInlineTensors(graph.get(), t, stages, &visited); @@ -251,9 +251,9 @@ std::unique_ptr CreateCompGraphWithInlineTensorHidden( // greedy remove the inline tensor, each time merge the inputs of an inline // tensor to its sink node. - std::set inline_nodes; + std::set inline_nodes; do { - inline_nodes = graph->CollectNodes([&](const common::GraphNode* x) { + inline_nodes = graph->CollectNodes([&](const cinn::common::GraphNode* x) { auto* comp_node = x->safe_as(); return stages[comp_node->tensor]->inlined(); }); @@ -295,7 +295,7 @@ std::unique_ptr CreateCompGraphWithInlineTensorHidden( return graph; } -void CompuGraphAddCtrlDepLinks(common::Graph* graph, StageMap stages) { +void CompuGraphAddCtrlDepLinks(cinn::common::Graph* graph, StageMap stages) { for (auto& x : graph->nodes()) { auto* node = x->safe_as(); CHECK(node); @@ -309,14 +309,14 @@ void CompuGraphAddCtrlDepLinks(common::Graph* graph, StageMap stages) { } } -std::unique_ptr CreateCompGraph( +std::unique_ptr CreateCompGraph( const std::vector& tensors, StageMap stages, bool hide_inline) { if (hide_inline) { auto graph = CreateCompGraphWithInlineTensorHidden(tensors, stages); CompuGraphAddCtrlDepLinks(graph.get(), stages); return graph; } else { - auto graph = std::make_unique(); + auto graph = std::make_unique(); std::set visited; for (auto& t : tensors) { CreateCompGraphWithInlineTensors(graph.get(), t, stages, &visited); @@ -559,7 +559,7 @@ std::vector LowerImpl::operator()() { func_iterator = ir::ScheduleBlockRealize::Make( {}, ir::ScheduleBlock::Make( - {}, {}, {}, common::UniqName("root"), func_iterator)); + {}, {}, {}, cinn::common::UniqName("root"), func_iterator)); } std::set temp_tensor_names; for (auto& t : temp_tensor_args_) temp_tensor_names.insert(t->name); @@ -609,7 +609,7 @@ std::vector LowerImpl::operator()() { std::unordered_set buffer_name_set; // TODO(Superjomn) write buffer latter. 
- if (target_ == common::DefaultNVGPUTarget()) { + if (target_ == cinn::common::DefaultNVGPUTarget()) { for (auto& t : new_temp_tensors) { if (!tensor_map.count(t->name)) continue; auto& tt = tensor_map.at(t->name); @@ -630,7 +630,7 @@ std::vector LowerImpl::operator()() { } ir::LoweredFunc func; - if (target_ == common::DefaultNVGPUTarget()) { + if (target_ == cinn::common::DefaultNVGPUTarget()) { auto func_args2 = GenFuncArgForSplitKernel(func_iterator, new_temp_tensors); std::string new_fn_name = fn_name_; @@ -745,7 +745,7 @@ std::vector LowerImpl::GenerateFunctionBody( for (auto& var : tensor->reduce_axis) { CHECK(var->lower_bound.defined()); CHECK(var->upper_bound.defined()); - CHECK(common::is_zero(var->lower_bound)); + CHECK(cinn::common::is_zero(var->lower_bound)); CHECK(var->upper_bound.is_constant()); int_shape.push_back( static_cast(var->upper_bound.get_constant())); @@ -754,7 +754,7 @@ std::vector LowerImpl::GenerateFunctionBody( std::vector block_vars; std::vector iter_values; std::vector axis_vars = - common::GenDefaultAxis(tensor->shape.size()); + cinn::common::GenDefaultAxis(tensor->shape.size()); // bind var_values axis_vars.insert(axis_vars.end(), tensor->reduce_axis.begin(), @@ -779,7 +779,7 @@ std::vector LowerImpl::GenerateFunctionBody( ir::ScheduleBlock::Make( block_vars, {}, {}, tensor->name, store_body)); // iter_values, ir::ScheduleBlock::Make(block_vars, {}, {}, - // common::UniqName(tensor->name), store_body)); + // cinn::common::UniqName(tensor->name), store_body)); VLOG(3) << "store body\n" << store_body; } tuple_to_expr[tensor->name] = store_body; @@ -795,7 +795,7 @@ std::vector LowerImpl::GenerateFunctionBody( if (group_expr.defined()) { cuda_axis_info_.emplace_back(std::move(temp_cuda_axis_info)); - if (target_ == common::DefaultNVGPUTarget() && !all_temp_tensor) { + if (target_ == cinn::common::DefaultNVGPUTarget() && !all_temp_tensor) { exprs.push_back(group_expr); Expr body = ir::Block::Make(exprs); result.push_back(body); @@ -805,7 +805,7 @@ std::vector LowerImpl::GenerateFunctionBody( } } } - if (target_ == common::DefaultHostTarget()) { + if (target_ == cinn::common::DefaultHostTarget()) { Expr body = ir::Block::Make(exprs); result.push_back(body); exprs.clear(); diff --git a/paddle/cinn/lang/lower_impl.h b/paddle/cinn/lang/lower_impl.h index 208a57c868bd74..b5f82ba7312e67 100644 --- a/paddle/cinn/lang/lower_impl.h +++ b/paddle/cinn/lang/lower_impl.h @@ -75,7 +75,7 @@ Expr LowerGroup(const poly::ScheduleGroup& group, /** * A Computation graph node. */ -struct CompuGraphNode : public common::GraphNode { +struct CompuGraphNode : public cinn::common::GraphNode { explicit CompuGraphNode(ir::Tensor tensor) : tensor(tensor) {} ir::Tensor tensor; @@ -94,7 +94,7 @@ struct CompuGraphNode : public common::GraphNode { * @param hide_inline hide inline tensor nodes. * @return a graph. */ -std::unique_ptr CreateCompGraph( +std::unique_ptr CreateCompGraph( const std::vector& tensors, StageMap stages, bool hide_inline = false); @@ -114,7 +114,7 @@ class LowerImpl { const std::vector& tensor_args, const std::vector& scalar_args, const std::vector& temp_tensor_args = {}, - const Target& target = common::DefaultHostTarget(), + const Target& target = cinn::common::DefaultHostTarget(), bool support_ir_schedule = false); std::vector operator()(); @@ -122,7 +122,7 @@ class LowerImpl { /** * Get the computational graph. 
*/ - const common::Graph* comp_graph() const { return compu_graph_.get(); } + const cinn::common::Graph* comp_graph() const { return compu_graph_.get(); } /** * \brief generate the argument list of the final output function. @@ -193,7 +193,7 @@ class LowerImpl { StageMap stages_; //! A computation graph generated from the tensor_args and scalar_args. - std::unique_ptr compu_graph_; + std::unique_ptr compu_graph_; //! CUDA axis info for this function. std::vector cuda_axis_info_; diff --git a/paddle/cinn/lang/lower_tensor_group.cc b/paddle/cinn/lang/lower_tensor_group.cc index f59ac4ceff52fc..93453621e18393 100644 --- a/paddle/cinn/lang/lower_tensor_group.cc +++ b/paddle/cinn/lang/lower_tensor_group.cc @@ -61,7 +61,7 @@ std::vector LowerTensorGroup::operator()() { func_body = ir::ScheduleBlockRealize::Make( {}, ir::ScheduleBlock::Make( - {}, {}, {}, common::UniqName("root"), func_body)); + {}, {}, {}, cinn::common::UniqName("root"), func_body)); // 2. Assign buffer to tensors auto tensor_map = tensor_group_->AllocateBuffers(); // copy the tensor(with buffer assigned) back to func's args. @@ -217,7 +217,7 @@ std::vector LowerTensorGroup::GenerateFunctionBody( tensor->buffer.defined() && (tensor->buffer->memory_type == ir::MemoryType::GPUShared || tensor->buffer->memory_type == ir::MemoryType::GPULocal); - if (target_ == common::DefaultNVGPUTarget() && !gpu_local) { + if (target_ == cinn::common::DefaultNVGPUTarget() && !gpu_local) { result.push_back(bodies.size() == 1 ? bodies[0] : ir::Block::Make(bodies)); bodies.clear(); diff --git a/paddle/cinn/lang/lower_tensor_group.h b/paddle/cinn/lang/lower_tensor_group.h index 358e2d9ec953d5..aae18e119dd2a9 100644 --- a/paddle/cinn/lang/lower_tensor_group.h +++ b/paddle/cinn/lang/lower_tensor_group.h @@ -49,7 +49,7 @@ class LowerTensorGroup { const std::vector& scalar_args, ast_gen_ius::TensorGroup* tensor_group, const std::vector& temp_tensor_args = {}, - const Target& target = common::DefaultHostTarget()); + const Target& target = cinn::common::DefaultHostTarget()); std::vector operator()(); diff --git a/paddle/cinn/lang/lower_test.cc b/paddle/cinn/lang/lower_test.cc index 452b9e7afb7725..25b0bb20f19567 100644 --- a/paddle/cinn/lang/lower_test.cc +++ b/paddle/cinn/lang/lower_test.cc @@ -141,7 +141,7 @@ TEST(lower, temp_buffer_collects) { auto output = Compute( {M}, [&](Expr i) -> Expr { return D(i); }, "output"); - ir::Module::Builder b("somemodule", common::DefaultHostTarget()); + ir::Module::Builder b("somemodule", cinn::common::DefaultHostTarget()); auto stages = CreateStages({B, C, D, output}); diff --git a/paddle/cinn/lang/packed_func.h b/paddle/cinn/lang/packed_func.h index fa7f3e05cd34b2..94eb1e442f0dc0 100644 --- a/paddle/cinn/lang/packed_func.h +++ b/paddle/cinn/lang/packed_func.h @@ -24,7 +24,7 @@ namespace cinn { namespace lang { -using common::CINNValue; +using cinn::common::CINNValue; /** * A single argument value to Function. 
@@ -54,8 +54,8 @@ class Args { ArgValue& operator[](int i) { return values_[i]; } const ArgValue& operator[](int i) const { return values_[i]; } - common::CINNValuePack ToValuePack() const { - return common::CINNValuePack(values_); + cinn::common::CINNValuePack ToValuePack() const { + return cinn::common::CINNValuePack(values_); } private: diff --git a/paddle/cinn/lang/packed_func_test.cc b/paddle/cinn/lang/packed_func_test.cc index f803f97f58f793..47253996e2ec6c 100644 --- a/paddle/cinn/lang/packed_func_test.cc +++ b/paddle/cinn/lang/packed_func_test.cc @@ -78,12 +78,13 @@ TEST(Function, ReturnMultiValue) { int c = a + b; int d = a - b; - *ret = common::CINNValuePack{{common::CINNValue(c), common::CINNValue(d)}}; + *ret = cinn::common::CINNValuePack{ + {cinn::common::CINNValue(c), cinn::common::CINNValue(d)}}; }; PackedFunc func(body); - common::CINNValuePack ret = func(1, 2); + cinn::common::CINNValuePack ret = func(1, 2); int c = ret[0]; int d = ret[1]; diff --git a/paddle/cinn/lang/placeholder.h b/paddle/cinn/lang/placeholder.h index 3c20fa3942c909..f36d0edd2adbb8 100644 --- a/paddle/cinn/lang/placeholder.h +++ b/paddle/cinn/lang/placeholder.h @@ -125,7 +125,7 @@ void Placeholder::Init(const std::string &name, std::vector axis; for (int i = 0; i < shape.size(); i++) - axis.emplace_back(common::axis_name(i)); + axis.emplace_back(cinn::common::axis_name(i)); auto op = ir::PlaceholderOp::Make(name, shape, type_of()); @@ -145,7 +145,7 @@ void Placeholder::Init(const std::string &name, std::vector axis; for (int i = 0; i < shape.size(); i++) - axis.emplace_back(common::axis_name(i)); + axis.emplace_back(cinn::common::axis_name(i)); auto op = ir::PlaceholderOp::Make(name, shape, type_of()); diff --git a/paddle/cinn/optim/buffer_assign.cc b/paddle/cinn/optim/buffer_assign.cc index 6e5e4eb0da734b..256624617cc436 100644 --- a/paddle/cinn/optim/buffer_assign.cc +++ b/paddle/cinn/optim/buffer_assign.cc @@ -25,7 +25,7 @@ namespace optim { namespace { -struct BufferUFNode : public common::UnionFindNode { +struct BufferUFNode : public cinn::common::UnionFindNode { explicit BufferUFNode(const std::string& x) : tensor_name(x) {} const char* type_info() const override { return __type_info__; } @@ -57,7 +57,7 @@ std::map InitialAssignBuffer( Expr* expr, poly::StageMap stages, const std::map& all_tensor_map, - const common::Graph* comp_graph, + const cinn::common::Graph* comp_graph, const std::set& temp_tensor_names) { // The tensor map helps to reserve only one tensor instance for a // tensor(called the same name). @@ -69,7 +69,7 @@ std::map InitialAssignBuffer( } // union-find to cluster the tensors with the same buffer. - common::UnionFind union_find; + cinn::common::UnionFind union_find; // unify all the tensor occurance with a global one, e.g. there are multiple // tensor B exists in the expression, replace them with a shared one. @@ -107,7 +107,7 @@ std::map InitialAssignBuffer( auto _topo_order_topo_edges_ = comp_graph->topological_order(); auto& topo_order = std::get<0>(_topo_order_topo_edges_); auto& topo_edges = std::get<1>(_topo_order_topo_edges_); - for (common::GraphNode* n : topo_order) { + for (cinn::common::GraphNode* n : topo_order) { auto nn = n->safe_as(); CHECK(nn); { @@ -124,7 +124,7 @@ std::map InitialAssignBuffer( // Get a center of the cluster, it will consider the following rules // 1. Prefer a tensor arg than a temp tensor. 
auto cluster_get_center_tensor = - [&](const std::vector& cluster) { + [&](const std::vector& cluster) { ir::Tensor some_tensor; // try to find a node that is a tensor_arg, allocate buffer for it, and // make others share buffer with it. diff --git a/paddle/cinn/optim/buffer_assign.h b/paddle/cinn/optim/buffer_assign.h index e44b3a77cee2e7..03f2987bebb3da 100644 --- a/paddle/cinn/optim/buffer_assign.h +++ b/paddle/cinn/optim/buffer_assign.h @@ -33,7 +33,7 @@ std::map InitialAssignBuffer( Expr* expr, poly::StageMap stages, const std::map& all_tensor_map, - const common::Graph* comp_graph, + const cinn::common::Graph* comp_graph, const std::set& temp_tensor_names); } // namespace optim diff --git a/paddle/cinn/optim/call_arg_list_to_pod_value.cc b/paddle/cinn/optim/call_arg_list_to_pod_value.cc index 62afec620f3647..b2142b77ff52a6 100644 --- a/paddle/cinn/optim/call_arg_list_to_pod_value.cc +++ b/paddle/cinn/optim/call_arg_list_to_pod_value.cc @@ -48,7 +48,7 @@ struct CallArgListToPodValueMutator : ir::IRMutator<> { auto new_call = ir::Call::Make( Void(), op->name, - {pod_array_var, common::make_const(Int(32), args.size())}, + {pod_array_var, cinn::common::make_const(Int(32), args.size())}, {}, ir::CallType::CINN, op->func, diff --git a/paddle/cinn/optim/compute_inline_expand.cc b/paddle/cinn/optim/compute_inline_expand.cc index 20cba25ad38f17..7f42a3500ee760 100644 --- a/paddle/cinn/optim/compute_inline_expand.cc +++ b/paddle/cinn/optim/compute_inline_expand.cc @@ -115,7 +115,7 @@ struct TensorInlineExpandMutator : public ir::IRMutator<> { auto shapes = tensor->shape; CHECK_EQ(shapes.size(), node->indices.size()); for (int i = 0; i < shapes.size(); i++) { - if (common::is_zero(shapes[i] - 1)) { + if (cinn::common::is_zero(shapes[i] - 1)) { node->indices[i] = Expr(0); } } @@ -175,7 +175,7 @@ struct TensorInlineExpandMutator : public ir::IRMutator<> { } }; -struct SSANode : public common::GraphNode { +struct SSANode : public cinn::common::GraphNode { std::string id_; explicit SSANode(const std::string &id) : id_(id) {} @@ -191,7 +191,7 @@ struct SSANode : public common::GraphNode { // ir::CollectIRNodes method collects all the tensors recursively, so it can not // reserve the level information, fix it. struct SSABuilder : public ir::IRMutator<> { - common::Graph graph; + cinn::common::Graph graph; SSABuilder &operator()(Expr *expr) { ir::IRMutator<>::Visit(expr, expr); diff --git a/paddle/cinn/optim/ir_simplify.cc b/paddle/cinn/optim/ir_simplify.cc index 3076b32d3ca7b7..601e869a5b91b5 100644 --- a/paddle/cinn/optim/ir_simplify.cc +++ b/paddle/cinn/optim/ir_simplify.cc @@ -34,9 +34,9 @@ namespace cinn { namespace optim { using namespace ir; // NOLINT -using common::bfloat16; -using common::ExprToGinacConverter; -using common::float16; +using cinn::common::bfloat16; +using cinn::common::ExprToGinacConverter; +using cinn::common::float16; using utils::GetStreamCnt; using utils::Replace; @@ -48,16 +48,16 @@ namespace { //! them. void PartialSimplify( Expr* expr, - const absl::flat_hash_map& var_intervals = - {}) { - *expr = common::AutoSimplify(*expr, var_intervals); + const absl::flat_hash_map& + var_intervals = {}) { + *expr = cinn::common::AutoSimplify(*expr, var_intervals); } //! Simplify the expression but Load. 
struct SimplifyNoPureMathMutator : public ir::IRMutator { - common::cas_intervals_t& var_intervals; + cinn::common::cas_intervals_t& var_intervals; explicit SimplifyNoPureMathMutator( - common::cas_intervals_t& var_intervals) // NOLINT + cinn::common::cas_intervals_t& var_intervals) // NOLINT : var_intervals(var_intervals) {} void operator()(Expr* x) { ir::IRMutator::Visit(x, x); } @@ -79,7 +79,8 @@ struct SimplifyNoPureMathMutator : public ir::IRMutator { void Visit(const PolyFor* op, Expr* expr) override { auto* node = expr->As(); - node->condition = common::SolveInequality(op->condition, op->iterator); + node->condition = + cinn::common::SolveInequality(op->condition, op->iterator); Visit(&node->body, &node->body); } @@ -93,10 +94,10 @@ struct SimplifyNoPureMathMutator : public ir::IRMutator { if (min_i && extent_i && extent_i->value > min_i->value) { var_intervals.emplace( op->loop_var->name, - common::CasInterval{min_i->value, extent_i->value - 1}); + cinn::common::CasInterval{min_i->value, extent_i->value - 1}); } else { var_intervals.emplace(op->loop_var->name, - common::CasInterval{op->min, op->extent - 1}); + cinn::common::CasInterval{op->min, op->extent - 1}); } Visit(&node->body, &node->body); @@ -123,7 +124,7 @@ struct SimplifyLoadMutator : public ir::IRMutator { void Visit(const Load* expr, Expr* op) override { auto* node = op->As(); for (auto& idx : node->indices) { - if (common::IsPureMath(idx)) { + if (cinn::common::IsPureMath(idx)) { PartialSimplify(&idx, var_intervals_); } else { SimplifyNoPureMathMutator mutator(var_intervals_); @@ -138,7 +139,7 @@ struct SimplifyLoadMutator : public ir::IRMutator { if (min_i && extent_i && extent_i->value > min_i->value) { var_intervals_.emplace( op->loop_var->name, - common::CasInterval{min_i->value, extent_i->value - 1}); + cinn::common::CasInterval{min_i->value, extent_i->value - 1}); } auto* node = expr->As(); @@ -151,7 +152,7 @@ struct SimplifyLoadMutator : public ir::IRMutator { } } - common::cas_intervals_t var_intervals_; + cinn::common::cas_intervals_t var_intervals_; }; struct SimplifyStoreMutator : public ir::IRMutator { @@ -161,7 +162,7 @@ struct SimplifyStoreMutator : public ir::IRMutator { auto* node = op->As(); for (auto& idx : node->indices) { - if (common::IsPureMath(idx)) { + if (cinn::common::IsPureMath(idx)) { PartialSimplify(&idx, var_intervals_); } else { SimplifyNoPureMathMutator mutator(var_intervals_); @@ -176,7 +177,7 @@ struct SimplifyStoreMutator : public ir::IRMutator { if (min_i && extent_i) { var_intervals_.emplace( op->loop_var->name, - common::CasInterval{min_i->value, extent_i->value - 1}); + cinn::common::CasInterval{min_i->value, extent_i->value - 1}); } auto* node = expr->As(); @@ -189,7 +190,7 @@ struct SimplifyStoreMutator : public ir::IRMutator { } } - common::cas_intervals_t var_intervals_; + cinn::common::cas_intervals_t var_intervals_; }; struct SimplifyRampMutator : public ir::IRMutator { @@ -198,9 +199,9 @@ struct SimplifyRampMutator : public ir::IRMutator { void Visit(const Ramp* op, Expr* expr) override { auto* node = expr->As(); - CHECK(common::IsPureMath(node->base)) + CHECK(cinn::common::IsPureMath(node->base)) << node->base << "is not a pure math!"; - CHECK(common::IsPureMath(node->stride)) + CHECK(cinn::common::IsPureMath(node->stride)) << node->stride << "is not a pure math!"; PartialSimplify(&node->base); @@ -215,8 +216,9 @@ struct SimplifyRampMutator : public ir::IRMutator { auto b_ramp = b.As(); if (a_ramp && b_ramp && a_ramp->lanes == b_ramp->lanes) { - Expr base_add = 
common::AutoSimplify(a_ramp->base + b_ramp->base); - Expr stride_add = common::AutoSimplify(a_ramp->stride + b_ramp->stride); + Expr base_add = cinn::common::AutoSimplify(a_ramp->base + b_ramp->base); + Expr stride_add = + cinn::common::AutoSimplify(a_ramp->stride + b_ramp->stride); *expr = ir::Ramp::Make(base_add, stride_add, a_ramp->lanes); } } @@ -229,7 +231,7 @@ struct SimplifyIfThenElseMutator : public ir::IRMutator<> { void Visit(const IfThenElse* op, Expr* expr) override { auto* node = expr->As(); - node->condition = common::AutoSimplify(node->condition); + node->condition = cinn::common::AutoSimplify(node->condition); auto* condition_int = node->condition.As(); auto* condition_uint = node->condition.As(); @@ -335,7 +337,7 @@ struct SimplifyBlocksMutator : public ir::IRMutator<> { }; struct SimplifyForLoopsMutator : public ir::IRMutator<> { - absl::flat_hash_map var_intervals; + absl::flat_hash_map var_intervals; SimplifyForLoopsMutator() {} void operator()(Expr* x) { ir::IRMutator::Visit(x, x); } @@ -353,7 +355,8 @@ struct SimplifyForLoopsMutator : public ir::IRMutator<> { VLOG(6) << "Simplify current For Loop"; std::string var_name = node->loop_var->name; var_intervals.emplace( - var_name, common::CasInterval{min_i->value, extent_i->value - 1}); + var_name, + cinn::common::CasInterval{min_i->value, extent_i->value - 1}); *expr = node->body; @@ -468,7 +471,7 @@ void Simplify(Expr* expr) { SimplifyStoreMutator()(expr); SimplifyIfThenElseMutator()(expr); - common::cas_intervals_t var_intervals; + cinn::common::cas_intervals_t var_intervals; SimplifyNoPureMathMutator mutator(var_intervals); mutator(expr); diff --git a/paddle/cinn/optim/map_extern_call.cc b/paddle/cinn/optim/map_extern_call.cc index 3a9531391ca9dc..91122c0b5b60a2 100644 --- a/paddle/cinn/optim/map_extern_call.cc +++ b/paddle/cinn/optim/map_extern_call.cc @@ -91,8 +91,8 @@ void MapExternCall(Expr *e, Target target) { return; } - std::string extern_func = - hlir::GetExternFuncName(common::DefaultNVGPUTarget(), dtype, name); + std::string extern_func = hlir::GetExternFuncName( + cinn::common::DefaultNVGPUTarget(), dtype, name); *expr = lang::CallExtern(extern_func, node->read_args, node->attrs); } diff --git a/paddle/cinn/optim/remove_schedule_block_test.cc b/paddle/cinn/optim/remove_schedule_block_test.cc index 643412b2f261d2..401225fee2f6f0 100644 --- a/paddle/cinn/optim/remove_schedule_block_test.cc +++ b/paddle/cinn/optim/remove_schedule_block_test.cc @@ -33,7 +33,7 @@ TEST(RemovescheduleBlock, basic) { Context::Global().ResetNameId(); Placeholder A("A", {Expr(100), Expr(20)}); Placeholder B("B", {Expr(20), Expr(50)}); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); Module::Builder builder("matmul", target); // C = A * B Var k(20, "k0"); diff --git a/paddle/cinn/optim/replace_cross_thread_reduction_test.cc b/paddle/cinn/optim/replace_cross_thread_reduction_test.cc index fb8c0d185ed119..d7bd9f6defc49d 100644 --- a/paddle/cinn/optim/replace_cross_thread_reduction_test.cc +++ b/paddle/cinn/optim/replace_cross_thread_reduction_test.cc @@ -33,7 +33,7 @@ TEST(CrossThreadReductionReplacer, basic) { #ifdef CINN_WITH_CUDA Context::Global().ResetNameId(); Placeholder A("A", {Expr(64), Expr(128)}); - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); Module::Builder builder("reduce_sum", target); Var reduce_j(128, "reduce_j"); ir::Tensor B = Compute( diff --git a/paddle/cinn/optim/replace_var_with_expr.cc 
b/paddle/cinn/optim/replace_var_with_expr.cc index aae5f9a4f89450..25633dfd768fc4 100644 --- a/paddle/cinn/optim/replace_var_with_expr.cc +++ b/paddle/cinn/optim/replace_var_with_expr.cc @@ -158,7 +158,7 @@ std::vector> CollectTensorIndex( std::vector> result = mutator(source); for (auto& i : result) { for (auto& j : i) { - j = common::AutoSimplify(j); + j = cinn::common::AutoSimplify(j); } } return result; diff --git a/paddle/cinn/optim/transform_gpu_forloop.cc b/paddle/cinn/optim/transform_gpu_forloop.cc index f9c74702733263..7d80539541b22e 100644 --- a/paddle/cinn/optim/transform_gpu_forloop.cc +++ b/paddle/cinn/optim/transform_gpu_forloop.cc @@ -104,7 +104,7 @@ void RemoveGpuForloopsAxis(Expr *expr) { if (for_n) { // for(i, 2, 100); // ^ - if (for_n->min != common::make_const(0)) { + if (for_n->min != cinn::common::make_const(0)) { condition_append(ir::GE::Make(for_n->loop_var, for_n->min)); } @@ -112,7 +112,7 @@ void RemoveGpuForloopsAxis(Expr *expr) { // ^ condition_append(ir::LT::Make(for_n->loop_var, for_n->extent)); } else { - if (poly_for_n->init != common::make_const(0)) { + if (poly_for_n->init != cinn::common::make_const(0)) { condition_append( ir::GE::Make(poly_for_n->iterator, poly_for_n->init)); } @@ -162,7 +162,7 @@ void CudaSyncThreadsDropIfThenElse(Expr *expr) { if (!blocked_statement_stack.empty()) { auto *last_for = blocked_statement_stack.back()->As(); if (auto *eq_n = last_for->condition.As()) { - if (eq_n->b() == common::make_const(0)) { + if (eq_n->b() == cinn::common::make_const(0)) { *blocked_statement_stack.back() = *expr; } } @@ -376,7 +376,7 @@ void UpdateBufferAxisPass(ir::Expr *expr) { auto &indices = load ? load->indices : store->indices; for (auto &indice : indices) { optim::ReplaceVarWithExpr(&indice, loop_var, ir::Expr(0)); - indice = common::AutoSimplify(indice); + indice = cinn::common::AutoSimplify(indice); } } } @@ -436,7 +436,7 @@ class SharedAxisVisitor : public ir::IRMutator<> { for (auto axis : gpu_axis) { optim::ReplaceVarWithExpr(&indice, ir::Var(axis), ir::Expr(0)); } - indice = common::AutoSimplify(indice); + indice = cinn::common::AutoSimplify(indice); } } ir::IRMutator<>::Visit(op, expr); @@ -457,7 +457,7 @@ class SharedAxisVisitor : public ir::IRMutator<> { for (auto axis : gpu_axis) { optim::ReplaceVarWithExpr(&indice, ir::Var(axis), ir::Expr(0)); } - indice = common::AutoSimplify(indice); + indice = cinn::common::AutoSimplify(indice); } } ir::IRMutator<>::Visit(op, expr); @@ -484,7 +484,7 @@ class LocalAxisVisitor : public ir::IRMutator<> { for (auto axis : gpu_axis) { optim::ReplaceVarWithExpr(&indice, ir::Var(axis), ir::Expr(0)); } - indice = common::AutoSimplify(indice); + indice = cinn::common::AutoSimplify(indice); } } ir::IRMutator<>::Visit(op, expr); @@ -505,7 +505,7 @@ class LocalAxisVisitor : public ir::IRMutator<> { for (auto axis : gpu_axis) { optim::ReplaceVarWithExpr(&indice, ir::Var(axis), ir::Expr(0)); } - indice = common::AutoSimplify(indice); + indice = cinn::common::AutoSimplify(indice); } } ir::IRMutator<>::Visit(op, expr); @@ -602,8 +602,8 @@ class ResizeBufferSizeVisitor : public ir::IRMutator<> { ReplaceVarWithExpr(&tmp, var, Expr(idx)); if (deep == vars.size() - 1) { - auto simplify = common::AutoSimplify(tmp); - auto range = common::AutoSimplify(simplify); + auto simplify = cinn::common::AutoSimplify(tmp); + auto range = cinn::common::AutoSimplify(simplify); CHECK(range.is_constant()); max_range = std::max(max_range, range.as_int32() + 1); } else { @@ -635,7 +635,7 @@ class ReplaceVarToZero : public 
ir::IRMutator<> { for (auto var_ : loop_var_) { optim::ReplaceVarWithExpr(&indice, ir::Var(var_), ir::Expr(0)); } - indice = common::AutoSimplify(indice); + indice = cinn::common::AutoSimplify(indice); } ir::IRMutator<>::Visit(op, expr); } @@ -651,7 +651,7 @@ class ReplaceVarToZero : public ir::IRMutator<> { for (auto var_ : loop_var_) { optim::ReplaceVarWithExpr(&indice, ir::Var(var_), ir::Expr(0)); } - indice = common::AutoSimplify(indice); + indice = cinn::common::AutoSimplify(indice); } ir::IRMutator<>::Visit(op, expr); diff --git a/paddle/cinn/optim/transform_polyfor_to_for.cc b/paddle/cinn/optim/transform_polyfor_to_for.cc index 9649364ea13821..8a7392ed5d54ba 100644 --- a/paddle/cinn/optim/transform_polyfor_to_for.cc +++ b/paddle/cinn/optim/transform_polyfor_to_for.cc @@ -74,12 +74,12 @@ struct PolyForWithSimpleConditionToForMutator : public ir::IRMutator { auto* le_n = node->condition.As(); if (lt_n) { - if (lt_n->b() != common::make_const(0)) { + if (lt_n->b() != cinn::common::make_const(0)) { node->condition = lt_n->a() - lt_n->b() < 0; } } if (le_n) { - if (le_n->b() != common::make_const(0)) { + if (le_n->b() != cinn::common::make_const(0)) { node->condition = le_n->a() - le_n->b() <= 0; } } @@ -119,7 +119,7 @@ struct PolyForWithSimpleConditionToForMutator : public ir::IRMutator { Expr lhs = lt_n ? lt_n->a() : le_n->a(); Expr rhs = lt_n ? lt_n->b() : PlusOneWithMinMax(le_n->b()); - rhs = common::AutoSimplify(rhs); + rhs = cinn::common::AutoSimplify(rhs); if (op->is_vectorized()) CHECK(op->vectorize_info().valid()); diff --git a/paddle/cinn/optim/unroll_loops_test.cc b/paddle/cinn/optim/unroll_loops_test.cc index 5ce412a245e3eb..63936d931f34f7 100644 --- a/paddle/cinn/optim/unroll_loops_test.cc +++ b/paddle/cinn/optim/unroll_loops_test.cc @@ -39,7 +39,7 @@ TEST(UnrollLoops, unrolled_tag) { auto stages = CreateStages({C}); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); auto func = cinn::lang::LowerVec( "test_unrolled_tag", stages, {A, B, C}, {}, {}, nullptr, target, true); auto ast_expr = func[0]->body; @@ -80,7 +80,7 @@ TEST(UnrollLoops, auto_unroll) { "B"); auto stages = CreateStages({B}); - Target target = common::DefaultHostTarget(); + Target target = cinn::common::DefaultHostTarget(); auto func = cinn::lang::LowerVec( "test_auto_unroll", stages, {A, B}, {}, {}, nullptr, target, true); auto ast_expr = func[0]->body; diff --git a/paddle/cinn/optim/var_mod_simplify.cc b/paddle/cinn/optim/var_mod_simplify.cc index dcd6de24fef2e7..811208c49de256 100644 --- a/paddle/cinn/optim/var_mod_simplify.cc +++ b/paddle/cinn/optim/var_mod_simplify.cc @@ -80,11 +80,11 @@ struct ReplaceVarWithDivMutator : public ir::IRMutator<> { } // namespace void VarModSimplify(Expr* e) { - *e = common::AutoSimplify(*e); + *e = cinn::common::AutoSimplify(*e); ReplaceModWithDivMutator()(e); ReplaceDivWithVarMutator mutator; mutator(e); - *e = common::AutoSimplify(*e); + *e = cinn::common::AutoSimplify(*e); auto div_var_map = mutator.div_var_map_; ReplaceVarWithDivMutator()(e, mutator.div_var_map_); } diff --git a/paddle/cinn/optim/vectorize_loops.cc b/paddle/cinn/optim/vectorize_loops.cc index 30701216ade95c..0495c1ef0ffe72 100644 --- a/paddle/cinn/optim/vectorize_loops.cc +++ b/paddle/cinn/optim/vectorize_loops.cc @@ -37,9 +37,9 @@ namespace cinn { namespace optim { using namespace ir; // NOLINT -using common::make_const; -using common::make_one; -using common::make_zero; +using cinn::common::make_const; +using cinn::common::make_one; +using 
cinn::common::make_zero; //! Widen an expression to the given number of lanes. Expr Widen(Expr e, int lanes) { @@ -62,7 +62,7 @@ class TensorVectorizeTeller : public ir::IRMutator { TensorVectorizeTeller( const Var &iter_var, const int factor, - const absl::flat_hash_map + const absl::flat_hash_map *var_intervals) : iter_var_(iter_var), factor_(factor), var_intervals_(var_intervals) {} @@ -78,7 +78,8 @@ class TensorVectorizeTeller : public ir::IRMutator { const Var iter_var_; // loop var of new for-loop split from the vectorized loop const int factor_; - const absl::flat_hash_map *var_intervals_; + const absl::flat_hash_map + *var_intervals_; // save (tensor name) -> (bool flag) to indentify whether tensors can be // vectorized or not std::unordered_map tensor2flag_; @@ -154,7 +155,7 @@ class TensorVectorizeTeller : public ir::IRMutator { for (int i = 1; i < interval.r; ++i) { Expr next_idx = ir::ir_utils::IRCopy(indices.back()); cinn::ir::ir_utils::IrReplace(&next_idx, Expr(iter_var_), Expr(i)); - auto gap = common::AutoSimplify(Expr(next_idx - first_idx)); + auto gap = cinn::common::AutoSimplify(Expr(next_idx - first_idx)); if (!gap.As() || gap.as_int32() != i) { VLOG(5) << "Tensor:" << tensor->name << " is not accessed sequentially, next:" << next_idx @@ -195,10 +196,11 @@ class CudaVectorizer : public IRMutator { public: static constexpr int CudaVectorTypeMaxLanes = 8; - CudaVectorizer(const Var &iter_var, - const int factor, - const absl::flat_hash_map - *var_intervals) + CudaVectorizer( + const Var &iter_var, + const int factor, + const absl::flat_hash_map + *var_intervals) : iter_var_(iter_var), factor_(factor), vectorized_teller_(iter_var, factor, var_intervals) { @@ -268,7 +270,8 @@ class CudaVectorizer : public IRMutator { } std::string GetVectorTypeName(Type type) { - std::string name_prefix = common::customized_type::kcuda_builtin_vector_t; + std::string name_prefix = + cinn::common::customized_type::kcuda_builtin_vector_t; #define GET_CUDA_VECTOR_TYPE_NAME(pred_expr, scalar_name) \ if (pred_expr) { \ return name_prefix + scalar_name + std::to_string(factor_); \ @@ -359,7 +362,7 @@ class Vectorizer : public IRMutator { Expr ramp_; - absl::flat_hash_map var_intervals_; + absl::flat_hash_map var_intervals_; //! A suffix to attach to widened variables. std::string widen_suffix; @@ -367,7 +370,7 @@ class Vectorizer : public IRMutator { public: Vectorizer(const Var &var, int lanes, - const absl::flat_hash_map + const absl::flat_hash_map &var_intervals = {}) : var(var), lanes_(lanes), var_intervals_(var_intervals) { // the identity ramp. 
@@ -575,10 +578,10 @@ class Vectorizer : public IRMutator { std::map var_map; var_map[var.As()] = idx; - common::Substitute(expr, var_map); + cinn::common::Substitute(expr, var_map); *expr = ir::For::Make(idx, - common::make_const(0), - common::make_const(lanes_), + cinn::common::make_const(0), + cinn::common::make_const(lanes_), ForType::Serial, DeviceAPI::Host, *expr); @@ -666,7 +669,7 @@ class Vectorizer : public IRMutator { struct VectorizeLoops_ : public IRMutator { const Target ⌖ - absl::flat_hash_map var_intervals; + absl::flat_hash_map var_intervals; bool vectorizable_ = true; explicit VectorizeLoops_(const Target &t) : target(t) {} @@ -680,7 +683,8 @@ struct VectorizeLoops_ : public IRMutator { bool is_changed = false; // simplify the complicated index from poly in the format of div/mod for (int i = 0; i < indices.size(); i++) { - node->indices[i] = common::AutoSimplify(node->indices[i], var_intervals); + node->indices[i] = + cinn::common::AutoSimplify(node->indices[i], var_intervals); Simplify(&node->indices[i]); if (!node->indices[i].same_as(indices[i])) { is_changed = true; @@ -700,7 +704,8 @@ struct VectorizeLoops_ : public IRMutator { bool is_changed = false; // simplify the complicated index from poly in the format of div/mod for (int i = 0; i < indices.size(); i++) { - node->indices[i] = common::AutoSimplify(node->indices[i], var_intervals); + node->indices[i] = + cinn::common::AutoSimplify(node->indices[i], var_intervals); Simplify(&node->indices[i]); if (!node->indices[i].same_as(indices[i])) { is_changed = true; @@ -723,10 +728,12 @@ struct VectorizeLoops_ : public IRMutator { auto loopvar_name = forloop->loop_var->name; if (forloop->extent.As()) { var_intervals.emplace( - loopvar_name, common::CasInterval{0, forloop->extent.as_int32() - 1}); + loopvar_name, + cinn::common::CasInterval{0, forloop->extent.as_int32() - 1}); } else { - var_intervals.emplace(loopvar_name, - common::CasInterval{Expr(0), forloop->extent - 1}); + var_intervals.emplace( + loopvar_name, + cinn::common::CasInterval{Expr(0), forloop->extent - 1}); } // the extent the forloops marked as Vectorized should be int constant if (forloop->is_vectorized()) { @@ -735,7 +742,7 @@ struct VectorizeLoops_ : public IRMutator { CHECK_GT(forloop->vectorize_info().factor, 0); CHECK(is_zero(forloop->min)); - Expr for_extent = common::AutoSimplify(forloop->extent); + Expr for_extent = cinn::common::AutoSimplify(forloop->extent); Simplify(&for_extent); node->extent = for_extent; auto *extent_min = for_extent.As(); @@ -744,7 +751,7 @@ struct VectorizeLoops_ : public IRMutator { vectorizable_ = true; IRMutator<>::Visit(&node->body, &node->body); - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { if (!forloop->extent.As() || forloop->extent.as_int32() % forloop->vectorize_info().factor != 0) { @@ -794,7 +801,7 @@ struct VectorizeLoops_ : public IRMutator { << extent; VLOG(2) << "before vectorize body:\n" << node->body; - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { CudaVectorizer cuda_vectorizer( new_forloop->loop_var, factor, &var_intervals); cuda_vectorizer.Visit(&new_forloop->body); @@ -841,7 +848,7 @@ struct VectorizeLoops_ : public IRMutator { bool UnrollCmpFor(For *outer_for, For *inner_for, Expr *expr) { CHECK(outer_for); CHECK(inner_for); - Expr inner_for_extent = common::AutoSimplify(inner_for->extent); + Expr inner_for_extent = cinn::common::AutoSimplify(inner_for->extent); Simplify(&inner_for_extent); auto 
*extent_min = inner_for_extent.As(); if (extent_min) { @@ -855,7 +862,7 @@ struct VectorizeLoops_ : public IRMutator { auto b_int = a.As(); if (a_int || b_int) { condition = - common::SolveInequality(LE::Make(a, b), outer_for->loop_var); + cinn::common::SolveInequality(LE::Make(a, b), outer_for->loop_var); Simplify(&condition); } if (condition.defined()) { @@ -870,7 +877,7 @@ struct VectorizeLoops_ : public IRMutator { DeviceAPI::UNK, inner_for->body, inner_for->vectorize_info())}); - Expr new_extent_a = common::AutoSimplify(le_n->b() + 1); + Expr new_extent_a = cinn::common::AutoSimplify(le_n->b() + 1); Expr out_for_a = For::Make(outer_for->loop_var, outer_for->min, new_extent_a, @@ -879,9 +886,9 @@ struct VectorizeLoops_ : public IRMutator { inner_for_a, outer_for->vectorize_info()); Var new_iterator_inner( - common::UniqName(inner_for->loop_var->name + "_s")); + cinn::common::UniqName(inner_for->loop_var->name + "_s")); Var new_iterator_outer( - common::UniqName(outer_for->loop_var->name + "_s")); + cinn::common::UniqName(outer_for->loop_var->name + "_s")); Expr inner_for_b = Block::Make({For::Make(new_iterator_inner, @@ -928,10 +935,10 @@ struct VectorizeLoops_ : public IRMutator { int extent_trunc = extent_int / factor; int extent_times = extent_int % factor == 0 ? extent_trunc : extent_trunc + 1; - times = common::make_const(forloop->extent->type(), extent_times); + times = cinn::common::make_const(forloop->extent->type(), extent_times); } else { - times = - common::AutoSimplify(Div::Make(forloop->extent, make_const(factor))); + times = cinn::common::AutoSimplify( + Div::Make(forloop->extent, make_const(factor))); Simplify(×); } @@ -943,21 +950,22 @@ struct VectorizeLoops_ : public IRMutator { if (times_int && forloop->extent.as_int32() >= 1) { var_intervals.emplace( forloop->loop_var->name, - common::CasInterval{0, forloop->extent.as_int32() - 1}); + cinn::common::CasInterval{0, forloop->extent.as_int32() - 1}); } else { var_intervals.erase(forloop->loop_var->name); - var_intervals.emplace(forloop->loop_var->name, - common::CasInterval{Expr(0), forloop->extent - 1}); + var_intervals.emplace( + forloop->loop_var->name, + cinn::common::CasInterval{Expr(0), forloop->extent - 1}); } // create the new forloop { Var new_iterator(Context::Global().NewName("vi")); var_intervals.emplace(new_iterator->name, - common::CasInterval{0, factor - 1}); + cinn::common::CasInterval{0, factor - 1}); // eliminate for 1 Expr new_index; - if (common::is_zero(times - 1)) { + if (cinn::common::is_zero(times - 1)) { new_index = Expr(new_iterator); } else { new_index = Expr(forloop->loop_var) * factor + Expr(new_iterator); diff --git a/paddle/cinn/optim/vectorize_loops_test.cc b/paddle/cinn/optim/vectorize_loops_test.cc index f3ad6ee07f3c44..270e37f1dc46ae 100644 --- a/paddle/cinn/optim/vectorize_loops_test.cc +++ b/paddle/cinn/optim/vectorize_loops_test.cc @@ -52,7 +52,7 @@ TEST(Vectorize, replace_var) { auto funcs = Lower("matmul", stages, {A, B, C}); - Expr func = optim::Optimize(funcs, common::DefaultHostTarget()); + Expr func = optim::Optimize(funcs, cinn::common::DefaultHostTarget()); Target target; target.arch = Target::Arch ::X86; @@ -221,14 +221,14 @@ TEST(Vectorize, single_for) { VectorizeInfo vectorize_info(0, 16); auto forloop = ir::For::Make(loop_var, - common::make_const(0), - common::make_const(16), + cinn::common::make_const(0), + cinn::common::make_const(16), ir::ForType::Vectorized, ir::DeviceAPI::UNK, body, vectorize_info); - forloop = optim::Optimize(forloop, common::DefaultHostTarget()); 
+ forloop = optim::Optimize(forloop, cinn::common::DefaultHostTarget()); LOG(INFO) << "Forloop\n" << forloop; } @@ -244,7 +244,7 @@ TEST(Vectorize, cuda_vectorize) { auto stages = CreateStages({C}); stages[C]->Vectorize(1, 4); - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); auto func = Lower("matmul", stages, {A, B, C}, {}, {}, nullptr, target); auto target_expr = R"ROC( @@ -281,7 +281,7 @@ TEST(Vectorize, cuda_vectorize_with_constant) { auto stages = CreateStages({C}); stages[C]->Vectorize(1, 4); - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); auto func = Lower("mul_const", stages, {A, C}, {}, {}, nullptr, target); } diff --git a/paddle/cinn/poly/domain.cc b/paddle/cinn/poly/domain.cc index dc6a27d82bfd87..c6f4479bf8bba9 100644 --- a/paddle/cinn/poly/domain.cc +++ b/paddle/cinn/poly/domain.cc @@ -62,7 +62,7 @@ std::string Domain::__str__() const { isl::set Domain::to_isl() const { VLOG(3) << "isl::set " << __str__(); - isl::set x(common::Context::isl_ctx(), __str__()); + isl::set x(cinn::common::Context::isl_ctx(), __str__()); return x; } diff --git a/paddle/cinn/poly/graph.cc b/paddle/cinn/poly/graph.cc index c647cf49565dc3..ef5aa875d5b4cf 100755 --- a/paddle/cinn/poly/graph.cc +++ b/paddle/cinn/poly/graph.cc @@ -85,12 +85,12 @@ std::string DataFlowGraphNode::id() const { bool DataFlowGraphNode::IsLinkedTo(const DataFlowGraphNode* node) const { bool found = std::find_if(inlinks_.begin(), inlinks_.end(), - [=](const Shared<common::GraphEdge>& x) { + [=](const Shared<cinn::common::GraphEdge>& x) { return x->source() == node; }) != std::end(inlinks_); return found || std::find_if(outlinks_.begin(), outlinks_.end(), - [=](const Shared<common::GraphEdge>& x) { + [=](const Shared<cinn::common::GraphEdge>& x) { return x->sink() == node; }) != std::end(outlinks_); } diff --git a/paddle/cinn/poly/graph.h b/paddle/cinn/poly/graph.h index e0c15f7be793fb..14b2a61f41e499 100644 --- a/paddle/cinn/poly/graph.h +++ b/paddle/cinn/poly/graph.h @@ -27,7 +27,7 @@ namespace cinn { namespace poly { -struct DataFlowGraphNode : public common::GraphNode { +struct DataFlowGraphNode : public cinn::common::GraphNode { //! Used for union find to gather groups. DataFlowGraphNode* group_parent{}; //! Each stage belongs to a node. @@ -58,12 +58,12 @@ struct DataFlowGraphNode : public common::GraphNode { const DataFlowGraphNode* b); }; -struct DataFlowGraphEdge : public common::GraphEdge {}; +struct DataFlowGraphEdge : public cinn::common::GraphEdge {}; /** * DataFlowGraph help to record the data dependencies between the Stages. */ -struct DataFlowGraph : public common::Graph {}; +struct DataFlowGraph : public cinn::common::Graph {}; /** * Create a dependency graph given some stages. @@ -93,7 +93,7 @@ struct Group { * Nodes has the stages has dependency relation and has the same iteration * domain, then they will be put in the same sub-graph. */ -std::vector<Group> PartitionGraphByIterationDomain(common::Graph* graph); +std::vector<Group> PartitionGraphByIterationDomain(cinn::common::Graph* graph); } // namespace detail diff --git a/paddle/cinn/poly/poly_scheduler.cc b/paddle/cinn/poly/poly_scheduler.cc index b916e5952ffe4b..0e3b84a70e8e23 100644 --- a/paddle/cinn/poly/poly_scheduler.cc +++ b/paddle/cinn/poly/poly_scheduler.cc @@ -33,11 +33,11 @@ namespace detail { //! Visit the nodes in topological order, if one node is valid to visit, visit //! it and check whether its out link children are ready to visit, merge them to //! the same group. NOTE this is discarded.
-std::vector PartitionGraphByIterationDomain(common::Graph* graph) { +std::vector PartitionGraphByIterationDomain(cinn::common::Graph* graph) { VLOG(3) << "graph:\n" << graph->Visualize(); // collect indegrees for naive topological traversal. std::map indegree; - for (common::GraphNode* n : graph->nodes()) { + for (cinn::common::GraphNode* n : graph->nodes()) { auto* node = n->safe_as(); indegree[node] = node->inlinks().size(); } @@ -145,10 +145,11 @@ bool CheckGroupValid(const std::vector& groups) { } //! Tell if \param a links to \param b. -bool IsLinkTo(const common::GraphNode* a, const common::GraphNode* b) { +bool IsLinkTo(const cinn::common::GraphNode* a, + const cinn::common::GraphNode* b) { // dfs - std::stack stack({a}); - std::unordered_set visited; + std::stack stack({a}); + std::unordered_set visited; while (!stack.empty()) { auto* top = stack.top(); stack.pop(); @@ -169,9 +170,9 @@ bool IsLinkTo(const common::GraphNode* a, const common::GraphNode* b) { return false; } -bool IsBetween(const common::GraphNode* x, - const common::GraphNode* a, - const common::GraphNode* b) { +bool IsBetween(const cinn::common::GraphNode* x, + const cinn::common::GraphNode* a, + const cinn::common::GraphNode* b) { if (IsLinkTo(a, x) && IsLinkTo(x, b)) return true; if (IsLinkTo(x, a) && IsLinkTo(b, x)) return true; return false; @@ -191,8 +192,8 @@ std::vector TopoSortGroups(std::vector& groups) { // NOLINT node2group[node->id()] = group; in_degree += node->inlinks().size(); for (auto& node2 : group->nodes) { - if (node2->as()->IsLinkedTo( - node->as())) { + if (node2->as()->IsLinkedTo( + node->as())) { in_degree--; } } @@ -240,7 +241,7 @@ std::vector TopoSortGroups(std::vector& groups) { // NOLINT * 2. If ComputeAt is set between two stages and their iteration domain matches, * the stages will be put in a group with relative order. */ -std::vector NaivePartitionGraph(common::Graph* graph) { +std::vector NaivePartitionGraph(cinn::common::Graph* graph) { std::map> node_groups; auto topo_order = graph->topological_order(); auto& nodes_in_order = std::get<0>(topo_order); @@ -252,7 +253,7 @@ std::vector NaivePartitionGraph(common::Graph* graph) { } // process compute_at - absl::flat_hash_map + absl::flat_hash_map node2score; // record each node's score for sorting. int score = 0; for (auto* n : nodes_in_order) { diff --git a/paddle/cinn/poly/schedule.cc b/paddle/cinn/poly/schedule.cc index 43357dbdfb1044..1904b76cb777d9 100644 --- a/paddle/cinn/poly/schedule.cc +++ b/paddle/cinn/poly/schedule.cc @@ -184,7 +184,7 @@ void SchedulerBase::AddStage(const Stage &x) { std::string id = isl_map_get_tuple_name(x.transform().get(), isl_dim_in); schedule_graph_.RegisterNode( x.id(), - common::make_shared( + cinn::common::make_shared( id, isl_get_dim_names(x.transform(), isl_dim_out), &x)); // record the longest dimensions. diff --git a/paddle/cinn/poly/schedule.h b/paddle/cinn/poly/schedule.h index 1c28c5961e4fd4..77059ab863f232 100755 --- a/paddle/cinn/poly/schedule.h +++ b/paddle/cinn/poly/schedule.h @@ -51,7 +51,7 @@ struct TimeDim { }; class ScheduleGraphNode; -struct ScheduleGraph : public common::Graph {}; +struct ScheduleGraph : public cinn::common::Graph {}; /** * ISL schedule map with time space, used to generate the final schedule. 
@@ -205,9 +205,9 @@ std::unique_ptr CreateSchedule( // std::vector GatherStagesInTensors(const std::vector &xs, // bool with_placeholder = false); -struct ScheduleGraphEdge : public common::GraphEdge { - ScheduleGraphEdge(common::GraphNode *a, common::GraphNode *b) - : common::GraphEdge(a, b) {} +struct ScheduleGraphEdge : public cinn::common::GraphEdge { + ScheduleGraphEdge(cinn::common::GraphNode *a, cinn::common::GraphNode *b) + : cinn::common::GraphEdge(a, b) {} //! Dependency level. int level{-1}; @@ -216,7 +216,7 @@ struct ScheduleGraphEdge : public common::GraphEdge { /** * Node in the schedule graph. */ -struct ScheduleGraphNode : public common::GraphNode { +struct ScheduleGraphNode : public cinn::common::GraphNode { TimeSchedule time_schedule; Stage *stage{}; diff --git a/paddle/cinn/poly/stage.cc b/paddle/cinn/poly/stage.cc index 53dc1ab6aa6cfb..e04c178805ae47 100644 --- a/paddle/cinn/poly/stage.cc +++ b/paddle/cinn/poly/stage.cc @@ -227,8 +227,8 @@ std::tuple // Stage::Tile(int level0, int level1, int factor0, int factor1) { AssertAxisIsNotLocked(level0); AssertAxisIsNotLocked(level1); - Iterator i0(common::axis_name(level0)); - Iterator i1(common::axis_name(level1)); + Iterator i0(cinn::common::axis_name(level0)); + Iterator i1(cinn::common::axis_name(level1)); return Tile(i0, i1, factor0, factor1); } @@ -291,7 +291,7 @@ void Stage::ChangeIndex(Stage *other) { } this->tensor()->new_indices = indices[0]; - std::vector axis_var = common::GenDefaultAxis(indices[0].size()); + std::vector axis_var = cinn::common::GenDefaultAxis(indices[0].size()); for (int i = 0; i < axis_var.size(); i++) { optim::ReplaceVarWithExpr(&(this->expr_), axis_var[i], indices[0][i]); } @@ -325,7 +325,7 @@ void Stage::AddForLoopInTransform(std::vector> &indices) { int int_range = GetRange(indices, i); if (int_range == 0) continue; - std::string dim_name = common::axis_name(i) + "_at"; + std::string dim_name = cinn::common::axis_name(i) + "_at"; Var dim_var(dim_name); indices[0][i] = ir::Add::Make(indices[0][i], Expr(dim_var)); std::string this_domain = isl_set_to_str(domain_.get()); diff --git a/paddle/cinn/poly/stage.h b/paddle/cinn/poly/stage.h index cf0586710bc9c7..ac36e5fd98e092 100644 --- a/paddle/cinn/poly/stage.h +++ b/paddle/cinn/poly/stage.h @@ -529,7 +529,7 @@ inline std::string OuterName(const std::string& name); inline std::string OuterName(const Iterator& iterator); inline Iterator DefaultIterator(int i) { - return Iterator(common::axis_name(i)); + return Iterator(cinn::common::axis_name(i)); } /** diff --git a/paddle/cinn/poly/stage_test.cc b/paddle/cinn/poly/stage_test.cc index a9c00a82f2487c..e8cbf9dd8ff878 100644 --- a/paddle/cinn/poly/stage_test.cc +++ b/paddle/cinn/poly/stage_test.cc @@ -215,10 +215,10 @@ function fn (_A, _A1, _B) )ROC"; ASSERT_EQ(utils::Trim(target), utils::GetStreamCnt(fn)); - Module::Builder builder("module", common::DefaultHostTarget()); + Module::Builder builder("module", cinn::common::DefaultHostTarget()); builder.AddFunction(fn); - CodeGenC codegen(common::DefaultHostTarget()); + CodeGenC codegen(cinn::common::DefaultHostTarget()); codegen.SetInlineBuiltinCodes(false); LOG(INFO) << "source:\n" << codegen.Compile(builder.Build(), @@ -320,7 +320,8 @@ void TestElementwiseAddJitPrecession( auto fn = Lower("fn", stages, {A, B, C}); LOG(INFO) << "fn:\n" << fn; - Module::Builder module_builder("some_module", common::DefaultHostTarget()); + Module::Builder module_builder("some_module", + cinn::common::DefaultHostTarget()); module_builder.AddFunction(fn); auto jit = 
backends::SimpleJIT::Create(); @@ -329,17 +330,20 @@ void TestElementwiseAddJitPrecession( auto* fn_handler = reinterpret_cast(_fn_handler); // create buffer and args - auto A_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_random() - .Build(); - auto B_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_random() - .Build(); - auto C_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_zero() - .Build(); + auto A_buf = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_random() + .Build(); + auto B_buf = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_random() + .Build(); + auto C_buf = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_zero() + .Build(); auto arg_pack = - common::ArgsBuilder().Add(A_buf).Add(B_buf).Add(C_buf).Build(); + cinn::common::ArgsBuilder().Add(A_buf).Add(B_buf).Add(C_buf).Build(); fn_handler(arg_pack.data(), arg_pack.size()); @@ -511,10 +515,10 @@ TEST(ShareBufferWith, basic) { LOG(INFO) << "fn:\n" << fn; - Module::Builder builder("some_module", common::DefaultHostTarget()); + Module::Builder builder("some_module", cinn::common::DefaultHostTarget()); builder.AddFunction(fn); - CodeGenC codegen(common::DefaultHostTarget()); + CodeGenC codegen(cinn::common::DefaultHostTarget()); codegen.SetInlineBuiltinCodes(false); LOG(INFO) << "\n" diff --git a/paddle/cinn/pybind/CMakeLists.txt b/paddle/cinn/pybind/CMakeLists.txt index 33dc27860f9473..ec409578930df9 100755 --- a/paddle/cinn/pybind/CMakeLists.txt +++ b/paddle/cinn/pybind/CMakeLists.txt @@ -25,7 +25,8 @@ if(WITH_CUDA) DEPS cinncore_static cinn_runtime - pybind) + pybind + common) message("cuda_nvrtc: ${CUDA_NVRTC}") target_link_libraries(core_api ${CUDA_NVRTC_LIB} ${CUDA_LIBRARIES} cuda cudnn) if(NVTX_FOUND) @@ -45,7 +46,7 @@ else() ${llvm_libs}) endif() -target_link_libraries(core_api ${MKLML_LIB} isl ginac) +target_link_libraries(core_api ${MKLML_LIB} isl ginac common) if(USE_OPENMP STREQUAL "gnu") target_link_libraries(core_api ${OpenMP_CXX_LIBRARIES}) message(STATUS "OpenMP lib: ${OpenMP_CXX_LIBRARIES}") diff --git a/paddle/cinn/pybind/bind_utils.h b/paddle/cinn/pybind/bind_utils.h index 397ee42dd4bf17..2e509ddb5d97ba 100644 --- a/paddle/cinn/pybind/bind_utils.h +++ b/paddle/cinn/pybind/bind_utils.h @@ -30,9 +30,9 @@ namespace py = pybind11; namespace cinn::pybind { -using common::CINNValue; -using common::Shared; -using common::Type; +using cinn::common::CINNValue; +using cinn::common::Shared; +using cinn::common::Type; using ir::Expr; using ir::ExprNode; diff --git a/paddle/cinn/pybind/common.cc b/paddle/cinn/pybind/common.cc index bdb4b46c848ef9..80ff3abba928da 100644 --- a/paddle/cinn/pybind/common.cc +++ b/paddle/cinn/pybind/common.cc @@ -27,11 +27,11 @@ namespace py = pybind11; namespace cinn::pybind { -using common::bfloat16; -using common::CINNValue; -using common::float16; -using common::Target; -using common::Type; +using cinn::common::bfloat16; +using cinn::common::CINNValue; +using cinn::common::float16; +using cinn::common::Target; +using cinn::common::Type; using utils::GetStreamCnt; using utils::StringFormat; @@ -41,7 +41,7 @@ void BindType(py::module *); void BindShared(py::module *); void BindCinnValue(py::module *); -void ResetGlobalNameID() { common::Context::Global().ResetNameId(); } +void ResetGlobalNameID() { cinn::common::Context::Global().ResetNameId(); } void BindTarget(py::module *m) { py::class_ target(*m, "Target"); @@ -57,9 
+57,9 @@ void BindTarget(py::module *m) { .def("defined", &Target::defined) .def("runtime_arch", &Target::runtime_arch); - m->def("DefaultHostTarget", &common::DefaultHostTarget) - .def("DefaultNVGPUTarget", &common::DefaultNVGPUTarget) - .def("DefaultTarget", &common::DefaultTarget); + m->def("DefaultHostTarget", &cinn::common::DefaultHostTarget) + .def("DefaultNVGPUTarget", &cinn::common::DefaultNVGPUTarget) + .def("DefaultTarget", &cinn::common::DefaultTarget); m->def("get_target", &cinn::runtime::CurrentTarget::GetCurrentTarget); m->def("set_target", @@ -153,68 +153,68 @@ void BindType(py::module *m) { .value("HandleHandle", Type::cpp_type_t::HandleHandle) .export_values(); - m->def("Void", &common::Void) - .def("Int", &common::Int, py::arg("bits"), py::arg("lanes") = 1) - .def("UInt", &common::UInt, py::arg("bits"), py::arg("lanes") = 1) + m->def("Void", &cinn::common::Void) + .def("Int", &cinn::common::Int, py::arg("bits"), py::arg("lanes") = 1) + .def("UInt", &cinn::common::UInt, py::arg("bits"), py::arg("lanes") = 1) .def("Float", - &common::Float, + &cinn::common::Float, py::arg("bits"), py::arg("lanes") = 1, py::arg("st") = Type::specific_type_t::None) - .def("Float16", &common::Float16, py::arg("lanes") = 1) - .def("BFloat16", &common::BFloat16, py::arg("lanes") = 1) - .def("Bool", &common::Bool, py::arg("lanes") = 1) - .def("String", &common::String); + .def("Float16", &cinn::common::Float16, py::arg("lanes") = 1) + .def("BFloat16", &cinn::common::BFloat16, py::arg("lanes") = 1) + .def("Bool", &cinn::common::Bool, py::arg("lanes") = 1) + .def("String", &cinn::common::String); m->def( "make_const", [](const Type &type, int32_t val) -> Expr { - return common::make_const(type, val); + return cinn::common::make_const(type, val); }, py::arg("type"), py::arg("val")) .def( "make_const", [](const Type &type, int64_t val) -> Expr { - return common::make_const(type, val); + return cinn::common::make_const(type, val); }, py::arg("type"), py::arg("val")) .def( "make_const", [](const Type &type, float val) -> Expr { - return common::make_const(type, val); + return cinn::common::make_const(type, val); }, py::arg("type"), py::arg("val")) .def( "make_const", [](const Type &type, double val) -> Expr { - return common::make_const(type, val); + return cinn::common::make_const(type, val); }, py::arg("type"), py::arg("val")) .def( "make_const", [](const Type &type, bool val) -> Expr { - return common::make_const(type, val); + return cinn::common::make_const(type, val); }, py::arg("type"), py::arg("val")); m->def("type_of", [](absl::string_view dtype) { - return common::Str2Type(dtype.data()); + return cinn::common::Str2Type(dtype.data()); }); } void BindShared(py::module *m) { - py::class_ ref_count(*m, "RefCount"); + py::class_ ref_count(*m, "RefCount"); ref_count.def(py::init<>()) - .def("inc", &common::RefCount::Inc) - .def("dec", &common::RefCount::Dec) - .def("is_zero", &common::RefCount::is_zero) - .def("to_string", &common::RefCount::to_string) - .def("val", &common::RefCount::val); + .def("inc", &cinn::common::RefCount::Inc) + .def("dec", &cinn::common::RefCount::Dec) + .def("is_zero", &cinn::common::RefCount::is_zero) + .def("to_string", &cinn::common::RefCount::to_string) + .def("val", &cinn::common::RefCount::val); } // TODO(wanghaipeng03) using true_type or false_type as tag disptcher losses @@ -240,8 +240,8 @@ inline void __binary_op_visitor_dispatch( } void BindCinnValue(py::module *m) { - using common::_CINNValuePack_; - using common::CINNValuePack; + using 
cinn::common::_CINNValuePack_; + using cinn::common::CINNValuePack; DefineShared<_CINNValuePack_>(m, "_CINNValuePack_"); @@ -259,7 +259,7 @@ void BindCinnValue(py::module *m) { .def("__len__", &_CINNValuePack_::size) .def("type_info", &_CINNValuePack_::type_info); - py::class_> + py::class_> cinn_value_pack_shared(*m, "CINNValuePack"); cinn_value_pack_shared.def(py::init<_CINNValuePack_ *>()) .def("__getitem__", diff --git a/paddle/cinn/pybind/framework.cc b/paddle/cinn/pybind/framework.cc index 752ac5003f43a2..fde1f7dd8eba00 100644 --- a/paddle/cinn/pybind/framework.cc +++ b/paddle/cinn/pybind/framework.cc @@ -51,15 +51,15 @@ void BindFramework(pybind11::module *m) { const std::vector &inputs, const std::vector &out_types, const std::vector> &output_shapes, - const common::Target &target) { + const cinn::common::Target &target) { const Operator *op_ptr = Operator::Get(key); auto impl = OpStrategy::SelectImpl( self[op_ptr](attrs, inputs, out_types, output_shapes, target)); - std::vector temp_inputs; + std::vector temp_inputs; std::vector res; for (auto &tensor : inputs) { res.push_back(tensor); - temp_inputs.push_back(common::CINNValue(tensor)); + temp_inputs.push_back(cinn::common::CINNValue(tensor)); } ir::LoweredFunc func; @@ -73,7 +73,7 @@ void BindFramework(pybind11::module *m) { std::vector funcs = hlir::framework::GetFuncFromImpl( impl, - common::CINNValuePack{temp_inputs}, + cinn::common::CINNValuePack{temp_inputs}, res, input_output_names, key, @@ -114,7 +114,7 @@ void BindFramework(pybind11::module *m) { .def("get_tensor", [](Scope &self, const std::string &name, const Target &target) { auto t = self.GetTensor(name); - py::dtype dt(common::Type2Str(t->type())); + py::dtype dt(cinn::common::Type2Str(t->type())); py::array::ShapeContainer shape(t->shape().data().begin(), t->shape().data().end()); py::array array(std::move(dt), std::move(shape)); @@ -140,8 +140,10 @@ void BindFramework(pybind11::module *m) { }) .def("var_names", &Scope::var_names); - py::class_>(*m, "SharedTensor"); - py::class_>(*m, "Tensor") + py::class_>(*m, + "SharedTensor"); + py::class_>(*m, + "Tensor") .def(py::init<>()) .def("shape", [](hlir::framework::Tensor &self) { return self->shape().data(); }) @@ -151,8 +153,9 @@ void BindFramework(pybind11::module *m) { }) .def( "numpy", - [](hlir::framework::Tensor &self, const common::Target &target) { - std::string type_str = common::Type2Str(self->type()); + [](hlir::framework::Tensor &self, + const cinn::common::Target &target) { + std::string type_str = cinn::common::Type2Str(self->type()); if (type_str == "bfloat16") { type_str = "uint16"; } @@ -183,8 +186,9 @@ void BindFramework(pybind11::module *m) { "from_numpy", [](hlir::framework::Tensor &self, py::array array, - const common::Target &target) { - CHECK(array.dtype().is(py::dtype(common::Type2Str(self->type())))) + const cinn::common::Target &target) { + CHECK(array.dtype().is( + py::dtype(cinn::common::Type2Str(self->type())))) << "currently only support float32 data type as input"; hlir::framework::shape_t shape; std::copy_n(array.shape(), array.ndim(), std::back_inserter(shape)); diff --git a/paddle/cinn/pybind/frontend.cc b/paddle/cinn/pybind/frontend.cc index aafa9bedf40d07..05e814ce107f80 100644 --- a/paddle/cinn/pybind/frontend.cc +++ b/paddle/cinn/pybind/frontend.cc @@ -41,7 +41,7 @@ #include "paddle/cinn/utils/timer.h" namespace cinn::pybind { -using common::Type; +using cinn::common::Type; using frontend::Placeholder; namespace py = pybind11; using namespace cinn::frontend; // NOLINT @@ -78,7 
+78,8 @@ void BindFrontend(pybind11::module *m) { .def("id", [](Variable &self) { return self->id; }) .def("name", [](Variable &self) { return self->id; }) .def("shape", [](Variable &self) { return self->shape; }) - .def("type", [](Variable &self) { return common::Type2Str(self->type); }) + .def("type", + [](Variable &self) { return cinn::common::Type2Str(self->type); }) .def("set_type", [](Variable &self, const Type &type) { self->type = type; @@ -86,7 +87,7 @@ void BindFrontend(pybind11::module *m) { }) .def("set_type", [](Variable &self, const std::string &type) { - self->type = common::Str2Type(type); + self->type = cinn::common::Str2Type(type); return self; }) .def("set_shape", [](Variable &self, const std::vector &shape) { @@ -95,15 +96,16 @@ void BindFrontend(pybind11::module *m) { }); py::class_(*m, "Placeholder") // - .def(py::init &, absl::string_view>(), py::arg("type"), py::arg("shape"), py::arg("id") = "") .def("shape", &Placeholder::shape) - .def("type", - [](Placeholder &self) { return common::Type2Str(self.type()); }) + .def( + "type", + [](Placeholder &self) { return cinn::common::Type2Str(self.type()); }) .def("id", &Placeholder::id) .def("name", &Placeholder::id) .def("__str__", [](const Placeholder &self) { return self.id(); }); @@ -179,7 +181,7 @@ void BindFrontend(pybind11::module *m) { .def( "build_and_get_output", [](Program &self, - const common::Target &target, + const cinn::common::Target &target, const std::vector &tensor_inputs, const std::vector &input_data, const std::vector &tensor_outputs, @@ -263,7 +265,7 @@ void BindFrontend(pybind11::module *m) { .def("apply_pass", [](Program &self, const std::unordered_set &fetch_ids, - const common::Target &target, + const cinn::common::Target &target, const std::vector &passes = {}) { auto graph = Optimize(&self, fetch_ids, target, passes); return graph->fusion_groups.size(); @@ -294,7 +296,7 @@ void BindFrontend(pybind11::module *m) { .def( "test_benchmark", [](Program &self, - const common::Target &target, + const cinn::common::Target &target, const std::vector &tensor_inputs, const std::vector &input_data, const Variable &tensor_out, @@ -340,7 +342,7 @@ void BindFrontend(pybind11::module *m) { .def( "test_benchmark_with_code", [](Program &self, - const common::Target &target, + const cinn::common::Target &target, const std::vector &tensor_inputs, const std::vector &input_data, const Variable &tensor_out, @@ -485,7 +487,7 @@ void BindFrontend(pybind11::module *m) { // clang-format on .def(py::init(), py::arg("name") = "") .def("create_input", - static_cast &, const std::string &)>( &NetBuilder::CreateInput), @@ -843,7 +845,7 @@ void BindFrontend(pybind11::module *m) { // used always .def_static( "build_and_compile", - [](const common::Target &target, + [](const cinn::common::Target &target, NetBuilder &builder, const CinnComputation::CompileOptions &options) { return CinnComputation::BuildAndCompile(target, builder, options); @@ -853,7 +855,7 @@ void BindFrontend(pybind11::module *m) { py::arg("options") = CinnComputation::DefaultCompileOptions()) .def_static( "compile", - [](const common::Target &target, + [](const cinn::common::Target &target, Program &program, const CinnComputation::CompileOptions &options) { return CinnComputation::Compile(target, program, options); @@ -863,7 +865,7 @@ void BindFrontend(pybind11::module *m) { py::arg("options") = CinnComputation::DefaultCompileOptions()) .def_static( "compile_paddle_model", - [](const common::Target &target, + [](const cinn::common::Target &target, const 
std::string &model_path, const std::vector &input_names, const std::vector &input_shapes, @@ -888,7 +890,7 @@ void BindFrontend(pybind11::module *m) { py::class_(*m, "PaddleModelConvertor") .def(py::init<>()) - .def(py::init, std::shared_ptr>(), py::arg("target"), diff --git a/paddle/cinn/pybind/ir/ir.cc b/paddle/cinn/pybind/ir/ir.cc index f569bd2c973bee..6118f7c8a5e695 100644 --- a/paddle/cinn/pybind/ir/ir.cc +++ b/paddle/cinn/pybind/ir/ir.cc @@ -41,7 +41,7 @@ std::vector AxisMap(const std::string& kinds, // TODO(6clc): set bound of IterVar - Var iter_var = ir::_Var_::Make("iter_tmp", common::Int(32)); + Var iter_var = ir::_Var_::Make("iter_tmp", cinn::common::Int(32)); if (c == 'S') { iter_var->is_reduce_axis = false; } else if (c == 'R') { @@ -89,7 +89,7 @@ IRContext Sequential(Expr min, Expr extent) { ForContextNode* for_ctx_node = new ForContextNode(); for_ctx_node->min = min; for_ctx_node->extent = extent; - for_ctx_node->loop_var = ir::_Var_::Make("v", common::Int(32)); + for_ctx_node->loop_var = ir::_Var_::Make("v", cinn::common::Int(32)); return IRContext(for_ctx_node); } diff --git a/paddle/cinn/pybind/ir/ir_api.cc b/paddle/cinn/pybind/ir/ir_api.cc index 9d8320c31c7adf..b2e625e741ba62 100644 --- a/paddle/cinn/pybind/ir/ir_api.cc +++ b/paddle/cinn/pybind/ir/ir_api.cc @@ -135,8 +135,8 @@ void BindNode(py::module *m) { DefineShared(m, "IrNode"); // class IrNodeRef : public Shared - py::class_> ir_node_ref(*m, - "IrNodeRef"); + py::class_> ir_node_ref( + *m, "IrNodeRef"); ir_node_ref.def(py::init<>()) .def(py::init()) .def(py::init()) @@ -477,9 +477,9 @@ void BindIrIr(py::module *m) { py::class_ var(*m, "Var"); var.def(py::init<>()) .def(py::init()) - .def(py::init(), + .def(py::init(), arg("name_hint"), - arg("t") = common::type_of()) + arg("t") = cinn::common::type_of()) .def(py::init()) .def(py::init()) .def(py::init()) @@ -734,7 +734,7 @@ void BindIrTensor(py::module *m) { auto PackedFuncCall(lang::PackedFunc &self, py::args args) { // NOLINT lang::Args cinn_args; - using common::CINNValue; + using cinn::common::CINNValue; for (auto handle : args) { if (py::isinstance(handle)) { cinn_args.Append(CINNValue(py::cast(handle))); @@ -766,7 +766,9 @@ void BindPackedFunc(py::module *m) { [](lang::Args &self, int i) { return self[i]; }, py::return_value_policy::reference) .def("__setitem__", - [](lang::Args &self, int i, common::CINNValue &v) { self[i] = v; }); + [](lang::Args &self, int i, cinn::common::CINNValue &v) { + self[i] = v; + }); py::class_ packed_func(*m, "PackedFunc"); packed_func.def(py::init<>()) diff --git a/paddle/cinn/pybind/ir/ir_context.cc b/paddle/cinn/pybind/ir/ir_context.cc index 8af89d974222f1..8b4d0a4cf1e1d3 100644 --- a/paddle/cinn/pybind/ir/ir_context.cc +++ b/paddle/cinn/pybind/ir/ir_context.cc @@ -92,7 +92,7 @@ void IRBuilderNode::Reset() { } IRBuilder::IRBuilder() { - common::Shared n(new IRBuilderNode()); + cinn::common::Shared n(new IRBuilderNode()); n->Reset(); data_ = n; } diff --git a/paddle/cinn/pybind/ir/ir_context.h b/paddle/cinn/pybind/ir/ir_context.h index 89b65512e26664..8cdf0ed85c0818 100644 --- a/paddle/cinn/pybind/ir/ir_context.h +++ b/paddle/cinn/pybind/ir/ir_context.h @@ -29,7 +29,7 @@ namespace pybind { /** * A base context that represents the CINN IR that need context information */ -class IRContextNode : public common::Object { +class IRContextNode : public cinn::common::Object { public: std::vector exprs; @@ -60,7 +60,7 @@ class IRContext { void add_expr(Expr expr) { data_->exprs.push_back(expr); } public: - common::Shared data_; + 
cinn::common::Shared data_; public: template @@ -196,7 +196,7 @@ class ElseContextNode : public IRContextNode { /** * A stack used to store current IRContext */ -class IRBuilderNode : public common::Object { +class IRBuilderNode : public cinn::common::Object { public: std::vector contexts; Expr result; @@ -226,7 +226,7 @@ class IRBuilder { static IRBuilder CurrentIRBuilder(); public: - common::Shared data_; + cinn::common::Shared data_; }; std::vector* IRBuilderStack(); diff --git a/paddle/cinn/pybind/lang.cc b/paddle/cinn/pybind/lang.cc index 8e121fc5628c93..5f7a80e12e2c06 100644 --- a/paddle/cinn/pybind/lang.cc +++ b/paddle/cinn/pybind/lang.cc @@ -35,7 +35,7 @@ namespace py = pybind11; namespace cinn::pybind { -using common::Type; +using cinn::common::Type; using lang::Placeholder; using py::arg; using utils::GetStreamCnt; @@ -70,7 +70,7 @@ void BindLower(py::module *m) { arg("scalar_args") = std::vector(), arg("temp_tensors") = std::vector(), arg("b") = nullptr, - arg("target") = common::DefaultHostTarget(), + arg("target") = cinn::common::DefaultHostTarget(), arg("supprt_ir_schedule") = false); } @@ -84,7 +84,7 @@ void BindLowerVec(py::module *m) { arg("scalar_args") = std::vector(), arg("temp_tensors") = std::vector(), arg("b") = nullptr, - arg("target") = common::DefaultHostTarget(), + arg("target") = cinn::common::DefaultHostTarget(), arg("supprt_ir_schedule") = false); } @@ -144,13 +144,13 @@ void BindModule(py::module *m) { .def("submodules", &ir::Module::submodules) .def("compile", &ir::Module::Compile) .def("get_c_code", [](const ir::Module &self) -> std::string { - backends::CodeGenC codegen(common::DefaultHostTarget()); + backends::CodeGenC codegen(cinn::common::DefaultHostTarget()); codegen.SetInlineBuiltinCodes(false); return codegen.Compile(self, backends::CodeGenC::OutputKind::CImpl); }); py::class_ builder(module, "Builder"); - builder.def(py::init()) + builder.def(py::init()) .def("add_function", [](ir::Module::Builder &self, ir::LoweredFunc func) { if (self.GetTargetArch() == Target::Arch::NVGPU) { diff --git a/paddle/cinn/pybind/pe.cc b/paddle/cinn/pybind/pe.cc index 94204ae4b3e44c..2cd837ab2da3f4 100644 --- a/paddle/cinn/pybind/pe.cc +++ b/paddle/cinn/pybind/pe.cc @@ -26,7 +26,7 @@ namespace py = pybind11; namespace cinn { namespace pybind { -using common::Type; +using cinn::common::Type; using lang::Placeholder; using py::arg; using utils::GetStreamCnt; @@ -137,7 +137,7 @@ void BindPE(py::module* m) { py::arg("trans_b") = false, py::arg("alpha") = 1, py::arg("out") = "T_Matmul_mkl_out", - py::arg("target") = common::DefaultHostTarget()); + py::arg("target") = cinn::common::DefaultHostTarget()); } } // namespace pybind diff --git a/paddle/cinn/pybind/runtime.cc b/paddle/cinn/pybind/runtime.cc index a4d14edc709316..91db8af397ec29 100644 --- a/paddle/cinn/pybind/runtime.cc +++ b/paddle/cinn/pybind/runtime.cc @@ -76,9 +76,9 @@ cinn_buffer_t *CreateBufferFromNumpy(py::array data, cinn_buffer_t *CreateBufferFromNumpy( py::array data, - common::Target target = common::DefaultHostTarget(), + cinn::common::Target target = cinn::common::DefaultHostTarget(), int align = 0) { - if (target == common::DefaultHostTarget()) { + if (target == cinn::common::DefaultHostTarget()) { return CreateBufferFromNumpy(data, cinn_x86_device); } else if (target.arch == Target::Arch::NVGPU) { #ifdef CINN_WITH_CUDA @@ -276,7 +276,7 @@ void BindCinnRuntime(py::module *m) { arg("data"), arg("device"), arg("align") = 0) - .def(py::init(py::overload_cast( + .def(py::init(py::overload_cast( 
&CreateBufferFromNumpy)), arg("data"), arg("target"), diff --git a/paddle/cinn/runtime/cpu/cblas.cc b/paddle/cinn/runtime/cpu/cblas.cc index 8a9f7be63083cc..9e08c128cb66b0 100644 --- a/paddle/cinn/runtime/cpu/cblas.cc +++ b/paddle/cinn/runtime/cpu/cblas.cc @@ -137,14 +137,14 @@ void cinn_call_cholesky_host( CINN_REGISTER_HELPER(cinn_cpu_mkl) { using namespace cinn; // NOLINT using backends::FunctionProto; - auto host_target = common::DefaultHostTarget(); + auto host_target = cinn::common::DefaultHostTarget(); FunctionProto::shape_inference_t inference_shape_gemm = [](const std::vector& args, int offset) { CHECK_EQ(offset, 0UL) << "Only one output"; CHECK_EQ(args.size(), 12UL) << "Wrong number of arguments passed in"; - auto M = common::AutoSimplify(args[1]); - auto N = common::AutoSimplify(args[2]); + auto M = cinn::common::AutoSimplify(args[1]); + auto N = cinn::common::AutoSimplify(args[2]); std::vector shape; shape.push_back(M); shape.push_back(N); @@ -159,16 +159,16 @@ CINN_REGISTER_HELPER(cinn_cpu_mkl) { auto A_tensor = A.as_tensor(); CHECK(A_tensor); - auto batch_size = common::AutoSimplify(args[1]); + auto batch_size = cinn::common::AutoSimplify(args[1]); int32_t batch_size_val = batch_size.as_int32(); - auto M = common::AutoSimplify(args[2]); - auto N = common::AutoSimplify(args[3]); + auto M = cinn::common::AutoSimplify(args[2]); + auto N = cinn::common::AutoSimplify(args[3]); std::vector shape; int total = 1; for (auto& v : A_tensor->shape) { - auto val = common::AutoSimplify(v); + auto val = cinn::common::AutoSimplify(v); CHECK(val.is_constant()); shape.push_back(val); total *= val.as_int32(); diff --git a/paddle/cinn/runtime/cpu/host_intrinsics_test.cc b/paddle/cinn/runtime/cpu/host_intrinsics_test.cc index 22e13f8b0c3abf..f7f5ba7cb085d2 100644 --- a/paddle/cinn/runtime/cpu/host_intrinsics_test.cc +++ b/paddle/cinn/runtime/cpu/host_intrinsics_test.cc @@ -41,7 +41,7 @@ TEST(tanh, basic) { auto jit = backends::SimpleJIT::Create(); - ir::Module::Builder builder("module1", common::DefaultHostTarget()); + ir::Module::Builder builder("module1", cinn::common::DefaultHostTarget()); auto fn = Lower("fn", stages, {x, y}); LOG(INFO) << "fn:\n" << fn; @@ -54,13 +54,15 @@ TEST(tanh, basic) { auto fnp = reinterpret_cast(fn_ptr); ASSERT_TRUE(fnp); - auto* x_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_random() - .Build(); - auto* out_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_zero() - .Build(); - auto args = common::ArgsBuilder().Add(x_buf).Add(out_buf).Build(); + auto* x_buf = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_random() + .Build(); + auto* out_buf = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_zero() + .Build(); + auto args = cinn::common::ArgsBuilder().Add(x_buf).Add(out_buf).Build(); fnp(args.data(), args.size()); auto* x_buf_data = reinterpret_cast(x_buf->memory); @@ -87,7 +89,7 @@ TEST(find_value_nd, basic) { auto jit = backends::SimpleJIT::Create(); - ir::Module::Builder builder("module1", common::DefaultHostTarget()); + ir::Module::Builder builder("module1", cinn::common::DefaultHostTarget()); auto fn = Lower("fn", stages, {x, y}); LOG(INFO) << "fn:\n" << fn; @@ -100,12 +102,13 @@ TEST(find_value_nd, basic) { auto fnp = reinterpret_cast(fn_ptr); ASSERT_TRUE(fnp); - auto* x_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_random() - .Build(); + auto* x_buf = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), 
N.as_int32()}) + .set_random() + .Build(); auto* out_buf = - common::BufferBuilder(Int(32), {N.as_int32()}).set_zero().Build(); - auto args = common::ArgsBuilder().Add(x_buf).Add(out_buf).Build(); + cinn::common::BufferBuilder(Int(32), {N.as_int32()}).set_zero().Build(); + auto args = cinn::common::ArgsBuilder().Add(x_buf).Add(out_buf).Build(); fnp(args.data(), args.size()); auto* x_buf_data = reinterpret_cast(x_buf->memory); @@ -135,7 +138,7 @@ TEST(cinn_host_lt_num_fp32, basic) { auto jit = backends::SimpleJIT::Create(); - ir::Module::Builder builder("module1", common::DefaultHostTarget()); + ir::Module::Builder builder("module1", cinn::common::DefaultHostTarget()); auto fn = Lower("fn", stages, {x, y}); LOG(INFO) << "fn:\n" << fn; @@ -148,12 +151,13 @@ TEST(cinn_host_lt_num_fp32, basic) { auto fnp = reinterpret_cast(fn_ptr); ASSERT_TRUE(fnp); - auto* x_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_random() - .Build(); + auto* x_buf = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_random() + .Build(); auto* out_buf = - common::BufferBuilder(Int(32), {N.as_int32()}).set_zero().Build(); - auto args = common::ArgsBuilder().Add(x_buf).Add(out_buf).Build(); + cinn::common::BufferBuilder(Int(32), {N.as_int32()}).set_zero().Build(); + auto args = cinn::common::ArgsBuilder().Add(x_buf).Add(out_buf).Build(); fnp(args.data(), args.size()); auto* x_buf_data = reinterpret_cast(x_buf->memory); @@ -186,7 +190,7 @@ TEST(cinn_host_gt_num_fp32, basic) { auto jit = backends::SimpleJIT::Create(); - ir::Module::Builder builder("module1", common::DefaultHostTarget()); + ir::Module::Builder builder("module1", cinn::common::DefaultHostTarget()); auto fn = Lower("fn", stages, {x, y}); LOG(INFO) << "fn:\n" << fn; @@ -199,12 +203,13 @@ TEST(cinn_host_gt_num_fp32, basic) { auto fnp = reinterpret_cast(fn_ptr); ASSERT_TRUE(fnp); - auto* x_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_random() - .Build(); + auto* x_buf = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_random() + .Build(); auto* out_buf = - common::BufferBuilder(Int(32), {N.as_int32()}).set_zero().Build(); - auto args = common::ArgsBuilder().Add(x_buf).Add(out_buf).Build(); + cinn::common::BufferBuilder(Int(32), {N.as_int32()}).set_zero().Build(); + auto args = cinn::common::ArgsBuilder().Add(x_buf).Add(out_buf).Build(); fnp(args.data(), args.size()); auto* x_buf_data = reinterpret_cast(x_buf->memory); diff --git a/paddle/cinn/runtime/cpu/mkl_math_test.cc b/paddle/cinn/runtime/cpu/mkl_math_test.cc index f91a76ddd54114..d064535d940c18 100644 --- a/paddle/cinn/runtime/cpu/mkl_math_test.cc +++ b/paddle/cinn/runtime/cpu/mkl_math_test.cc @@ -33,11 +33,13 @@ cinn_buffer_t *CreateBuffer(const std::vector shape, bool random = true, int set_value = 0) { if (random) { - return common::BufferBuilder(Float(32), shape).set_random().Build(); + return cinn::common::BufferBuilder(Float(32), shape).set_random().Build(); } else if (set_value != 0) { - return common::BufferBuilder(Float(32), shape).set_val(set_value).Build(); + return cinn::common::BufferBuilder(Float(32), shape) + .set_val(set_value) + .Build(); } - return common::BufferBuilder(Float(32), shape).set_zero().Build(); + return cinn::common::BufferBuilder(Float(32), shape).set_zero().Build(); } template @@ -74,7 +76,7 @@ void TestCallElementwise(const std::string &fn_name, auto stages = CreateStages(lower_args); - auto target = common::DefaultHostTarget(); + auto target = 
cinn::common::DefaultHostTarget(); target.arch = Target::Arch::X86; ir::Module::Builder builder("module0", target); auto func = Lower("fn", stages, lower_args); @@ -96,8 +98,9 @@ void TestCallElementwise(const std::string &fn_name, } else { A_buf = CreateBuffer({10, 10}); } - auto *B_buf = - common::BufferBuilder(type, {10, 10}).set_align(type.bits()).Build(); + auto *B_buf = cinn::common::BufferBuilder(type, {10, 10}) + .set_align(type.bits()) + .Build(); cinn_pod_value_t a_arg(A_buf), b_arg(B_buf); cinn_pod_value_t args[] = {a_arg, b_arg}; @@ -183,18 +186,18 @@ TEST(cinn_cpu_mkl_gemm_fp32, test) { [=]() -> Expr { return lang::CallExtern("cinn_cpu_mkl_gemm_fp32", { - common::make_one(), // alpha - M, // M - N, // N - K, // K - common::make_bool(false), // ta - common::make_bool(false), // tb - K, // lda - N, // ldb - N, // ldc - common::make_zero(), // beta - A.tensor(), // A - B.tensor(), // B + cinn::common::make_one(), // alpha + M, // M + N, // N + K, // K + cinn::common::make_bool(false), // ta + cinn::common::make_bool(false), // tb + K, // lda + N, // ldb + N, // ldc + cinn::common::make_zero(), // beta + A.tensor(), // A + B.tensor(), // B }); }, "extern_call"); @@ -204,7 +207,7 @@ TEST(cinn_cpu_mkl_gemm_fp32, test) { auto stages = CreateStages({call, out}); - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); target.arch = Target::Arch::X86; ir::Module::Builder builder("module0", target); @@ -221,17 +224,21 @@ TEST(cinn_cpu_mkl_gemm_fp32, test) { auto fn_ptr = reinterpret_cast(fn); // test with real data - auto *A_buf = common::BufferBuilder(Float(32), {M.as_int32(), K.as_int32()}) - .set_random() - .Build(); - auto *B_buf = common::BufferBuilder(Float(32), {K.as_int32(), N.as_int32()}) - .set_random() - .Build(); - auto *C_buf = common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) - .set_zero() - .Build(); - - auto args = common::ArgsBuilder().Add(A_buf).Add(B_buf).Add(C_buf).Build(); + auto *A_buf = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), K.as_int32()}) + .set_random() + .Build(); + auto *B_buf = + cinn::common::BufferBuilder(Float(32), {K.as_int32(), N.as_int32()}) + .set_random() + .Build(); + auto *C_buf = + cinn::common::BufferBuilder(Float(32), {M.as_int32(), N.as_int32()}) + .set_zero() + .Build(); + + auto args = + cinn::common::ArgsBuilder().Add(A_buf).Add(B_buf).Add(C_buf).Build(); fn_ptr(args.data(), args.size()); diff --git a/paddle/cinn/runtime/cpu/mkldnn_math.cc b/paddle/cinn/runtime/cpu/mkldnn_math.cc index 8154ee538ad739..b45ddedd2e890a 100644 --- a/paddle/cinn/runtime/cpu/mkldnn_math.cc +++ b/paddle/cinn/runtime/cpu/mkldnn_math.cc @@ -157,23 +157,23 @@ void cinn_cpu_mkldnn_conv2d_nchw_fp32(int batch_size, CINN_REGISTER_HELPER(cinn_cpu_mkldnn) { using namespace cinn; // NOLINT using backends::FunctionProto; - auto host_target = common::DefaultHostTarget(); + auto host_target = cinn::common::DefaultHostTarget(); FunctionProto::shape_inference_t inference_shape_conv2d_nchw = [](const std::vector& args, int offset) { CHECK_EQ(args.size(), 16UL) << "Wrong number of arguments passed in"; - auto N = common::AutoSimplify(args[0]); - int input_h = common::AutoSimplify(args[2]).as_int32(); - int input_w = common::AutoSimplify(args[3]).as_int32(); - auto c_out = common::AutoSimplify(args[4]); - int filter_h = common::AutoSimplify(args[6]).as_int32(); - int filter_w = common::AutoSimplify(args[7]).as_int32(); - int pad_h = common::AutoSimplify(args[8]).as_int32(); - int pad_w = 
common::AutoSimplify(args[9]).as_int32(); - int stride_h = common::AutoSimplify(args[10]).as_int32(); - int stride_w = common::AutoSimplify(args[11]).as_int32(); - int dilation_h = common::AutoSimplify(args[12]).as_int32(); - int dilation_w = common::AutoSimplify(args[13]).as_int32(); + auto N = cinn::common::AutoSimplify(args[0]); + int input_h = cinn::common::AutoSimplify(args[2]).as_int32(); + int input_w = cinn::common::AutoSimplify(args[3]).as_int32(); + auto c_out = cinn::common::AutoSimplify(args[4]); + int filter_h = cinn::common::AutoSimplify(args[6]).as_int32(); + int filter_w = cinn::common::AutoSimplify(args[7]).as_int32(); + int pad_h = cinn::common::AutoSimplify(args[8]).as_int32(); + int pad_w = cinn::common::AutoSimplify(args[9]).as_int32(); + int stride_h = cinn::common::AutoSimplify(args[10]).as_int32(); + int stride_w = cinn::common::AutoSimplify(args[11]).as_int32(); + int dilation_h = cinn::common::AutoSimplify(args[12]).as_int32(); + int dilation_w = cinn::common::AutoSimplify(args[13]).as_int32(); int out_h = (input_h - ((filter_h - 1) * dilation_h + 1) + 2 * pad_h) / stride_h + 1; diff --git a/paddle/cinn/runtime/cpu/mkldnn_math_test.cc b/paddle/cinn/runtime/cpu/mkldnn_math_test.cc index 26d06d715d5504..15574a90280427 100644 --- a/paddle/cinn/runtime/cpu/mkldnn_math_test.cc +++ b/paddle/cinn/runtime/cpu/mkldnn_math_test.cc @@ -33,11 +33,13 @@ cinn_buffer_t *CreateBuffer(const std::vector shape, bool random = true, int set_value = 0) { if (random) { - return common::BufferBuilder(Float(32), shape).set_random().Build(); + return cinn::common::BufferBuilder(Float(32), shape).set_random().Build(); } else if (set_value != 0) { - return common::BufferBuilder(Float(32), shape).set_val(set_value).Build(); + return cinn::common::BufferBuilder(Float(32), shape) + .set_val(set_value) + .Build(); } - return common::BufferBuilder(Float(32), shape).set_zero().Build(); + return cinn::common::BufferBuilder(Float(32), shape).set_zero().Build(); } TEST(cinn_cpu_mkldnn_conv2d_nchw_fp32, test) { @@ -90,7 +92,7 @@ TEST(cinn_cpu_mkldnn_conv2d_nchw_fp32, test) { auto stages = CreateStages({call, out}); - auto target = common::DefaultHostTarget(); + auto target = cinn::common::DefaultHostTarget(); target.arch = Target::Arch::X86; ir::Module::Builder builder("module0", target); @@ -109,16 +111,18 @@ TEST(cinn_cpu_mkldnn_conv2d_nchw_fp32, test) { // test with real data int o_h = (i_h - ((k_h - 1) * dilation_h + 1) + pad_h * 2) / stride_h + 1; int o_w = (i_w - ((k_w - 1) * dilation_w + 1) + pad_w * 2) / stride_w + 1; - auto *A_buf = common::BufferBuilder(Float(32), {n, c_in, i_h, i_w}) + auto *A_buf = cinn::common::BufferBuilder(Float(32), {n, c_in, i_h, i_w}) .set_random() .Build(); - auto *B_buf = common::BufferBuilder(Float(32), {c_out, c_in, k_h, k_w}) + auto *B_buf = cinn::common::BufferBuilder(Float(32), {c_out, c_in, k_h, k_w}) .set_random() .Build(); - auto *C_buf = - common::BufferBuilder(Float(32), {n, c_out, o_h, o_w}).set_zero().Build(); + auto *C_buf = cinn::common::BufferBuilder(Float(32), {n, c_out, o_h, o_w}) + .set_zero() + .Build(); - auto args = common::ArgsBuilder().Add(A_buf).Add(B_buf).Add(C_buf).Build(); + auto args = + cinn::common::ArgsBuilder().Add(A_buf).Add(B_buf).Add(C_buf).Build(); fn_ptr(args.data(), args.size()); diff --git a/paddle/cinn/runtime/cpu/thread_backend.cc b/paddle/cinn/runtime/cpu/thread_backend.cc index c6c49dfe5d5052..43804e33b1e60b 100644 --- a/paddle/cinn/runtime/cpu/thread_backend.cc +++ b/paddle/cinn/runtime/cpu/thread_backend.cc @@ -64,7 
+64,7 @@ int cinn_backend_parallel_launch(FCINNParallelLambda flambda, CINN_REGISTER_HELPER(cinn_backend_parallel) { using namespace cinn; // NOLINT using backends::FunctionProto; - auto host_target = common::DefaultHostTarget(); + auto host_target = cinn::common::DefaultHostTarget(); backends::GlobalSymbolRegistry::Global().RegisterFn( runtime::intrinsic::parallel_launch, reinterpret_cast(&cinn_backend_parallel_launch)); diff --git a/paddle/cinn/runtime/cuda/cublas_util.h b/paddle/cinn/runtime/cuda/cublas_util.h index edb3d60e8a1a3c..bdd21dafed544f 100644 --- a/paddle/cinn/runtime/cuda/cublas_util.h +++ b/paddle/cinn/runtime/cuda/cublas_util.h @@ -91,8 +91,8 @@ inline cublasStatus_t cublasGemm(cudaDataType_t dtype, CUBLAS_COMPUTE_32F, CUBLAS_GEMM_DEFAULT_TENSOR_OP); #else - common::float16 alpha_fp16{alpha}; - common::float16 beta_fp16{beta}; + cinn::common::float16 alpha_fp16{alpha}; + cinn::common::float16 beta_fp16{beta}; return cublasHgemm(handle, transa, transb, @@ -221,8 +221,8 @@ inline cublasStatus_t cublasGemmStridedBatched(cudaDataType_t dtype, CUBLAS_COMPUTE_32F, CUBLAS_GEMM_DEFAULT_TENSOR_OP); #else - common::float16 alpha_fp16{alpha}; - common::float16 beta_fp16{beta}; + cinn::common::float16 alpha_fp16{alpha}; + cinn::common::float16 beta_fp16{beta}; return cublasHgemmStridedBatched( handle, transa, diff --git a/paddle/cinn/runtime/cuda/cuda_module_test.cc b/paddle/cinn/runtime/cuda/cuda_module_test.cc index 2b0ff0c89c72a1..fe41a1ed0ca2e0 100644 --- a/paddle/cinn/runtime/cuda/cuda_module_test.cc +++ b/paddle/cinn/runtime/cuda/cuda_module_test.cc @@ -51,7 +51,7 @@ void saxpy(float a, float *x, float *y, float *out, size_t n) } TEST(CUDAModule, float16) { - using common::float16; + using cinn::common::float16; using runtime::cuda::util::Vector; auto generate_ptx = [] { @@ -120,7 +120,7 @@ TEST(CUDAModule, float16) { } TEST(CUDAModule, bfloat16) { - using common::bfloat16; + using cinn::common::bfloat16; using runtime::cuda::util::Vector; auto generate_ptx = [] { diff --git a/paddle/cinn/runtime/cuda/cuda_util.cc b/paddle/cinn/runtime/cuda/cuda_util.cc index 6509fb33dbeb49..326e5a3aac561d 100644 --- a/paddle/cinn/runtime/cuda/cuda_util.cc +++ b/paddle/cinn/runtime/cuda/cuda_util.cc @@ -191,7 +191,7 @@ void cinn_call_cublas(void *v_args, bool is_float = type_code == cinn_type_float; bool is_bfloat16 = type_code == cinn_type_bfloat; int bytes = args[0].operator cinn_buffer_t *()->type.bits / CHAR_BIT; - if (is_float && bytes == sizeof(common::float16)) { + if (is_float && bytes == sizeof(cinn::common::float16)) { cuda_dtype = CUDA_R_16F; } else if (is_float && bytes == sizeof(float)) { cuda_dtype = CUDA_R_32F; @@ -413,7 +413,7 @@ void cinn_call_batched_cublas(void *v_args, bool is_float = type_code == cinn_type_float; bool is_bfloat16 = type_code == cinn_type_bfloat; int bytes = args[0].operator cinn_buffer_t *()->type.bits / CHAR_BIT; - if (is_float && bytes == sizeof(common::float16)) { + if (is_float && bytes == sizeof(cinn::common::float16)) { cuda_dtype = CUDA_R_16F; } else if (is_float && bytes == sizeof(float)) { cuda_dtype = CUDA_R_32F; @@ -1841,7 +1841,7 @@ void cinn_assert_true_nvgpu( msg, only_warning, stream, - common::DefaultNVGPUTarget()); + cinn::common::DefaultNVGPUTarget()); } void cinn_gpu_cublas_mul(const std::vector &attrs, @@ -2172,11 +2172,11 @@ void cinn_gpu_cudnn_conv2d(const absl::flat_hash_map &attr, cinn_buffer_t *w, cinn_buffer_t *y, cudaStream_t stream, - common::Layout target) { + cinn::common::Layout target) { cudnnTensorFormat_t 
cudnn_tensor_format; - if (target == common::Layout::kNCHW) { + if (target == cinn::common::Layout::kNCHW) { cudnn_tensor_format = CUDNN_TENSOR_NCHW; - } else if (target == common::Layout::kNHWC) { + } else if (target == cinn::common::Layout::kNHWC) { cudnn_tensor_format = CUDNN_TENSOR_NHWC; } else { CINN_NOT_IMPLEMENTED diff --git a/paddle/cinn/runtime/cuda/cuda_util.h b/paddle/cinn/runtime/cuda/cuda_util.h index ec7f2ca6a88352..7ea9dbe00a2c5b 100644 --- a/paddle/cinn/runtime/cuda/cuda_util.h +++ b/paddle/cinn/runtime/cuda/cuda_util.h @@ -150,12 +150,13 @@ void cinn_call_batched_cublas(void* v_args, void* stream); #ifdef CINN_WITH_CUDNN -void cinn_gpu_cudnn_conv2d(const absl::flat_hash_map& attr, - cinn_buffer_t* x, - cinn_buffer_t* w, - cinn_buffer_t* y, - cudaStream_t stream = nullptr, - common::Layout target = common::Layout::kNCHW); +void cinn_gpu_cudnn_conv2d( + const absl::flat_hash_map& attr, + cinn_buffer_t* x, + cinn_buffer_t* w, + cinn_buffer_t* y, + cudaStream_t stream = nullptr, + cinn::common::Layout target = cinn::common::Layout::kNCHW); void cinn_gpu_cudnn_conv2d_backward_data( const absl::flat_hash_map& attr, diff --git a/paddle/cinn/runtime/custom_function.cc b/paddle/cinn/runtime/custom_function.cc index dda7430b9e336f..08fe5c1bd7f351 100644 --- a/paddle/cinn/runtime/custom_function.cc +++ b/paddle/cinn/runtime/custom_function.cc @@ -27,7 +27,7 @@ PD_DECLARE_string(cinn_check_fusion_accuracy_pass); namespace cinn { namespace runtime { -using common::Target; +using cinn::common::Target; using hlir::framework::Shape; using hlir::framework::Tensor; @@ -104,7 +104,7 @@ bool MemcpyToHost(void* dst, size_t bytes, const Target& input_target, void* stream = nullptr) { - if (input_target == common::DefaultNVGPUTarget()) { + if (input_target == cinn::common::DefaultNVGPUTarget()) { #ifdef CINN_WITH_CUDA const auto& cuda_stream = static_cast(stream); cudaMemcpyAsync(dst, src, bytes, cudaMemcpyDeviceToHost, cuda_stream); @@ -116,7 +116,7 @@ bool MemcpyToHost(void* dst, return false; #endif } - if (input_target == common::DefaultHostTarget()) { + if (input_target == cinn::common::DefaultHostTarget()) { memcpy(dst, src, bytes); return true; } @@ -132,14 +132,14 @@ bool MemcpyToDevice(void* dst, const Target& input_target, void* stream = nullptr) { #ifdef CINN_WITH_CUDA - if (input_target == common::DefaultNVGPUTarget()) { + if (input_target == cinn::common::DefaultNVGPUTarget()) { cudaMemcpyAsync(dst, src, bytes, cudaMemcpyDeviceToDevice, static_cast(stream)); return true; - } else if (input_target == common::DefaultHostTarget()) { + } else if (input_target == cinn::common::DefaultHostTarget()) { cudaMemcpyAsync(dst, src, bytes, @@ -223,7 +223,7 @@ void cinn_assert_true(void* v_args, Tensor cpu_tensor; cpu_tensor->Resize(Shape(shape)); - bool* dst = cpu_tensor->mutable_data(common::DefaultHostTarget()); + bool* dst = cpu_tensor->mutable_data(cinn::common::DefaultHostTarget()); // copy data from gpu to cpu const bool* src = reinterpret_cast(x->memory); @@ -236,7 +236,7 @@ void cinn_assert_true(void* v_args, utils::AssertTrueMsgTool::GetInstance()->GetMsg(msg), target); - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { utils::MemcpyToDevice( output->memory, x->memory, numel * sizeof(bool), target, stream); } else { diff --git a/paddle/cinn/runtime/custom_function_test.cc b/paddle/cinn/runtime/custom_function_test.cc index df88a0e4b817b5..546599f252cc2f 100644 --- a/paddle/cinn/runtime/custom_function_test.cc +++ 
b/paddle/cinn/runtime/custom_function_test.cc @@ -45,7 +45,7 @@ class CinnBufferAllocHelper { template T* mutable_data(const Target& target) { - if (target_ != common::UnkTarget()) { + if (target_ != cinn::common::UnkTarget()) { CHECK_EQ(target, target_) << "Cannot alloc twice, the memory had alloced at " << target_ << "! Please check."; @@ -53,9 +53,9 @@ class CinnBufferAllocHelper { } target_ = target; - if (target == common::DefaultHostTarget()) { + if (target == cinn::common::DefaultHostTarget()) { cinn_buffer_malloc(nullptr, buffer_); - } else if (target == common::DefaultNVGPUTarget()) { + } else if (target == cinn::common::DefaultNVGPUTarget()) { #ifdef CINN_WITH_CUDA cudaMalloc(&buffer_->memory, buffer_->num_elements() * sizeof(T)); #else @@ -72,7 +72,7 @@ class CinnBufferAllocHelper { template const T* data() { - if (target_ == common::UnkTarget()) { + if (target_ == cinn::common::UnkTarget()) { LOG(FATAL) << "No memory had alloced! Please check."; } return reinterpret_cast(buffer_->memory); @@ -80,11 +80,11 @@ class CinnBufferAllocHelper { ~CinnBufferAllocHelper() { if (buffer_) { - if (target_ == common::UnkTarget()) { + if (target_ == cinn::common::UnkTarget()) { // pass - } else if (target_ == common::DefaultHostTarget()) { + } else if (target_ == cinn::common::DefaultHostTarget()) { cinn_buffer_free(nullptr, buffer_); - } else if (target_ == common::DefaultNVGPUTarget()) { + } else if (target_ == cinn::common::DefaultNVGPUTarget()) { #ifdef CINN_WITH_CUDA cudaFree(buffer_->memory); #else @@ -105,7 +105,7 @@ class CinnBufferAllocHelper { private: cinn_buffer_t* buffer_{nullptr}; - Target target_{common::UnkTarget()}; + Target target_{cinn::common::UnkTarget()}; }; template @@ -113,11 +113,11 @@ void SetInputValue(T* input, const T* input_h, size_t num, const Target& target) { - if (target == common::DefaultHostTarget()) { + if (target == cinn::common::DefaultHostTarget()) { for (int i = 0; i < num; ++i) { input[i] = input_h[i]; } - } else if (target == common::DefaultNVGPUTarget()) { + } else if (target == cinn::common::DefaultNVGPUTarget()) { #ifdef CINN_WITH_CUDA cudaMemcpy(input, input_h, num * sizeof(T), cudaMemcpyHostToDevice); #else @@ -128,7 +128,7 @@ void SetInputValue(T* input, } TEST(CinnAssertTrue, test_true) { - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); CinnBufferAllocHelper x(cinn_x86_device, cinn_bool_t(), {1}); @@ -151,10 +151,10 @@ TEST(CinnAssertTrue, test_true) { cinn::runtime::utils::AssertTrueMsgTool::GetInstance()->SetMsg(msg_key, msg); cinn_assert_true(v_args, 2, msg_key, true, nullptr, target); - if (target == common::DefaultHostTarget()) { + if (target == cinn::common::DefaultHostTarget()) { ASSERT_EQ(input[0], output[0]) << "The output of AssertTrue should be the same as input"; - } else if (target == common::DefaultNVGPUTarget()) { + } else if (target == cinn::common::DefaultNVGPUTarget()) { #ifdef CINN_WITH_CUDA bool output_h = false; cudaMemcpy(&output_h, output, sizeof(bool), cudaMemcpyDeviceToHost); @@ -166,7 +166,7 @@ TEST(CinnAssertTrue, test_true) { } TEST(CinnAssertTrue, test_false_only_warning) { - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); CinnBufferAllocHelper x(cinn_x86_device, cinn_bool_t(), {1}); @@ -189,10 +189,10 @@ TEST(CinnAssertTrue, test_false_only_warning) { cinn::runtime::utils::AssertTrueMsgTool::GetInstance()->SetMsg(msg_key, msg); cinn_assert_true(v_args, 2, msg_key, true, nullptr, target); - if (target == 
common::DefaultHostTarget()) { + if (target == cinn::common::DefaultHostTarget()) { ASSERT_EQ(input[0], output[0]) << "The output of AssertTrue should be the same as input"; - } else if (target == common::DefaultNVGPUTarget()) { + } else if (target == cinn::common::DefaultNVGPUTarget()) { #ifdef CINN_WITH_CUDA bool output_h = false; cudaMemcpy(&output_h, output, sizeof(bool), cudaMemcpyDeviceToHost); @@ -204,7 +204,7 @@ TEST(CinnAssertTrue, test_false_only_warning) { } TEST(CustomCallGaussianRandom, test_target_nvgpu) { - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); // Arg mean float mean = 0.0f; @@ -220,9 +220,9 @@ TEST(CustomCallGaussianRandom, test_target_nvgpu) { int num_args = 1; cinn_pod_value_t v_args[1] = {cinn_pod_value_t(out.get())}; - if (target == common::DefaultHostTarget()) { + if (target == cinn::common::DefaultHostTarget()) { LOG(INFO) << "Op gaussian random only support on NVGPU"; - } else if (target == common::DefaultNVGPUTarget()) { + } else if (target == cinn::common::DefaultNVGPUTarget()) { #ifdef CINN_WITH_CUDA cinn::runtime::cuda::cinn_call_gaussian_random( v_args, num_args, mean, std, seed, nullptr); @@ -240,7 +240,7 @@ TEST(CustomCallGaussianRandom, test_target_nvgpu) { } TEST(CustomCallUniformRandom, test_target_nvgpu) { - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); // Arg min float min = -1.0f; @@ -256,9 +256,9 @@ TEST(CustomCallUniformRandom, test_target_nvgpu) { int num_args = 1; cinn_pod_value_t v_args[1] = {cinn_pod_value_t(out.get())}; - if (target == common::DefaultHostTarget()) { + if (target == cinn::common::DefaultHostTarget()) { LOG(INFO) << "Op uniform random only support on NVGPU"; - } else if (target == common::DefaultNVGPUTarget()) { + } else if (target == cinn::common::DefaultNVGPUTarget()) { #ifdef CINN_WITH_CUDA cinn::runtime::cuda::cinn_call_uniform_random( v_args, num_args, min, max, seed, nullptr); @@ -276,7 +276,7 @@ TEST(CustomCallUniformRandom, test_target_nvgpu) { } TEST(CustomCallCholesky, test) { - Target target = common::DefaultTarget(); + Target target = cinn::common::DefaultTarget(); // Batch size int batch_size = 1; @@ -331,7 +331,7 @@ TEST(CustomCallCholesky, test) { cinn_pod_value_t v_args[2] = {cinn_pod_value_t(x.get()), cinn_pod_value_t(out.get())}; - if (target == common::DefaultHostTarget()) { + if (target == cinn::common::DefaultHostTarget()) { #ifdef CINN_WITH_MKL_CBLAS cinn_call_cholesky_host(v_args, num_args, batch_size, m, upper); for (int i = 0; i < batch_size * m * m; i++) { @@ -342,7 +342,7 @@ TEST(CustomCallCholesky, test) { LOG(INFO) << "Host Target only support on flag CINN_WITH_MKL_CBLAS ON! 
" "Please check."; #endif - } else if (target == common::DefaultNVGPUTarget()) { + } else if (target == cinn::common::DefaultNVGPUTarget()) { #ifdef CINN_WITH_CUDA cinn::runtime::cuda::cinn_call_cholesky_nvgpu( v_args, num_args, batch_size, m, upper); @@ -364,7 +364,7 @@ TEST(CustomCallCholesky, test) { #ifdef CINN_WITH_CUDA TEST(CustomCallTriangularSolve, test) { - Target target = common::DefaultNVGPUTarget(); + Target target = cinn::common::DefaultNVGPUTarget(); int batch_size = 1; int m = 3; diff --git a/paddle/cinn/runtime/flags.cc b/paddle/cinn/runtime/flags.cc index eb93d3442684b4..ac41a22f445623 100644 --- a/paddle/cinn/runtime/flags.cc +++ b/paddle/cinn/runtime/flags.cc @@ -304,10 +304,11 @@ bool IsCompiledWithCUDNN() { #endif } -common::Target CurrentTarget::target_ = common::DefaultTarget(); +cinn::common::Target CurrentTarget::target_ = cinn::common::DefaultTarget(); -void CurrentTarget::SetCurrentTarget(const common::Target& target) { - if (!IsCompiledWithCUDA() && target.arch == common::Target::Arch::NVGPU) { +void CurrentTarget::SetCurrentTarget(const cinn::common::Target& target) { + if (!IsCompiledWithCUDA() && + target.arch == cinn::common::Target::Arch::NVGPU) { LOG(FATAL) << "Current CINN version does not support NVGPU, please try to " "recompile with -DWITH_CUDA."; } else { @@ -315,7 +316,7 @@ void CurrentTarget::SetCurrentTarget(const common::Target& target) { } } -common::Target& CurrentTarget::GetCurrentTarget() { return target_; } +cinn::common::Target& CurrentTarget::GetCurrentTarget() { return target_; } } // namespace runtime } // namespace cinn diff --git a/paddle/cinn/runtime/flags.h b/paddle/cinn/runtime/flags.h index e75af97a2a276d..ef7225c281c03d 100644 --- a/paddle/cinn/runtime/flags.h +++ b/paddle/cinn/runtime/flags.h @@ -47,15 +47,15 @@ bool IsCompiledWithCUDNN(); class CurrentTarget { public: - static common::Target &GetCurrentTarget(); - static void SetCurrentTarget(const common::Target &target); + static cinn::common::Target &GetCurrentTarget(); + static void SetCurrentTarget(const cinn::common::Target &target); private: CurrentTarget() = default; CurrentTarget(const CurrentTarget &) = delete; CurrentTarget &operator=(const CurrentTarget &) = delete; - static common::Target target_; + static cinn::common::Target target_; }; } // namespace runtime diff --git a/paddle/cinn/runtime/intrinsic.cc b/paddle/cinn/runtime/intrinsic.cc index 41e12331650b68..eb68cb5637cf3d 100644 --- a/paddle/cinn/runtime/intrinsic.cc +++ b/paddle/cinn/runtime/intrinsic.cc @@ -25,7 +25,7 @@ using cinn::common::float16; cinn_type_t ToRuntimeType(Type type) { #define SET_TYPE_CASE_ITEM(compiled_type, runtime_type) \ - if (type == common::compiled_type()) { \ + if (type == cinn::common::compiled_type()) { \ return runtime_type(); \ } diff --git a/paddle/cinn/utils/data_util.cc b/paddle/cinn/utils/data_util.cc index 5066395305f756..ddd8a451e8ffb5 100644 --- a/paddle/cinn/utils/data_util.cc +++ b/paddle/cinn/utils/data_util.cc @@ -19,7 +19,7 @@ namespace cinn { void SetRandInt(hlir::framework::Tensor tensor, - const common::Target& target, + const cinn::common::Target& target, int seed, int low, int high) { @@ -37,7 +37,7 @@ void SetRandInt(hlir::framework::Tensor tensor, auto* data = tensor->mutable_data(target); #ifdef CINN_WITH_CUDA - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { cudaMemcpy(data, random_data.data(), num_ele * sizeof(int), @@ -45,13 +45,13 @@ void SetRandInt(hlir::framework::Tensor tensor, return; } #endif - 
CHECK(target == common::DefaultHostTarget()); + CHECK(target == cinn::common::DefaultHostTarget()); std::copy(random_data.begin(), random_data.end(), data); } template <> void SetRandData(hlir::framework::Tensor tensor, - const common::Target& target, + const cinn::common::Target& target, int seed) { if (seed == -1) { std::random_device rd; @@ -67,7 +67,7 @@ void SetRandData(hlir::framework::Tensor tensor, auto* data = tensor->mutable_data(target); #ifdef CINN_WITH_CUDA - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { cudaMemcpy(data, random_data.data(), num_ele * sizeof(float), @@ -75,13 +75,13 @@ void SetRandData(hlir::framework::Tensor tensor, return; } #endif - CHECK(target == common::DefaultHostTarget()); + CHECK(target == cinn::common::DefaultHostTarget()); std::copy(random_data.begin(), random_data.end(), data); } template <> void SetRandData(hlir::framework::Tensor tensor, - const common::Target& target, + const cinn::common::Target& target, int seed) { if (seed == -1) { std::random_device rd; @@ -97,48 +97,48 @@ void SetRandData(hlir::framework::Tensor tensor, auto* data = tensor->mutable_data(target); #ifdef CINN_WITH_CUDA - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { cudaMemcpy(data, random_data.data(), num_ele * sizeof(float), cudaMemcpyHostToDevice); - } else if (target == common::DefaultHostTarget()) { + } else if (target == cinn::common::DefaultHostTarget()) { std::copy(random_data.begin(), random_data.end(), data); } else { CINN_NOT_IMPLEMENTED } #else - CHECK(target == common::DefaultHostTarget()); + CHECK(target == cinn::common::DefaultHostTarget()); std::copy(random_data.begin(), random_data.end(), data); #endif } template std::vector GetTensorData(const hlir::framework::Tensor& tensor, - const common::Target& target) { + const cinn::common::Target& target) { auto size = tensor->shape().numel(); std::vector data(size); #ifdef CINN_WITH_CUDA - if (target == common::DefaultNVGPUTarget()) { + if (target == cinn::common::DefaultNVGPUTarget()) { cudaMemcpy(data.data(), static_cast(tensor->data()), size * sizeof(T), cudaMemcpyDeviceToHost); - } else if (target == common::DefaultHostTarget()) { + } else if (target == cinn::common::DefaultHostTarget()) { std::copy(tensor->data(), tensor->data() + size, data.begin()); } else { CINN_NOT_IMPLEMENTED } #else - CHECK(target == common::DefaultHostTarget()); + CHECK(target == cinn::common::DefaultHostTarget()); std::copy(tensor->data(), tensor->data() + size, data.begin()); #endif return data; } template std::vector GetTensorData( - const hlir::framework::Tensor& tensor, const common::Target& target); + const hlir::framework::Tensor& tensor, const cinn::common::Target& target); template std::vector GetTensorData( - const hlir::framework::Tensor& tensor, const common::Target& target); + const hlir::framework::Tensor& tensor, const cinn::common::Target& target); } // namespace cinn diff --git a/paddle/cinn/utils/data_util.h b/paddle/cinn/utils/data_util.h index a55ad554579f19..b3fa5745b5ab45 100644 --- a/paddle/cinn/utils/data_util.h +++ b/paddle/cinn/utils/data_util.h @@ -38,18 +38,18 @@ namespace cinn { * [low, high). 
*/ void SetRandInt(hlir::framework::Tensor tensor, - const common::Target& target, + const cinn::common::Target& target, int seed = -1, int low = 0, int high = 11); template void SetRandData(hlir::framework::Tensor tensor, - const common::Target& target, + const cinn::common::Target& target, int seed = -1); template std::vector GetTensorData(const hlir::framework::Tensor& tensor, - const common::Target& target); + const cinn::common::Target& target); } // namespace cinn diff --git a/paddle/common/CMakeLists.txt b/paddle/common/CMakeLists.txt index 2ae07983c77858..dae3176aca6d19 100644 --- a/paddle/common/CMakeLists.txt +++ b/paddle/common/CMakeLists.txt @@ -26,3 +26,6 @@ cc_library(common ${COMMON_BUILD_TYPE} SRCS ${common_srcs}) if(WIN32) set_property(TARGET common PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON) endif() + +# only used for libpaddle_inference.a +cc_library(common_static STATIC SRCS ${common_srcs}) diff --git a/paddle/common/array.h b/paddle/common/array.h index db604605f4ce33..11457a1eaa756b 100644 --- a/paddle/common/array.h +++ b/paddle/common/array.h @@ -140,3 +140,8 @@ class Array { }; } // namespace common + +namespace phi { +template +using Array = common::Array; +} // namespace phi diff --git a/paddle/common/ddim.cc b/paddle/common/ddim.cc index 76069a1a037123..c4ae80c75db852 100644 --- a/paddle/common/ddim.cc +++ b/paddle/common/ddim.cc @@ -18,6 +18,57 @@ namespace common { +DDim::DDim() : rank_(-1) { dim_[0] = 0; } + +DDim::DDim(const DDim& ddim) : dim_() { CopyFrom(ddim); } + +DDim::DDim(const int* d, int n) : rank_(n) { + dynamic_dim_assign(d, dim_.GetMutable(), n); +} + +DDim::DDim(const int64_t* d, int n) : rank_(n) { + dynamic_dim_assign(d, dim_.GetMutable(), n); +} + +DDim::DDim(std::initializer_list init_list) + : DDim(init_list.begin(), init_list.size()) {} + +int64_t& DDim::at(int idx) { + COMMON_ENFORCE_GE(idx, + 0, + common::errors::InvalidArgument( + "Invalid DDim index to be accessed. The valid index " + "is between 0 and %d, but received index is %d.", + rank_, + idx)); + COMMON_ENFORCE_LT(idx, + rank_, + common::errors::InvalidArgument( + "Invalid DDim index to be accessed. The valid index " + "is between 0 and %d, but received index is %d.", + rank_, + idx)); + return dim_[idx]; +} + +int64_t DDim::at(int idx) const { + COMMON_ENFORCE_GE(idx, + 0, + common::errors::InvalidArgument( + "Invalid DDim index to be accessed. The valid index " + "is between 0 and %d, but received index is %d.", + rank_, + idx)); + COMMON_ENFORCE_LT(idx, + rank_, + common::errors::InvalidArgument( + "Invalid DDim index to be accessed. 
The valid index " + "is between 0 and %d, but received index is %d.", + rank_, + idx)); + return dim_[idx]; +} + DDim make_ddim(std::initializer_list dims) { return DDim(dims.begin(), static_cast(dims.size())); } diff --git a/paddle/common/ddim.h b/paddle/common/ddim.h index cfed0d221221d9..4710708c70d4a2 100644 --- a/paddle/common/ddim.h +++ b/paddle/common/ddim.h @@ -19,6 +19,7 @@ #include #include "paddle/common/dim.h" +#include "paddle/common/enforce.h" #include "paddle/common/exception.h" #include "paddle/utils/test_macros.h" @@ -68,26 +69,21 @@ class TEST_API DDim { public: constexpr static int kMaxRank = 9; - DDim() : rank_(-1) { dim_[0] = 0; } + DDim(); - DDim(const DDim& ddim) : dim_() { CopyFrom(ddim); } + DDim(const DDim& ddim); - DDim(const int* d, int n) : rank_(n) { - dynamic_dim_assign(d, dim_.GetMutable(), n); - } + DDim(const int* d, int n); - DDim(const int64_t* d, int n) : rank_(n) { - dynamic_dim_assign(d, dim_.GetMutable(), n); - } + DDim(const int64_t* d, int n); + + /*implicit*/ DDim(std::initializer_list init_list); template /*implicit*/ DDim(const Dim& in) : rank_(D) { // NOLINT UnsafeCast() = in; } - /*implicit*/ DDim(std::initializer_list init_list) - : DDim(init_list.begin(), init_list.size()) {} - inline DDim& operator=(const DDim& ddim) { return CopyFrom(ddim); } template @@ -101,41 +97,9 @@ class TEST_API DDim { inline int64_t operator[](int idx) const { return dim_[idx]; } - int64_t& at(int idx) { - COMMON_ENFORCE_GE(idx, - 0, - common::errors::InvalidArgument( - "Invalid DDim index to be accessed. The valid index " - "is between 0 and %d, but received index is %d.", - rank_, - idx)); - COMMON_ENFORCE_LT(idx, - rank_, - common::errors::InvalidArgument( - "Invalid DDim index to be accessed. The valid index " - "is between 0 and %d, but received index is %d.", - rank_, - idx)); - return dim_[idx]; - } + int64_t& at(int idx); - int64_t at(int idx) const { - COMMON_ENFORCE_GE(idx, - 0, - common::errors::InvalidArgument( - "Invalid DDim index to be accessed. The valid index " - "is between 0 and %d, but received index is %d.", - rank_, - idx)); - COMMON_ENFORCE_LT(idx, - rank_, - common::errors::InvalidArgument( - "Invalid DDim index to be accessed. The valid index " - "is between 0 and %d, but received index is %d.", - rank_, - idx)); - return dim_[idx]; - } + int64_t at(int idx) const; template typename std::result_of&)>::type apply_visitor( @@ -188,8 +152,8 @@ class TEST_API DDim { PADDLE_VISIT_DDIM(ddim.rank_, (*this = ddim.UnsafeCast())); } - friend DDim stride(const DDim& ddim); - friend DDim stride_numel(const DDim& ddim); + friend TEST_API DDim stride(const DDim& ddim); + friend TEST_API DDim stride_numel(const DDim& ddim); private: Dim dim_; @@ -229,7 +193,7 @@ std::vector vectorize(const DDim& ddim) { TEST_API int64_t product(const DDim& ddim); -bool contain_unknown_dim(const DDim& ddim); +TEST_API bool contain_unknown_dim(const DDim& ddim); /** * \brief Slice a ddim @@ -238,7 +202,7 @@ bool contain_unknown_dim(const DDim& ddim); * e.g. DDim d = make_ddim({1,2,3,4,5}); * slice_ddim(d, 1, 3); ====> {2,3} */ -DDim slice_ddim(const DDim& dim, int begin, int end); +TEST_API DDim slice_ddim(const DDim& dim, int begin, int end); /** * \brief What is the length of this dimension? 
@@ -246,7 +210,7 @@ DDim slice_ddim(const DDim& dim, int begin, int end); * \param Dynamic dimension to inspect */ -int arity(const DDim& ddim); +TEST_API int arity(const DDim& ddim); TEST_API std::ostream& operator<<(std::ostream&, const DDim&); @@ -255,22 +219,49 @@ TEST_API std::ostream& operator<<(std::ostream&, const DDim&); * e.g., DDim d = make_ddim({1, 2, 3, 4, 5, 6}) * flatten_to_3d(d, 2, 4); ===> {1*2, 3*4, 5*6} ===> {2, 12, 30} */ -DDim flatten_to_3d(const DDim& src, int num_row_dims, int num_col_dims); +TEST_API DDim flatten_to_3d(const DDim& src, + int num_row_dims, + int num_col_dims); // Reshape a tensor to a matrix. The matrix's first dimension (column length) // will be the product of the tensor's first `num_col_dims` dimensions. -DDim flatten_to_2d(const DDim& src, int num_col_dims); +TEST_API DDim flatten_to_2d(const DDim& src, int num_col_dims); -DDim flatten_to_1d(const DDim& src); +TEST_API DDim flatten_to_1d(const DDim& src); -DDim stride(const DDim& ddim); +TEST_API DDim stride(const DDim& ddim); -DDim stride_numel(const DDim& ddim); +TEST_API DDim stride_numel(const DDim& ddim); } // namespace common +namespace paddle { +namespace framework { +using DDim = common::DDim; +} +} // namespace paddle + +namespace phi { +using DDim = common::DDim; +using common::arity; +using common::contain_unknown_dim; +using common::flatten_to_1d; +using common::flatten_to_2d; +using common::flatten_to_3d; +using common::make_ddim; +using common::product; +using common::slice_ddim; +using common::stride; +using common::stride_numel; +using common::vectorize; +} // namespace phi + +namespace pir { +using DDim = common::DDim; +} + namespace std { template <> -struct hash<common::DDim> { +struct TEST_API hash<common::DDim> { std::size_t operator()(common::DDim const& ddim) const; }; } // namespace std diff --git a/paddle/common/dim.h b/paddle/common/dim.h index 4423a452a370a5..68cd29872f8766 100644 --- a/paddle/common/dim.h +++ b/paddle/common/dim.h @@ -101,3 +101,16 @@ inline void static_dim_assign(const T1* in, T2* out) { } } // namespace common + +// Note: `namespace paddle::framework` will cause a CI error. +namespace paddle { +namespace framework { +template <int D> +using Dim = common::Dim<D>; +} +} // namespace paddle + +namespace phi { +template <int D> +using Dim = common::Dim<D>; +} diff --git a/paddle/common/enforce.h b/paddle/common/enforce.h index d09f8942e79a75..e2a33951da71d4 100644 --- a/paddle/common/enforce.h +++ b/paddle/common/enforce.h @@ -1,13 +1,16 @@ -/* Copyright (c) 2013 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #pragma once @@ -26,14 +29,15 @@ limitations under the License. */ #include <windows.h> // GetModuleFileName, Sleep #endif +#include "paddle/common/errors.h" #include "paddle/common/macros.h" + #if !defined(_WIN32) && !defined(PADDLE_WITH_MUSL) #include <execinfo.h> #endif -// #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with -// windows.h -#include "paddle/common/errors.h" +// msvc: glog logging conflicts with windows.h +#define GLOG_NO_ABBREVIATED_SEVERITIES #include "paddle/utils/string/printf.h" #include "paddle/utils/string/to_string.h" #include "paddle/utils/test_macros.h" @@ -50,14 +54,13 @@ class CommonNotMetException : public std::exception { std::string err_str_; }; } // namespace common + namespace common { namespace enforce { -#if !defined(_WIN32) -#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0) -#else -// there is no equivalent intrinsics in msvc. -#define UNLIKELY(condition) (condition) +/** HELPER MACROS AND FUNCTIONS **/ +#ifndef PADDLE_MAY_THROW +#define PADDLE_MAY_THROW noexcept(false) #endif #if defined _WIN32 && defined PADDLE_ON_INFERENCE && defined PADDLE_NO_PYTHON @@ -160,3 +163,54 @@ using CommonType2 = typename std::add_lvalue_reference< } // namespace enforce } // namespace common + +// TODO(zhangbopd): This is a copy from pir, and should be removed after +// merging this into the common enforce namespace above. +template <typename T> +inline bool is_error(const T& stat) { + return !stat; +} + +namespace pir { +class IrNotMetException : public std::exception { + public: + explicit IrNotMetException(const std::string& str) : err_str_(str) {} + + const char* what() const noexcept override { return err_str_.c_str(); } + + private: + std::string err_str_; +}; + +#define IR_THROW(...) \ + do { \ + try { \ + throw pir::IrNotMetException( \ + paddle::string::Sprintf("Error occurred at: %s:%d :\n%s", \ + __FILE__, \ + __LINE__, \ + paddle::string::Sprintf(__VA_ARGS__))); \ + } catch (const std::exception& e) { \ + std::cout << e.what() << std::endl; \ + throw; \ + } \ + } while (0) + +#define IR_ENFORCE(COND, ...) \ + do { \ + bool __cond__(COND); \ + if (UNLIKELY(is_error(__cond__))) { \ + try { \ + throw pir::IrNotMetException( \ + paddle::string::Sprintf("Error occurred at: %s:%d :\n%s", \ + __FILE__, \ + __LINE__, \ + paddle::string::Sprintf(__VA_ARGS__))); \ + } catch (const std::exception& e) { \ + std::cout << e.what() << std::endl; \ + throw; \ + } \ + } \ + } while (0) + +} // namespace pir diff --git a/paddle/phi/core/errors.cc b/paddle/common/errors.cc similarity index 93% rename from paddle/phi/core/errors.cc rename to paddle/common/errors.cc index 0fcf8f292c1e17..b720132c505f56 100644 --- a/paddle/phi/core/errors.cc +++ b/paddle/common/errors.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include -namespace phi { +namespace common { std::string error_name(ErrorCode code) { switch (code) { case ErrorCode::LEGACY: @@ -70,4 +70,4 @@ std::string ErrorSummary::to_string() const { result += error_message(); return result; } -} // namespace phi +} // namespace common diff --git a/paddle/common/errors.h b/paddle/common/errors.h index 826a2350606587..e2ebf971f36cac 100644 --- a/paddle/common/errors.h +++ b/paddle/common/errors.h @@ -145,3 +145,9 @@ REGISTER_ERROR(External, ErrorCode::EXTERNAL) } // namespace errors } // namespace common + +namespace phi { +namespace errors = ::common::errors; +using ErrorCode = ::common::ErrorCode; +using ErrorSummary = ::common::ErrorSummary; +} // namespace phi diff --git a/paddle/phi/common/layout.h b/paddle/common/layout.h similarity index 93% rename from paddle/phi/common/layout.h rename to paddle/common/layout.h index 622962d787a9e7..408dd51c1be9db 100644 --- a/paddle/phi/common/layout.h +++ b/paddle/common/layout.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,9 +15,9 @@ limitations under the License. */ #pragma once #include -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" -namespace phi { +namespace common { // Note: The original design of paddle DataLayout is confusing. // It contains two levels of "layout", one is the data layout @@ -132,10 +132,18 @@ inline std::ostream& operator<<(std::ostream& os, DataLayout layout) { return os; } -} // namespace phi +} // namespace common + +namespace pir { +using DataLayout = common::DataLayout; +} + +namespace phi { +using DataLayout = common::DataLayout; +} namespace paddle { // In order to be compatible with the original custom operator Tensor interface -using DataLayout = phi::DataLayout; +using DataLayout = common::DataLayout; } // namespace paddle diff --git a/paddle/common/macros.h b/paddle/common/macros.h index 13ce7e6d82d9c0..8506aa92486640 100644 --- a/paddle/common/macros.h +++ b/paddle/common/macros.h @@ -17,13 +17,14 @@ limitations under the License. */ namespace common { // Disable the copy and assignment operator for a class. - +#ifndef DISABLE_COPY_AND_ASSIGN #define DISABLE_COPY_AND_ASSIGN(classname) \ private: \ classname(const classname&) = delete; \ classname(classname&&) = delete; \ classname& operator=(const classname&) = delete; \ classname& operator=(classname&&) = delete +#endif #define PD_STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg) \ _PD_STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg) @@ -47,6 +48,25 @@ namespace common { #define UNUSED __attribute__((unused)) #endif +// Because most enforce conditions would evaluate to true, we can use +// __builtin_expect to instruct the C++ compiler to generate code that +// always forces branch prediction of true. +// This generates faster binary code. __builtin_expect has been available +// since C++11. +// For more details, please check https://stackoverflow.com/a/43870188/724872. +#if !defined(_WIN32) +#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0) +#else +// there are no equivalent intrinsics in msvc.
+#define UNLIKELY(condition) (condition) +#endif + +#if !defined(_WIN32) +#define LIKELY(condition) __builtin_expect(static_cast<bool>(condition), 1) +#else +// there are no equivalent intrinsics in msvc. +#define LIKELY(condition) (condition) +#endif + #define PD_CONCATENATE(arg1, arg2) PD_CONCATENATE1(arg1, arg2) #define PD_CONCATENATE1(arg1, arg2) PD_CONCATENATE2(arg1, arg2) #define PD_CONCATENATE2(arg1, arg2) arg1##arg2 diff --git a/paddle/fluid/distributed/auto_parallel/CMakeLists.txt b/paddle/fluid/distributed/auto_parallel/CMakeLists.txt index 14c4b693885927..d1eae7f5995490 100644 --- a/paddle/fluid/distributed/auto_parallel/CMakeLists.txt +++ b/paddle/fluid/distributed/auto_parallel/CMakeLists.txt @@ -3,6 +3,6 @@ add_subdirectory(spmd_rules) cc_library( op_dist_attr SRCS dist_attr.cc - DEPS phi auto_parallel_proto proto_desc) + DEPS phi common auto_parallel_proto proto_desc) cc_library(auto_parallel DEPS op_dist_attr spmd_rules) diff --git a/paddle/fluid/distributed/auto_parallel/spmd_rules/CMakeLists.txt b/paddle/fluid/distributed/auto_parallel/spmd_rules/CMakeLists.txt index 42fde81693429c..f16c1558905791 100644 --- a/paddle/fluid/distributed/auto_parallel/spmd_rules/CMakeLists.txt +++ b/paddle/fluid/distributed/auto_parallel/spmd_rules/CMakeLists.txt @@ -3,4 +3,4 @@ file(GLOB spmd_srcs *.cc) cc_library( spmd_rules SRCS ${spmd_srcs} - DEPS phi) + DEPS phi common) diff --git a/paddle/fluid/distributed/auto_parallel/test/CMakeLists.txt b/paddle/fluid/distributed/auto_parallel/test/CMakeLists.txt index 954af0cc852a03..55948f83c635eb 100644 --- a/paddle/fluid/distributed/auto_parallel/test/CMakeLists.txt +++ b/paddle/fluid/distributed/auto_parallel/test/CMakeLists.txt @@ -1,22 +1,22 @@ cc_test( device_mesh_test SRCS device_mesh_test.cc - DEPS phi) + DEPS phi common) cc_test( process_mesh_test SRCS process_mesh_test.cc - DEPS phi) + DEPS phi common) cc_test( dist_attr_test SRCS dist_attr_test.cc - DEPS phi proto_desc) + DEPS phi common proto_desc) cc_test( dist_mapper_test SRCS dist_mapper_test.cc - DEPS phi) + DEPS phi common) cc_test( spmd_rule_test diff --git a/paddle/fluid/distributed/collective/CMakeLists.txt b/paddle/fluid/distributed/collective/CMakeLists.txt index a2267e1f6cebdd..d42b810972dc85 100644 --- a/paddle/fluid/distributed/collective/CMakeLists.txt +++ b/paddle/fluid/distributed/collective/CMakeLists.txt @@ -1,18 +1,18 @@ cc_library( process_group SRCS process_group.cc - DEPS phi xxhash) + DEPS phi common xxhash) cc_library( eager_reducer SRCS reducer.cc - DEPS eager_api process_group phi string_helper) + DEPS eager_api process_group phi common string_helper) if(WITH_DISTRIBUTE) cc_library( process_group_gloo SRCS process_group_gloo.cc gloo_send_recv.cc - DEPS phi eager_api gloo_wrapper) + DEPS phi common eager_api gloo_wrapper) endif() if(WITH_NCCL OR WITH_RCCL) @@ -21,6 +21,7 @@ if(WITH_NCCL OR WITH_RCCL) SRCS process_group_nccl.cc common.cc DEPS process_group phi + common place enforce collective_helper @@ -32,7 +33,13 @@ if(WITH_XPU_BKCL) cc_library( process_group_bkcl SRCS process_group_bkcl.cc bkcl_tools.cc common.cc - DEPS process_group phi place enforce collective_helper device_context) + DEPS process_group + phi + common + place + enforce + collective_helper + device_context) endif() if(WITH_MPI) @@ -46,7 +53,13 @@ if(WITH_CUSTOM_DEVICE) cc_library( process_group_custom SRCS process_group_custom.cc custom_ccl_tools.cc common.cc - DEPS process_group phi place enforce collective_helper device_context) + DEPS process_group + phi + common + place + enforce +
collective_helper + device_context) endif() set(COMM_UTILS_DEPS process_group) diff --git a/paddle/fluid/distributed/collective/process_group.h b/paddle/fluid/distributed/collective/process_group.h index e2b31950bd51bc..ae1c7fe7d9d97c 100644 --- a/paddle/fluid/distributed/collective/process_group.h +++ b/paddle/fluid/distributed/collective/process_group.h @@ -20,12 +20,12 @@ #include #include +#include "paddle/common/errors.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/device_context.h" #include "paddle/phi/core/distributed/types.h" #include "paddle/phi/core/distributed/utils.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" constexpr auto kWaitTimeout = std::chrono::milliseconds(0); diff --git a/paddle/fluid/distributed/collective/process_group_bkcl.cc b/paddle/fluid/distributed/collective/process_group_bkcl.cc index 7a295b3360602a..8b306e29f52b32 100644 --- a/paddle/fluid/distributed/collective/process_group_bkcl.cc +++ b/paddle/fluid/distributed/collective/process_group_bkcl.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/distributed/collective/process_group_bkcl.h" +#include "paddle/common/errors.h" #include "paddle/fluid/distributed/collective/bkcl_tools.h" #include "paddle/fluid/distributed/collective/common.h" #include "paddle/fluid/framework/convert_utils.h" @@ -25,7 +26,6 @@ #include "paddle/phi/core/distributed/check/static_check.h" #include "paddle/phi/core/distributed/comm_context_manager.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/collective/process_group_with_stream.h b/paddle/fluid/distributed/collective/process_group_with_stream.h index 0cea9bb3ed87e6..58d1a042fec3c8 100644 --- a/paddle/fluid/distributed/collective/process_group_with_stream.h +++ b/paddle/fluid/distributed/collective/process_group_with_stream.h @@ -14,9 +14,9 @@ #pragma once +#include "paddle/common/errors.h" #include "paddle/fluid/distributed/collective/process_group.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/collective/process_group_without_stream.h b/paddle/fluid/distributed/collective/process_group_without_stream.h index dd22c0f1e4cbdb..a3c103574cbc5a 100644 --- a/paddle/fluid/distributed/collective/process_group_without_stream.h +++ b/paddle/fluid/distributed/collective/process_group_without_stream.h @@ -14,9 +14,9 @@ #pragma once +#include "paddle/common/errors.h" #include "paddle/fluid/distributed/collective/process_group.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/common/afs_warpper.h b/paddle/fluid/distributed/common/afs_warpper.h index 516b35448fe516..30f4f164ba5a1d 100644 --- a/paddle/fluid/distributed/common/afs_warpper.h +++ b/paddle/fluid/distributed/common/afs_warpper.h @@ -20,9 +20,9 @@ #include #include +#include "paddle/common/macros.h" #include "paddle/fluid/distributed/the_one_ps.pb.h" #include "paddle/fluid/string/string_helper.h" -#include "paddle/phi/core/macros.h" namespace paddle { namespace distributed { struct FsDataConverter { diff --git a/paddle/fluid/distributed/fleet_executor/CMakeLists.txt b/paddle/fluid/distributed/fleet_executor/CMakeLists.txt index 9c28205520129c..84b58422ab2e71 100755 --- a/paddle/fluid/distributed/fleet_executor/CMakeLists.txt +++ 
b/paddle/fluid/distributed/fleet_executor/CMakeLists.txt @@ -5,9 +5,9 @@ endif() proto_library(interceptor_message_proto SRCS interceptor_message.proto) if(WITH_ARM_BRPC) - set(BRPC_DEPS arm_brpc snappy phi glog) + set(BRPC_DEPS arm_brpc snappy phi common glog) elseif(WITH_DISTRIBUTE AND NOT WITH_PSLIB) - set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS} zlib phi) + set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS} zlib phi common) else() set(BRPC_DEPS "") endif() @@ -15,7 +15,7 @@ endif() cc_library( task_loop_thread_pool SRCS task_loop_thread_pool.cc task_loop_thread.cc task_loop.cc - DEPS enforce glog) + DEPS enforce glog common) cc_library( fleet_executor SRCS fleet_executor.cc @@ -43,6 +43,7 @@ cc_library( executor_gc_helper op_registry phi + common glog ${BRPC_DEPS}) if(WITH_DISTRIBUTE) diff --git a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc index 7817b9bc0e9dfe..4190019e0d1738 100644 --- a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc +++ b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc @@ -14,12 +14,12 @@ #include "paddle/fluid/distributed/fleet_executor/compute_interceptor.h" +#include "paddle/common/errors.h" #include "paddle/fluid/distributed/fleet_executor/carrier.h" #include "paddle/fluid/distributed/fleet_executor/task_node.h" #include "paddle/fluid/framework/executor_gc_helper.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/jit/serializer.h" -#include "paddle/phi/core/errors.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc b/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc index 2e3389af5feb59..704dd16400065c 100644 --- a/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc +++ b/paddle/fluid/distributed/fleet_executor/cond_interceptor.cc @@ -14,13 +14,13 @@ #include "paddle/fluid/distributed/fleet_executor/cond_interceptor.h" #include +#include "paddle/common/errors.h" #include "paddle/fluid/distributed/fleet_executor/task_node.h" #include "paddle/fluid/framework/executor_gc_helper.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/place.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/errors.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/fleet_executor/dist_model.cc b/paddle/fluid/distributed/fleet_executor/dist_model.cc index 1ec8c11fdf610d..a1fd38295319ed 100644 --- a/paddle/fluid/distributed/fleet_executor/dist_model.cc +++ b/paddle/fluid/distributed/fleet_executor/dist_model.cc @@ -46,7 +46,7 @@ bool LoadDataFromDistModelTensor(const DistModelTensor &input_data, phi::DenseTensor *input_tensor, const platform::Place &place) { VLOG(3) << "Loading data from DistModelTensor for " << input_data.name; - framework::DDim dims = phi::make_ddim(input_data.shape); + framework::DDim dims = common::make_ddim(input_data.shape); void *input_tensor_ptr = nullptr; if (input_data.dtype == DistModelDataType::INT64) { input_tensor_ptr = input_tensor->mutable_data(dims, place); @@ -645,7 +645,7 @@ bool DistModel::FetchResults(std::vector *output_data, template bool DistModel::FetchResult(const phi::DenseTensor &fetch, DistModelTensor *output_data) { - auto shape = phi::vectorize(fetch.dims()); + auto shape = common::vectorize(fetch.dims()); output_data->shape.assign(shape.begin(), shape.end()); const T *data = fetch.data(); int64_t num_elems = fetch.numel(); diff 
--git a/paddle/fluid/distributed/fleet_executor/start_interceptor.cc b/paddle/fluid/distributed/fleet_executor/start_interceptor.cc index 830f619ed3c00c..1fe4aaea15fc4d 100644 --- a/paddle/fluid/distributed/fleet_executor/start_interceptor.cc +++ b/paddle/fluid/distributed/fleet_executor/start_interceptor.cc @@ -14,9 +14,9 @@ #include "paddle/fluid/distributed/fleet_executor/start_interceptor.h" +#include "paddle/common/errors.h" #include "paddle/fluid/distributed/fleet_executor/task_node.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/phi/core/errors.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc b/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc index 63d4fa1bf97049..71474ec6be6fb4 100644 --- a/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc +++ b/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc @@ -37,7 +37,7 @@ namespace distributed { std::vector GetOps() { framework::AttributeMap attrs; attrs["dtype"] = framework::proto::VarType::FP32; - attrs["shape"] = phi::vectorize({2, 3}); + attrs["shape"] = common::vectorize({2, 3}); attrs["value"] = 1.0f; auto zero_op = framework::OpRegistry::CreateOp( diff --git a/paddle/fluid/distributed/ps/service/CMakeLists.txt b/paddle/fluid/distributed/ps/service/CMakeLists.txt index c23f26c6352180..eac2585416d8b8 100755 --- a/paddle/fluid/distributed/ps/service/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/service/CMakeLists.txt @@ -3,11 +3,11 @@ set_source_files_properties(${BRPC_SRCS}) if(WITH_HETERPS) - set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS} phi zlib device_context rocksdb) + set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS} phi common zlib device_context rocksdb) else() - set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS} phi zlib device_context) + set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS} phi common zlib device_context) endif() @@ -99,6 +99,7 @@ cc_library( simple_rpc scope phi + common ps_gpu_wrapper fleet ${RPC_DEPS}) @@ -126,7 +127,7 @@ cc_library( #cc_library( # communicator # SRCS communicator/communicator.cc -# DEPS scope client table phi ${RPC_DEPS}) +# DEPS scope client table phi common ${RPC_DEPS}) #cc_library( # ps_service # SRCS ps_service/service.cc diff --git a/paddle/fluid/distributed/ps/service/brpc_ps_client.cc b/paddle/fluid/distributed/ps/service/brpc_ps_client.cc index 9ad8768e0927d5..c0e09af4c550b6 100644 --- a/paddle/fluid/distributed/ps/service/brpc_ps_client.cc +++ b/paddle/fluid/distributed/ps/service/brpc_ps_client.cc @@ -1505,7 +1505,7 @@ int32_t BrpcPsClient::RecvAndSaveTable(const uint64_t table_id, phi::DenseTensor *var_tensor = var->GetMutable(); std::vector vec_dim = {var_num, var_shape}; - var_tensor->Resize(phi::make_ddim(vec_dim)); + var_tensor->Resize(common::make_ddim(vec_dim)); // copy and save float *tensor_data = var_tensor->mutable_data(place); diff --git a/paddle/fluid/distributed/ps/service/brpc_ps_client.h b/paddle/fluid/distributed/ps/service/brpc_ps_client.h old mode 100755 new mode 100644 index d902824bfd60c9..73f730bf6d8fde --- a/paddle/fluid/distributed/ps/service/brpc_ps_client.h +++ b/paddle/fluid/distributed/ps/service/brpc_ps_client.h @@ -23,6 +23,7 @@ #include "brpc/channel.h" #include "brpc/controller.h" #include "brpc/server.h" +#include "paddle/common/macros.h" #include "paddle/fluid/distributed/ps/service/brpc_utils.h" #include "paddle/fluid/distributed/ps/service/ps_client.h" #include 
"paddle/fluid/distributed/ps/service/sendrecv.pb.h" @@ -30,7 +31,6 @@ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/tensor_util.h" -#include "paddle/phi/core/macros.h" namespace brpc { class Channel; class Controller; diff --git a/paddle/fluid/distributed/ps/service/brpc_utils.cc b/paddle/fluid/distributed/ps/service/brpc_utils.cc index 715d1bbf954f07..8006e6d943579b 100644 --- a/paddle/fluid/distributed/ps/service/brpc_utils.cc +++ b/paddle/fluid/distributed/ps/service/brpc_utils.cc @@ -106,7 +106,7 @@ void SerializeLodTensor(framework::Variable* var, } var_msg->set_data_type(static_cast( framework::TransToProtoVarType(tensor->dtype()))); - for (auto& dim : phi::vectorize(tensor->dims())) { + for (auto& dim : common::vectorize(tensor->dims())) { var_msg->add_dims(dim); } // IO Buffer @@ -153,7 +153,7 @@ void SerializeSelectedRows(framework::Variable* var, memcpy(data_ptr, &((*rows)[0]), rows->size() * sizeof(int64_t)); var_msg->set_data_type(static_cast( framework::TransToProtoVarType(tensor->dtype()))); - for (auto& dim : phi::vectorize(tensor->dims())) { + for (auto& dim : common::vectorize(tensor->dims())) { var_msg->add_dims(dim); } // IO Buffer @@ -232,7 +232,7 @@ void DeserializeLodTensor(framework::Variable* var, for (auto& x : msg.dims()) { vec_dim.push_back(x); } - tensor->Resize(phi::make_ddim(vec_dim)); + tensor->Resize(common::make_ddim(vec_dim)); framework::LoD lod; for (int i = 0; i < msg.lod_level(); ++i) { @@ -288,7 +288,7 @@ void DeserializeSelectedRows( for (auto& x : msg.dims()) { vec_dim.push_back(x); } - tensor->Resize(phi::make_ddim(vec_dim)); + tensor->Resize(common::make_ddim(vec_dim)); void* tensor_data = tensor->mutable_data( place, framework::TransToPhiDataType(VarMessageToVarType(msg.data_type()))); diff --git a/paddle/fluid/distributed/ps/service/communicator/communicator.cc b/paddle/fluid/distributed/ps/service/communicator/communicator.cc index 9932343fa779bd..f7a8410919f4cc 100644 --- a/paddle/fluid/distributed/ps/service/communicator/communicator.cc +++ b/paddle/fluid/distributed/ps/service/communicator/communicator.cc @@ -872,7 +872,7 @@ bool AsyncCommunicator::Check(const std::vector &var_tables) { VLOG(3) << "send step_counter into queue"; auto tmp_var = std::make_shared(); auto *tensor = tmp_var->GetMutable(); - tensor->Resize(phi::make_ddim({1})); + tensor->Resize(common::make_ddim({1})); auto *out_d = tensor->mutable_data(platform::CPUPlace()); out_d[0] = 1; send_varname_to_queue_[table_name]->Push(tmp_var); diff --git a/paddle/fluid/distributed/ps/service/env.h b/paddle/fluid/distributed/ps/service/env.h index d6b403523496c5..0552c54282d35c 100644 --- a/paddle/fluid/distributed/ps/service/env.h +++ b/paddle/fluid/distributed/ps/service/env.h @@ -25,7 +25,7 @@ #include #include -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" #include "paddle/utils/flags.h" namespace paddle { diff --git a/paddle/fluid/distributed/ps/service/server.h b/paddle/fluid/distributed/ps/service/server.h index fc1d4a2bd343ba..5a0764b11e8a1f 100644 --- a/paddle/fluid/distributed/ps/service/server.h +++ b/paddle/fluid/distributed/ps/service/server.h @@ -23,6 +23,7 @@ #include "butil/endpoint.h" #include "google/protobuf/service.h" +#include "paddle/common/macros.h" #include "paddle/fluid/distributed/common/registerer.h" #include "paddle/fluid/distributed/ps/service/env.h" #include "paddle/fluid/distributed/ps/service/sendrecv.pb.h" @@ -31,7 +32,6 @@ #include 
"paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" -#include "paddle/phi/core/macros.h" namespace google { namespace protobuf { diff --git a/paddle/fluid/distributed/ps/table/CMakeLists.txt b/paddle/fluid/distributed/ps/table/CMakeLists.txt index bedb28ec231f7b..d4e5a81f718b4a 100644 --- a/paddle/fluid/distributed/ps/table/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/table/CMakeLists.txt @@ -18,7 +18,7 @@ set_source_files_properties( cc_library( graph_node SRCS ${graphDir}/graph_node.cc - DEPS WeightedSampler enforce) + DEPS WeightedSampler enforce common) set_source_files_properties( memory_dense_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) set_source_files_properties( @@ -46,7 +46,8 @@ cc_library( string_helper simple_threadpool xxhash - phi) + phi + common) set_source_files_properties( tensor_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) @@ -90,6 +91,7 @@ cc_library( string_helper device_context phi + common glog framework_io afs_wrapper diff --git a/paddle/fluid/distributed/ps/table/graph/graph_edge.h b/paddle/fluid/distributed/ps/table/graph/graph_edge.h index 8a5c7c1ce10dbe..d8e38491aa2a39 100644 --- a/paddle/fluid/distributed/ps/table/graph/graph_edge.h +++ b/paddle/fluid/distributed/ps/table/graph/graph_edge.h @@ -16,7 +16,7 @@ #include #include #include -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/table/table.h b/paddle/fluid/distributed/ps/table/table.h index dc44831e891ca1..b39a12da02a4fc 100644 --- a/paddle/fluid/distributed/ps/table/table.h +++ b/paddle/fluid/distributed/ps/table/table.h @@ -22,6 +22,7 @@ #include #include +#include "paddle/common/macros.h" #include "paddle/fluid/distributed/common/afs_warpper.h" #include "paddle/fluid/distributed/ps/table/accessor.h" #include "paddle/fluid/distributed/ps/table/depends/sparse_utils.h" @@ -32,7 +33,6 @@ #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/string/string_helper.h" -#include "paddle/phi/core/macros.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/rpc/CMakeLists.txt b/paddle/fluid/distributed/rpc/CMakeLists.txt index 4042a6fe3ccfeb..c7c31fd55be81b 100644 --- a/paddle/fluid/distributed/rpc/CMakeLists.txt +++ b/paddle/fluid/distributed/rpc/CMakeLists.txt @@ -12,7 +12,7 @@ set_source_files_properties( set_source_files_properties(rpc_agent.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set(PADDLE_RPC_DEPS ${EXTERNAL_BRPC_DEPS} zlib phi pybind) +set(PADDLE_RPC_DEPS ${EXTERNAL_BRPC_DEPS} zlib phi common pybind) proto_library(paddle_rpc_proto SRCS rpc.proto) cc_library( paddle_rpc diff --git a/paddle/fluid/distributed/test/CMakeLists.txt b/paddle/fluid/distributed/test/CMakeLists.txt index aaae9761330254..ba08768ab4a104 100644 --- a/paddle/fluid/distributed/test/CMakeLists.txt +++ b/paddle/fluid/distributed/test/CMakeLists.txt @@ -43,6 +43,7 @@ cc_test( DEPS brpc_utils scope phi + common sendrecv_rpc ps_service ${COMMON_DEPS} diff --git a/paddle/fluid/distributed/test/brpc_utils_test.cc b/paddle/fluid/distributed/test/brpc_utils_test.cc index 4ff9f2709b81c0..98dc18c3d4cbe9 100644 --- a/paddle/fluid/distributed/test/brpc_utils_test.cc +++ b/paddle/fluid/distributed/test/brpc_utils_test.cc @@ -37,7 +37,7 @@ void CreateVarsOnScope(framework::Scope* scope, // var 1 framework::Variable* var1 = 
scope->Var("x1"); auto* tensor1 = var1->GetMutable(); - tensor1->Resize(phi::make_ddim({512, 8, 4, 2})); + tensor1->Resize(common::make_ddim({512, 8, 4, 2})); framework::LoD lod1; lod1.push_back(phi::Vector({1, 3, 8})); tensor1->set_lod(lod1); @@ -47,7 +47,7 @@ void CreateVarsOnScope(framework::Scope* scope, // var 2 framework::Variable* var2 = scope->Var("x2"); auto* tensor2 = var2->GetMutable(); - tensor2->Resize(phi::make_ddim({1000, 64})); + tensor2->Resize(common::make_ddim({1000, 64})); framework::LoD lod2; lod2.push_back(phi::Vector({1, 1})); tensor2->set_lod(lod2); @@ -60,7 +60,7 @@ void CreateVarsOnScope(framework::Scope* scope, slr->set_height(564); auto* tensor3 = slr->mutable_value(); auto* rows = slr->mutable_rows(); - tensor3->Resize(phi::make_ddim({564, 128})); + tensor3->Resize(common::make_ddim({564, 128})); tensor3->mutable_data(*place); phi::funcs::set_constant(ctx, tensor3, 32.7); for (int i = 0; i < 564; ++i) rows->push_back(i); @@ -97,7 +97,7 @@ void RunMultiVarMsg(platform::Place place) { // check var1 framework::Variable* var1 = scope_recv.FindVar("x1"); auto* tensor1 = var1->GetMutable(); - EXPECT_EQ(tensor1->dims(), phi::make_ddim({512, 8, 4, 2})); + EXPECT_EQ(tensor1->dims(), common::make_ddim({512, 8, 4, 2})); // EXPECT_EQ(tensor1->lod(), phi::Vector({1, 3, 8})); auto* tensor_data1 = const_cast(tensor1->data()); int tensor_numel1 = 512 * 8 * 4 * 2; @@ -107,7 +107,7 @@ void RunMultiVarMsg(platform::Place place) { // check var2 framework::Variable* var2 = scope_recv.FindVar("x2"); auto* tensor2 = var2->GetMutable(); - EXPECT_EQ(tensor2->dims(), phi::make_ddim({1000, 64})); + EXPECT_EQ(tensor2->dims(), common::make_ddim({1000, 64})); // EXPECT_EQ(tensor2->lod(), phi::Vector({1, 1})); auto* tensor_data2 = const_cast(tensor2->data()); int tensor_numel2 = 1000 * 64; @@ -122,7 +122,7 @@ void RunMultiVarMsg(platform::Place place) { } auto* tensor3 = slr->mutable_value(); - EXPECT_EQ(tensor3->dims(), phi::make_ddim({564, 128})); + EXPECT_EQ(tensor3->dims(), common::make_ddim({564, 128})); auto* tensor_data3 = const_cast(tensor3->data()); int tensor_numel3 = 564 * 128; for (int i = 0; i < tensor_numel3; ++i) diff --git a/paddle/fluid/eager/CMakeLists.txt b/paddle/fluid/eager/CMakeLists.txt index f948e050387bca..a7ea765aadc3c8 100755 --- a/paddle/fluid/eager/CMakeLists.txt +++ b/paddle/fluid/eager/CMakeLists.txt @@ -1,5 +1,6 @@ set(eager_deps phi + common hook_utils utils global_utils @@ -45,26 +46,27 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) cc_library( backward SRCS backward.cc - DEPS grad_tensor_holder utils autograd_meta grad_node_info phi) + DEPS grad_tensor_holder utils autograd_meta grad_node_info phi common) endif() cc_library( eager_nan_inf_utils SRCS nan_inf_utils.cc - DEPS phi nan_inf_utils enforce) + DEPS phi common nan_inf_utils enforce) cc_library( grad_node_info SRCS grad_node_info.cc - DEPS phi) + DEPS phi common) cc_library( autograd_meta SRCS autograd_meta.cc - DEPS phi) + DEPS phi common) cc_library( utils SRCS utils.cc DEPS phi + common global_utils layer proto_desc diff --git a/paddle/fluid/eager/accumulation/CMakeLists.txt b/paddle/fluid/eager/accumulation/CMakeLists.txt index 574123661847b6..129baebf4ca876 100755 --- a/paddle/fluid/eager/accumulation/CMakeLists.txt +++ b/paddle/fluid/eager/accumulation/CMakeLists.txt @@ -2,5 +2,5 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) cc_library( accumulation_node SRCS accumulation_node.cc - DEPS gradient_accumulator phi grad_node_info final_dygraph_function) + DEPS gradient_accumulator phi common grad_node_info 
final_dygraph_function) endif() diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc index 7567236c4ff68e..be15752419771c 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc @@ -107,7 +107,7 @@ void ScaleAPI(const paddle::Tensor& x, dense_tensor->dtype(), dense_tensor->dims(), dense_tensor->layout()); auto place = dense_tensor->place(); size_t bytes_size = - phi::product(dense_tensor->dims()) * SizeOf(dense_tensor->dtype()); + common::product(dense_tensor->dims()) * SizeOf(dense_tensor->dtype()); auto dense_out = std::make_shared( paddle::memory::Alloc(place, bytes_size), std::move(tensor_meta)); // Handle Device Context diff --git a/paddle/fluid/eager/api/utils/CMakeLists.txt b/paddle/fluid/eager/api/utils/CMakeLists.txt index c6c5f4e9302a94..3de1959416b306 100755 --- a/paddle/fluid/eager/api/utils/CMakeLists.txt +++ b/paddle/fluid/eager/api/utils/CMakeLists.txt @@ -7,10 +7,10 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) cc_library( hook_utils SRCS hook_utils.cc - DEPS phi autograd_meta grad_node_info utils accumulation_node) + DEPS phi common autograd_meta grad_node_info utils accumulation_node) else() cc_library( hook_utils SRCS hook_utils.cc - DEPS phi autograd_meta grad_node_info utils) + DEPS phi common autograd_meta grad_node_info utils) endif() diff --git a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt index 25d2f4dacfd168..a6bb716e6b7ade 100644 --- a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt +++ b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt @@ -61,6 +61,12 @@ if(WIN32) list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/phi.dll) endif() + add_custom_command( + OUTPUT ${eager_generator_path}/common.dll + COMMAND ${CMAKE_COMMAND} -E copy ${COMMON_LIB} ${eager_generator_path} + DEPENDS common) + list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/common.dll) + if(WITH_SHARED_IR) add_custom_command( OUTPUT ${eager_generator_path}/ir.dll diff --git a/paddle/fluid/eager/custom_operator/CMakeLists.txt b/paddle/fluid/eager/custom_operator/CMakeLists.txt index a74ba2dc8c6287..189fb23c80aa8d 100644 --- a/paddle/fluid/eager/custom_operator/CMakeLists.txt +++ b/paddle/fluid/eager/custom_operator/CMakeLists.txt @@ -1,9 +1,9 @@ cc_library( custom_operator_node SRCS custom_operator_node.cc - DEPS phi grad_node_info custom_operator utils custom_operator_utils) + DEPS phi common grad_node_info custom_operator utils custom_operator_utils) cc_library( custom_operator_utils SRCS custom_operator_utils.cc - DEPS phi grad_node_info custom_operator utils) + DEPS phi common grad_node_info custom_operator utils) diff --git a/paddle/fluid/eager/custom_operator/custom_operator_utils.cc b/paddle/fluid/eager/custom_operator/custom_operator_utils.cc index 795abd5e72f4e7..8894a06267b514 100644 --- a/paddle/fluid/eager/custom_operator/custom_operator_utils.cc +++ b/paddle/fluid/eager/custom_operator/custom_operator_utils.cc @@ -227,7 +227,8 @@ static std::vector> RunInferShapeFunc( auto duplicable_input_pair = ctx.InputRangeAt(inplace_reverse_map[i]); result.push_back({ctx.InputAt(duplicable_input_pair.first).dims()}); } else { - result.push_back({phi::make_ddim(output_shapes[output_shape_idx++])}); + result.push_back( + {common::make_ddim(output_shapes[output_shape_idx++])}); } } } @@ -436,7 +437,7 @@ 
paddle::Tensor BuildEmptyDistPaddleTensor( meta.dims = dims; meta.dtype = dtype; - auto dist_attr = phi::distributed::TensorDistAttr(phi::vectorize(dims)); + auto dist_attr = phi::distributed::TensorDistAttr(common::vectorize(dims)); dist_attr.set_process_mesh(process_mesh); auto dist_t = std::make_shared( @@ -604,7 +605,7 @@ void TransCtxTensorsToDistTensors( for (size_t i = 0; i < output_all->size(); ++i) { auto& tensor = output_all->at(i); phi::distributed::TensorDistAttr dist_attr = - phi::distributed::TensorDistAttr(phi::vectorize(tensor.dims())); + phi::distributed::TensorDistAttr(common::vectorize(tensor.dims())); dist_attr.set_process_mesh(current_process_mesh); auto dist_t = std::make_shared( std::dynamic_pointer_cast(tensor.impl()), @@ -615,7 +616,7 @@ void TransCtxTensorsToDistTensors( for (size_t i = 0; i < input_all->size(); ++i) { auto& tensor = input_all->at(i); phi::distributed::TensorDistAttr dist_attr = - phi::distributed::TensorDistAttr(phi::vectorize(tensor.dims())); + phi::distributed::TensorDistAttr(common::vectorize(tensor.dims())); dist_attr.set_process_mesh(current_process_mesh); auto dist_t = std::make_shared( std::dynamic_pointer_cast(tensor.impl()), diff --git a/paddle/fluid/eager/eager_layout_transformer.h b/paddle/fluid/eager/eager_layout_transformer.h index d707b14be416e0..81ad21302f2868 100644 --- a/paddle/fluid/eager/eager_layout_transformer.h +++ b/paddle/fluid/eager/eager_layout_transformer.h @@ -79,7 +79,7 @@ inline void DealWithShapeOp(paddle::Tensor* out_tensor, for (int i = 0; i < dim_size; i++) { dims[i] = value[i]; } - auto des_str = phi::DataLayoutToString(des_layout); + auto des_str = common::DataLayoutToString(des_layout); if (change_dim && des_str == "NCHW") { // NCHW -> NHWC VLOG(6) << "layout autotune get Shape from NCHW -> NHWC " << value[0] << " " @@ -200,7 +200,7 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { std::string* layout) : op_name_(op_name), desired_layout_(DesiredLayout()) { VLOG(4) << "Heavily op: " << op_name << " layout " << *layout; - *layout = phi::DataLayoutToString(DesiredLayout()); + *layout = common::DataLayoutToString(DesiredLayout()); } paddle::Tensor TransInTensor(const std::string& in_name, @@ -247,13 +247,13 @@ class EagerLightlyLayoutSensitiveOpTransformer : public EagerLayoutTransformer { const std::string& op_name) { VLOG(4) << "Lightly op : " << op_name; auto desired_layout = DesiredLayout(); - final_layout_ = phi::DataLayoutToString(desired_layout); + final_layout_ = common::DataLayoutToString(desired_layout); } // transpose from desired to default paddle::Tensor TransInTensor(const std::string& in_name UNUSED, const paddle::Tensor& in) { - std::string input_layout = phi::DataLayoutToString(in.layout()); + std::string input_layout = common::DataLayoutToString(in.layout()); auto default_layout = DefaultLayout(); if (final_layout_ == input_layout && in.shape().size() == 4) { auto out_tensor = EagerTraceTransposeOp(phi::DataLayout::UNDEFINED, in); diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h index e8e74232888f46..f90cdba5b54b30 100644 --- a/paddle/fluid/eager/eager_tensor.h +++ b/paddle/fluid/eager/eager_tensor.h @@ -18,9 +18,9 @@ #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/variable.h" // Phi deps +#include "paddle/common/macros.h" #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/core/compat/convert_utils.h" -#include "paddle/phi/core/macros.h" namespace egr { diff --git 
a/paddle/fluid/eager/pylayer/CMakeLists.txt b/paddle/fluid/eager/pylayer/CMakeLists.txt index fe7a57fe795942..609f43b3e00942 100644 --- a/paddle/fluid/eager/pylayer/CMakeLists.txt +++ b/paddle/fluid/eager/pylayer/CMakeLists.txt @@ -1,4 +1,4 @@ cc_library( py_layer_node SRCS py_layer_node.cc - DEPS pybind phi grad_node_info) + DEPS pybind phi common grad_node_info) diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index 542c6429c43c9e..1bc28549cb0c44 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -19,8 +19,8 @@ #include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/tensor_wrapper.h" +#include "paddle/common/layout.h" #include "paddle/phi/api/all.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/tensor_meta.h" @@ -538,7 +538,7 @@ void EagerUtils::FillZeroForEmptyOptionalGradInput( grad_in_metas[i].DistAttr())); if (grad_in_metas[i].GetTensorMeta().dims.size() != -1) { auto tensor_with_zero = paddle::experimental::full( - phi::vectorize(grad_in_metas[i].GetTensorMeta().dims), + common::vectorize(grad_in_metas[i].GetTensorMeta().dims), 0.0, grad_in_metas[i].GetTensorMeta().dtype, grad_in_metas[i].GetPlace()); @@ -548,7 +548,7 @@ void EagerUtils::FillZeroForEmptyOptionalGradInput( } } else { auto tensor_with_zero = paddle::experimental::full( - phi::vectorize(grad_in_metas[i].GetTensorMeta().dims), + common::vectorize(grad_in_metas[i].GetTensorMeta().dims), 0.0, grad_in_metas[i].GetTensorMeta().dtype, grad_in_metas[i].GetPlace()); @@ -576,7 +576,7 @@ void EagerUtils::FillZeroForEmptyOptionalGradOutput( grad_output_metas[i].DistAttr())); if (grad_output_metas[i].GetTensorMeta().dims.size() != -1) { auto tensor_with_zero = paddle::experimental::full( - phi::vectorize(grad_output_metas[i].GetTensorMeta().dims), + common::vectorize(grad_output_metas[i].GetTensorMeta().dims), 0.0, grad_output_metas[i].GetTensorMeta().dtype, grad_output_metas[i].GetPlace()); @@ -587,7 +587,7 @@ void EagerUtils::FillZeroForEmptyOptionalGradOutput( } else { auto tensor_with_zero = paddle::experimental::full( // only create dense tensor. 
- phi::vectorize(grad_output_metas[i].GetTensorMeta().dims), + common::vectorize(grad_output_metas[i].GetTensorMeta().dims), 0.0, grad_output_metas[i].GetTensorMeta().dtype, grad_output_metas[i].GetPlace()); @@ -610,7 +610,7 @@ void EagerUtils::FillZeroForEmptyGradInput(paddle::Tensor* in_grad, grad_in_meta.DistTensorGlobalDims(), grad_in_meta.DistAttr())); if (tensor_meta.dims.size() != -1) { auto tensor_with_zero = - paddle::experimental::full(phi::vectorize(tensor_meta.dims), + paddle::experimental::full(common::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype, grad_in_meta.GetPlace()); @@ -620,7 +620,7 @@ void EagerUtils::FillZeroForEmptyGradInput(paddle::Tensor* in_grad, } } else { auto tensor_with_zero = - paddle::experimental::full(phi::vectorize(tensor_meta.dims), + paddle::experimental::full(common::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype, grad_in_meta.GetPlace()); @@ -638,7 +638,7 @@ void EagerUtils::FillZeroForEmptyOptionalGradInput( grad_in_meta.DistTensorGlobalDims(), grad_in_meta.DistAttr())); if (tensor_meta.dims.size() != -1) { auto tensor_with_zero = - paddle::experimental::full(phi::vectorize(tensor_meta.dims), + paddle::experimental::full(common::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype, grad_in_meta.GetPlace()); @@ -648,7 +648,7 @@ void EagerUtils::FillZeroForEmptyOptionalGradInput( } } else { auto tensor_with_zero = - paddle::experimental::full(phi::vectorize(tensor_meta.dims), + paddle::experimental::full(common::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype, grad_in_meta.GetPlace()); diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 83b156d10e5cb5..e1395488160191 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -115,7 +115,7 @@ proto_library(trainer_desc_proto SRCS trainer_desc.proto DEPS framework_proto cc_library( string_array SRCS string_array.cc - DEPS utf8proc phi) + DEPS utf8proc phi common) cc_library( data_type @@ -125,27 +125,27 @@ cc_library( cc_library( tensor SRCS tensor_util.cc - DEPS place memory data_type device_context phi) + DEPS place memory data_type device_context phi common) cc_library( lod_tensor SRCS lod_tensor.cc - DEPS phi place tensor framework_proto version) + DEPS phi common place tensor framework_proto version) cc_library( garbage_collector SRCS garbage_collector.cc - DEPS device_context memory phi glog) + DEPS device_context memory phi common glog) cc_library( reader SRCS reader.cc - DEPS lod_tensor phi) + DEPS lod_tensor phi common) cc_library( var_type_traits SRCS var_type_traits.cc - DEPS framework_proto scope phi) + DEPS framework_proto scope phi common) if(WITH_GPU) target_link_libraries(var_type_traits dynload_cuda) endif() @@ -177,7 +177,7 @@ endif() cc_library( scope SRCS scope.cc - DEPS glog phi xxhash var_type_traits) + DEPS glog phi common xxhash var_type_traits) cc_library( device_worker SRCS device_worker.cc @@ -228,24 +228,24 @@ endif() cc_library( data_layout_transform SRCS data_layout_transform.cc - DEPS tensor phi) + DEPS tensor phi common) cc_library( data_transform SRCS data_transform.cc - DEPS phi - tensor + DEPS tensor framework_proto selected_rows_utils data_device_transform data_type_transform data_layout_transform - phi) + phi + common) cc_library( attribute SRCS attribute.cc - DEPS framework_proto enforce) + DEPS framework_proto enforce common) cc_library( op_version_proto SRCS op_version_proto.cc @@ -271,7 +271,7 @@ cc_library( cc_library( shape_inference SRCS shape_inference.cc - DEPS 
phi attribute selected_rows_utils) + DEPS phi common attribute selected_rows_utils) # every source file that includes "dnnl.h" must depends on mkldnn # or, the first one should depends on mkldnn @@ -302,6 +302,7 @@ if(WITH_XPU) selected_rows_utils place phi + common var_type_traits op_info xpu_op_list) @@ -309,7 +310,13 @@ else() cc_library( phi_utils SRCS phi_utils.cc - DEPS lod_tensor selected_rows_utils place phi var_type_traits op_info) + DEPS lod_tensor + selected_rows_utils + place + phi + common + var_type_traits + op_info) endif() if(WITH_XPU) @@ -337,6 +344,7 @@ if(WITH_XPU) phi_utils infershape_utils phi + common op_compat_infos type_info) else() @@ -363,6 +371,7 @@ else() phi_utils infershape_utils phi + common op_compat_infos type_info) endif() @@ -380,7 +389,8 @@ add_dependencies( glog version xxhash - phi) + phi + common) cc_library( proto_desc @@ -395,6 +405,7 @@ cc_library( xxhash op_dist_attr phi + common op_version_proto op_version_registry) @@ -406,7 +417,7 @@ cc_library( cc_library( op_call_stack SRCS op_call_stack.cc - DEPS op_proto_maker enforce) + DEPS op_proto_maker enforce common) cc_library( program_utils @@ -677,7 +688,8 @@ if(WITH_DISTRIBUTE) fleet heter_server ${${EXTERNAL_BRPC_DEPS}} - phi) + phi + common) set(DISTRIBUTE_COMPILE_FLAGS "") if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") @@ -883,7 +895,7 @@ cc_library( cc_library( selected_rows_utils SRCS selected_rows_utils.cc - DEPS phi device_context) + DEPS phi common device_context) cc_library( dlpack_tensor @@ -904,6 +916,7 @@ cc_library( place var_type_traits phi + common phi_utils op_info shape_inference) @@ -938,6 +951,7 @@ cc_library( dynamic_loader string_helper phi + common imperative_flag layer) diff --git a/paddle/fluid/framework/convert_utils.h b/paddle/fluid/framework/convert_utils.h index 6995e21da89109..4fc86eba98dd65 100644 --- a/paddle/fluid/framework/convert_utils.h +++ b/paddle/fluid/framework/convert_utils.h @@ -14,8 +14,8 @@ limitations under the License. 
*/ #pragma once +#include "paddle/common/layout.h" #include "paddle/fluid/framework/data_type.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/utils/data_type.h" // TODO(chenweihang): this file may need to be removed diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc index 81075e0c5fb5bd..bf2f9e4379b693 100644 --- a/paddle/fluid/framework/custom_operator.cc +++ b/paddle/fluid/framework/custom_operator.cc @@ -434,7 +434,7 @@ static void RunInferShapeFunc( vec_ddim.end(), std::back_inserter(vec_shape), [&](const DDim& ddim) -> std::vector { - return phi::vectorize(ddim); + return common::vectorize(ddim); }); } else { // optional inputs, `vec_shape` is empty @@ -450,7 +450,7 @@ static void RunInferShapeFunc( } else { if (ctx->HasInput(in_name)) { // general inputs auto ddim = ctx->GetInputDim(in_name); - input_shapes.emplace_back(phi::vectorize(ddim)); + input_shapes.emplace_back(common::vectorize(ddim)); } else { // optional inputs PADDLE_ENFORCE( detail::IsOptionalVar(in_name), @@ -582,7 +582,7 @@ static void RunInferShapeFunc( } else { // Set output dims by the output of InferShapeFn ctx->SetOutputDim(out_name, - phi::make_ddim(output_shapes[output_shape_idx++])); + common::make_ddim(output_shapes[output_shape_idx++])); } } } diff --git a/paddle/fluid/framework/data_feed.cc b/paddle/fluid/framework/data_feed.cc index 19c5196d2f933a..ca5c7b66b343ac 100644 --- a/paddle/fluid/framework/data_feed.cc +++ b/paddle/fluid/framework/data_feed.cc @@ -1021,7 +1021,7 @@ void MultiSlotDataFeed::PutToFeedVec( use_slots_shape_[i][inductive_shape_index_[i]] = total_instance / total_dims_without_inductive_[i]; } - feed_vec_[i]->Resize(phi::make_ddim(use_slots_shape_[i])); + feed_vec_[i]->Resize(common::make_ddim(use_slots_shape_[i])); } } #endif @@ -1423,7 +1423,7 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec(const Record* ins_vec, int num) { use_slots_shape_[i][inductive_shape_index_[i]] = total_instance / total_dims_without_inductive_[i]; } - feed_vec_[i]->Resize(phi::make_ddim(use_slots_shape_[i])); + feed_vec_[i]->Resize(common::make_ddim(use_slots_shape_[i])); } } #endif @@ -1523,7 +1523,7 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec( use_slots_shape_[i][inductive_shape_index_[i]] = total_instance / total_dims_without_inductive_[i]; } - feed_vec_[i]->Resize(phi::make_ddim(use_slots_shape_[i])); + feed_vec_[i]->Resize(common::make_ddim(use_slots_shape_[i])); } } #endif @@ -1568,7 +1568,7 @@ void PrivateInstantDataFeed::PutToFeedVec() { use_slots_[i].c_str(), total_dims, total_instance)); - feed_vec_[i]->Resize(phi::make_ddim(use_slots_shape_[i])); + feed_vec_[i]->Resize(common::make_ddim(use_slots_shape_[i])); } } } @@ -1998,7 +1998,7 @@ void PaddleBoxDataFeed::PutToFeedVec(const std::vector& ins_vec) { use_slots_shape_[i][inductive_shape_index_[i]] = total_instance / total_dims_without_inductive_[i]; } - feed_vec_[i]->Resize(phi::make_ddim(use_slots_shape_[i])); + feed_vec_[i]->Resize(common::make_ddim(use_slots_shape_[i])); } } #endif @@ -2615,7 +2615,7 @@ void SlotRecordInMemoryDataFeed::PutToFeedVec(const SlotRecord* ins_vec, info.local_shape[info.inductive_shape_index] = total_instance / info.total_dims_without_inductive; } - feed->Resize(phi::make_ddim(info.local_shape)); + feed->Resize(common::make_ddim(info.local_shape)); } else { LoD data_lod{slot_offset}; feed_vec_[j]->set_lod(data_lod); @@ -2987,7 +2987,7 @@ void SlotRecordInMemoryDataFeed::PackToScope(MiniBatchGpuPack* pack, info.local_shape[info.inductive_shape_index] 
= total_instance / info.total_dims_without_inductive; } - feed->Resize(phi::make_ddim(info.local_shape)); + feed->Resize(common::make_ddim(info.local_shape)); } else { LoD& lod = (*feed->mutable_lod()); lod.resize(1); diff --git a/paddle/fluid/framework/data_feed.h b/paddle/fluid/framework/data_feed.h index dd17c9d4d0bab3..6472f6eadbca9d 100644 --- a/paddle/fluid/framework/data_feed.h +++ b/paddle/fluid/framework/data_feed.h @@ -31,6 +31,7 @@ limitations under the License. */ #include #include #include +#include "paddle/common/macros.h" #include "paddle/fluid/framework/archive.h" #include "paddle/fluid/framework/blocking_queue.h" #include "paddle/fluid/framework/channel.h" @@ -41,7 +42,6 @@ limitations under the License. */ #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/timer.h" #include "paddle/fluid/string/string_helper.h" -#include "paddle/phi/core/macros.h" #if defined(PADDLE_WITH_CUDA) #include "paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h" #include "paddle/fluid/platform/cuda_device_guard.h" diff --git a/paddle/fluid/framework/data_layout.h b/paddle/fluid/framework/data_layout.h index 93c5f805098b33..d7bc7c5a817181 100644 --- a/paddle/fluid/framework/data_layout.h +++ b/paddle/fluid/framework/data_layout.h @@ -18,4 +18,4 @@ limitations under the License. */ #include #include -#include "paddle/phi/common/layout.h" +#include "paddle/common/layout.h" diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index 8e94a04ab161be..19960cf139d4ee 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -92,7 +92,7 @@ void TransDataLayout(DataLayout from_layout, dst_dim[i] = src_dim[axis[i]]; } - out->Resize(phi::make_ddim(dst_dim)); + out->Resize(common::make_ddim(dst_dim)); out->mutable_data(place, in.dtype()); framework::VisitDataType( diff --git a/paddle/fluid/framework/data_set.h b/paddle/fluid/framework/data_set.h index 9af5fbfc6b4a59..459bea97b74463 100644 --- a/paddle/fluid/framework/data_set.h +++ b/paddle/fluid/framework/data_set.h @@ -25,7 +25,7 @@ #include #include #include -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" #ifdef PADDLE_WITH_GLOO #include diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index f76f6af9dce969..f0c2b60f41b69d 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -10,15 +10,15 @@ cc_library( cc_library( scale_loss_grad_op_handle SRCS scale_loss_grad_op_handle.cc - DEPS op_handle_base scope lod_tensor phi memory) + DEPS op_handle_base scope lod_tensor phi common memory) cc_library( fetch_op_handle SRCS fetch_op_handle.cc - DEPS op_handle_base scope lod_tensor phi memory) + DEPS op_handle_base scope lod_tensor phi common memory) cc_library( fetch_async_op_handle SRCS fetch_async_op_handle.cc - DEPS op_handle_base scope lod_tensor phi memory) + DEPS op_handle_base scope lod_tensor phi common memory) cc_library( share_tensor_buffer_functor @@ -71,7 +71,7 @@ if(WITH_GPU) nv_library( nan_inf_utils SRCS nan_inf_utils_detail.cc - DEPS framework_proto scope place phi) + DEPS framework_proto scope place phi common) nv_library( all_reduce_op_handle SRCS all_reduce_op_handle.cc @@ -80,6 +80,7 @@ if(WITH_GPU) scope lod_tensor phi + common memory dynload_cuda) nv_library( @@ -91,6 +92,7 @@ if(WITH_GPU) scope lod_tensor phi + common memory dynload_cuda place) @@ -102,6 
+104,7 @@ if(WITH_GPU) scope lod_tensor phi + common memory dynload_cuda variable_visitor @@ -116,6 +119,7 @@ if(WITH_GPU) scope lod_tensor phi + common memory dynload_cuda variable_visitor @@ -127,17 +131,23 @@ if(WITH_GPU) nv_library( reduce_op_handle SRCS reduce_op_handle.cc - DEPS op_handle_base variable_visitor scope phi dynload_cuda) + DEPS op_handle_base variable_visitor scope phi common dynload_cuda) else() nv_library( reduce_op_handle SRCS reduce_op_handle.cc - DEPS op_handle_base variable_visitor scope phi dynload_cuda) + DEPS op_handle_base variable_visitor scope phi common dynload_cuda) endif() nv_library( broadcast_op_handle SRCS broadcast_op_handle.cc - DEPS op_handle_base scope phi memory variable_visitor dynload_cuda) + DEPS op_handle_base + scope + phi + common + memory + variable_visitor + dynload_cuda) nv_library( fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc @@ -146,7 +156,7 @@ elseif(WITH_ROCM) hip_library( nan_inf_utils SRCS nan_inf_utils_detail.cc - DEPS framework_proto scope place phi) + DEPS framework_proto scope place phi common) hip_library( all_reduce_op_handle SRCS all_reduce_op_handle.cc @@ -154,6 +164,7 @@ elseif(WITH_ROCM) scope lod_tensor phi + common memory dynload_cuda variable_visitor) @@ -166,6 +177,7 @@ elseif(WITH_ROCM) scope lod_tensor phi + common memory dynload_cuda place) @@ -177,6 +189,7 @@ elseif(WITH_ROCM) scope lod_tensor phi + common memory dynload_cuda variable_visitor @@ -187,17 +200,23 @@ elseif(WITH_ROCM) hip_library( reduce_op_handle SRCS reduce_op_handle.cc - DEPS op_handle_base variable_visitor scope phi dynload_cuda) + DEPS op_handle_base variable_visitor scope phi common dynload_cuda) else() hip_library( reduce_op_handle SRCS reduce_op_handle.cc - DEPS op_handle_base variable_visitor scope phi dynload_cuda) + DEPS op_handle_base variable_visitor scope phi common dynload_cuda) endif() hip_library( broadcast_op_handle SRCS broadcast_op_handle.cc - DEPS op_handle_base scope phi memory variable_visitor dynload_cuda) + DEPS op_handle_base + scope + phi + common + memory + variable_visitor + dynload_cuda) hip_library( fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc @@ -206,11 +225,17 @@ else() cc_library( nan_inf_utils SRCS nan_inf_utils_detail.cc - DEPS framework_proto scope place phi) + DEPS framework_proto scope place phi common) cc_library( all_reduce_op_handle SRCS all_reduce_op_handle.cc - DEPS op_handle_base scope lod_tensor phi memory variable_visitor) + DEPS op_handle_base + scope + lod_tensor + phi + common + memory + variable_visitor) cc_library( fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc @@ -219,6 +244,7 @@ else() scope lod_tensor phi + common memory variable_visitor place) @@ -230,6 +256,7 @@ else() scope lod_tensor phi + common memory variable_visitor place @@ -238,17 +265,17 @@ else() cc_library( reduce_op_handle SRCS reduce_op_handle.cc - DEPS op_handle_base variable_visitor scope phi) + DEPS op_handle_base variable_visitor scope phi common) else() cc_library( reduce_op_handle SRCS reduce_op_handle.cc - DEPS op_handle_base variable_visitor scope phi) + DEPS op_handle_base variable_visitor scope phi common) endif() cc_library( broadcast_op_handle SRCS broadcast_op_handle.cc - DEPS op_handle_base scope phi memory variable_visitor) + DEPS op_handle_base scope phi common memory variable_visitor) cc_library( fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc @@ -258,7 +285,7 @@ endif() cc_library( gather_op_handle SRCS gather_op_handle.cc - DEPS op_handle_base scope phi 
memory variable_visitor) + DEPS op_handle_base scope phi common memory variable_visitor) cc_library( eager_deletion_op_handle @@ -305,6 +332,7 @@ cc_test( op_handle_base scope phi + common memory device_context broadcast_op_handle) @@ -317,8 +345,9 @@ cc_test_old( var_handle op_handle_base scope - memory phi + common + memory device_context gather_op_handle) @@ -330,12 +359,17 @@ cc_library( scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_executor.cc DEPS ssa_graph_executor scope_buffered_monitor) -#cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope phi memory +#cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope phi common memory # device_context reduce_op_handle ) cc_library( bind_threaded_ssa_graph_executor SRCS bind_threaded_ssa_graph_executor.cc - DEPS fetch_op_handle phi ssa_graph_executor scope simple_threadpool + DEPS fetch_op_handle + phi + common + ssa_graph_executor + scope + simple_threadpool device_context) cc_library( fast_threaded_ssa_graph_executor diff --git a/paddle/fluid/framework/details/broadcast_op_handle_test.h b/paddle/fluid/framework/details/broadcast_op_handle_test.h index dc0f4e3fe1762a..60e45d226dd50f 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle_test.h +++ b/paddle/fluid/framework/details/broadcast_op_handle_test.h @@ -217,7 +217,7 @@ struct TestBroadcastOpHandle { platform::errors::NotFound( "Variable %s is not found in scope.", varname)); auto lod_tensor = var->GetMutable(); - std::vector send_vector(static_cast(phi::product(kDims))); + std::vector send_vector(static_cast(common::product(kDims))); for (size_t k = 0; k < send_vector.size(); ++k) { send_vector[k] = k + val_scalar; } @@ -233,7 +233,7 @@ struct TestBroadcastOpHandle { const std::vector& rows, int height, float value_scalar = 0.0) { - std::vector send_vector(static_cast(phi::product(kDims))); + std::vector send_vector(static_cast(common::product(kDims))); for (size_t k = 0; k < send_vector.size(); ++k) { send_vector[k] = k + value_scalar; } @@ -290,7 +290,7 @@ struct TestBroadcastOpHandle { f::TensorCopySync(rt, cpu_place, &result_tensor); float* ct = result_tensor.data(); - for (int64_t i = 0; i < phi::product(kDims); ++i) { + for (int64_t i = 0; i < common::product(kDims); ++i) { ASSERT_NEAR(ct[i], send_vector[i], 1e-5); } } @@ -315,7 +315,7 @@ struct TestBroadcastOpHandle { phi::DenseTensor result_tensor; f::TensorCopySync(tensor, cpu_place, &result_tensor); float* ct = result_tensor.mutable_data(cpu_place); - for (int64_t k = 0; k < phi::product(kDims); ++k) { + for (int64_t k = 0; k < common::product(kDims); ++k) { ASSERT_NEAR(ct[k], send_vec[k], 1e-5); } } diff --git a/paddle/fluid/framework/details/fetch_async_op_handle.cc b/paddle/fluid/framework/details/fetch_async_op_handle.cc index 900f0ebc4f111e..ee78d366711075 100644 --- a/paddle/fluid/framework/details/fetch_async_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_async_op_handle.cc @@ -78,8 +78,8 @@ static void CheckTensorAttrs(const phi::DenseTensor *tensor, "(th) fetched variable. Please set the " "parameter `return_merged = False` when you " "call the `Executor.run()` method.", - phi::DataLayoutToString(layout), - phi::DataLayoutToString(tensor->layout()), + common::DataLayoutToString(layout), + common::DataLayoutToString(tensor->layout()), offset)); } @@ -175,7 +175,7 @@ void FetchAsyncOpHandle::FetchMergedLodTensor( // for 0D tensor, can't concat eath tensor. 
So stack 0D and concat 1+D tensor if (rank == 0) { int src_lodtensor_size = static_cast(src_lodtensors.size()); - new_dim = phi::make_ddim(std::vector({src_lodtensor_size})); + new_dim = common::make_ddim(std::vector({src_lodtensor_size})); } else { bool find_first_dims = false; for (auto *t : src_lodtensors) { diff --git a/paddle/fluid/framework/details/gather_op_handle_test.cc b/paddle/fluid/framework/details/gather_op_handle_test.cc index 12d84aef8a8aca..8070a63bf2ce63 100644 --- a/paddle/fluid/framework/details/gather_op_handle_test.cc +++ b/paddle/fluid/framework/details/gather_op_handle_test.cc @@ -137,7 +137,7 @@ struct TestGatherOpHandle { int height = static_cast(kDims[0] * 2); std::vector rows{0, 1, 2, 3, 3, 0, 14, 7, 3, 1, 2, 4, 6, 3, 1, 1, 1, 1, 3, 7}; - std::vector send_vector(phi::product(kDims)); + std::vector send_vector(common::product(kDims)); for (size_t k = 0; k < send_vector.size(); ++k) { send_vector[k] = static_cast(k); } @@ -209,7 +209,7 @@ struct TestGatherOpHandle { float* ct = result_tensor.data(); for (int64_t j = 0; - j < phi::product(kDims) * static_cast(gpu_list_.size()); + j < common::product(kDims) * static_cast(gpu_list_.size()); ++j) { ASSERT_NEAR(ct[j], send_vector[j % send_vector.size()], 1e-5); } diff --git a/paddle/fluid/framework/details/op_registry.h b/paddle/fluid/framework/details/op_registry.h index 31782e0d7bc9ea..8fb92fcfc12539 100644 --- a/paddle/fluid/framework/details/op_registry.h +++ b/paddle/fluid/framework/details/op_registry.h @@ -23,6 +23,7 @@ limitations under the License. */ #include #include +#include "paddle/common/macros.h" #include "paddle/fluid/framework/grad_op_desc_maker.h" #include "paddle/fluid/framework/inplace_op_inference.h" #include "paddle/fluid/framework/no_need_buffer_vars_inference.h" @@ -33,7 +34,6 @@ limitations under the License. 
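// A minimal usage sketch of the renamed helper the handle tests above now
// call: common::product() collapses a DDim into its element count, which is
// what sizes the host-side send buffers. Hedged: assumes paddle/common/ddim.h
// keeps the old phi:: signature; the kDims stand-in value is illustrative.
#include <cstddef>
#include <vector>
#include "paddle/common/ddim.h"

std::vector<float> MakeSendBuffer() {
  auto dims = common::make_ddim({20, 20});  // stand-in for the tests' kDims
  // product() multiplies all extents, e.g. {20, 20} -> 400 elements.
  std::vector<float> buf(static_cast<size_t>(common::product(dims)));
  for (size_t k = 0; k < buf.size(); ++k) buf[k] = static_cast<float>(k);
  return buf;
}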
*/ #include "paddle/fluid/imperative/dygraph_grad_maker.h" #include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/prim/utils/static/composite_grad_desc_maker.h" -#include "paddle/phi/core/macros.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/details/reduce_op_handle_test.cc b/paddle/fluid/framework/details/reduce_op_handle_test.cc index 7886fc5eae5286..459f4dfcff504e 100644 --- a/paddle/fluid/framework/details/reduce_op_handle_test.cc +++ b/paddle/fluid/framework/details/reduce_op_handle_test.cc @@ -162,7 +162,7 @@ struct TestReduceOpHandle { int height = kDims[0] * 2; std::vector rows{0, 1, 2, 3, 3, 0, 14, 7, 3, 1, 2, 4, 6, 3, 1, 1, 1, 1, 3, 7}; - std::vector send_vector(phi::product(kDims)); + std::vector send_vector(common::product(kDims)); for (size_t k = 0; k < send_vector.size(); ++k) { send_vector[k] = k; } @@ -232,13 +232,13 @@ struct TestReduceOpHandle { f::TensorCopySync(rt, cpu_place, &result_tensor); float *ct = result_tensor.data(); - for (int64_t j = 0; j < phi::product(result_tensor.dims()); ++j) { + for (int64_t j = 0; j < common::product(result_tensor.dims()); ++j) { ASSERT_NEAR(ct[j], send_vector[j % send_vector.size()], 1e-5); } } // namespace details void TestReduceLodTensors(size_t output_scope_idx) { - std::vector send_vector(static_cast(phi::product(kDims))); + std::vector send_vector(static_cast(common::product(kDims))); for (size_t k = 0; k < send_vector.size(); ++k) { send_vector[k] = k; } @@ -283,7 +283,7 @@ struct TestReduceOpHandle { f::TensorCopySync(rt, cpu_place, &result_tensor); float *ct = result_tensor.data(); - for (int64_t j = 0; j < phi::product(result_tensor.dims()); ++j) { + for (int64_t j = 0; j < common::product(result_tensor.dims()); ++j) { ASSERT_NEAR(ct[j], send_vector[j] * gpu_list_.size(), 1e-5); } } diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc index 6aac8b6fd51cdf..8b486be9cc686a 100644 --- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc +++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc @@ -108,7 +108,7 @@ void ScaleLossGradOpHandle::RunImpl() { void ScaleLossGradOpHandle::RunOnVar(Variable *var, bool record_event) { auto *tensor = var->GetMutable(); - tensor->Resize(phi::make_ddim({1})); + tensor->Resize(common::make_ddim({1})); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) ScaleLossGradFunctor func( diff --git a/paddle/fluid/framework/device_worker.h b/paddle/fluid/framework/device_worker.h index 25d29e469a4985..c9c0817a756020 100644 --- a/paddle/fluid/framework/device_worker.h +++ b/paddle/fluid/framework/device_worker.h @@ -30,6 +30,7 @@ limitations under the License. */ #if defined(PADDLE_WITH_PSCORE) #include "paddle/fluid/distributed/ps/wrapper/fleet.h" #endif +#include "paddle/common/macros.h" #include "paddle/fluid/framework/barrier.h" #include "paddle/fluid/framework/data_feed.h" #include "paddle/fluid/framework/executor_gc_helper.h" @@ -44,7 +45,6 @@ limitations under the License. 
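// Sketch of why hunks like the device_worker.h one above only change an
// include path: the macro family previously in paddle/phi/core/macros.h now
// comes from paddle/common/macros.h. That DISABLE_COPY_AND_ASSIGN moved over
// unchanged is an assumption here, not something this patch shows directly.
#include "paddle/common/macros.h"

class PatchedWorker {
 public:
  PatchedWorker() = default;
  // Deletes copy/move construction and assignment, as before the move.
  DISABLE_COPY_AND_ASSIGN(PatchedWorker);
};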
*/ #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/timer.h" #include "paddle/phi/backends/dynload/port.h" -#include "paddle/phi/core/macros.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/dlpack_tensor.h b/paddle/fluid/framework/dlpack_tensor.h index 4cd6d97a0c5cc3..943ee88b67695b 100644 --- a/paddle/fluid/framework/dlpack_tensor.h +++ b/paddle/fluid/framework/dlpack_tensor.h @@ -41,7 +41,7 @@ class DLPackTensor { // The shape in DLTensor is defined as int64_t* // Add this member to make TVMTensor init without heap allocation - ShapeType shape_[DDim::kMaxRank]; + ShapeType shape_[phi::DDim::kMaxRank]; }; DLManagedTensor* toDLPack(const phi::DenseTensor& src); diff --git a/paddle/fluid/framework/eigen.h b/paddle/fluid/framework/eigen.h index bbf34c03130c11..2da048361d254b 100644 --- a/paddle/fluid/framework/eigen.h +++ b/paddle/fluid/framework/eigen.h @@ -88,8 +88,8 @@ struct EigenMatrix : public EigenTensor { "between 0 and %d, but received number is %d.", rank, num_col_dims)); - return EigenMatrix::From(tensor, - phi::flatten_to_2d(tensor.dims(), num_col_dims)); + return EigenMatrix::From( + tensor, common::flatten_to_2d(tensor.dims(), num_col_dims)); } static typename EigenMatrix::ConstType Reshape(const phi::DenseTensor& tensor, @@ -102,8 +102,8 @@ struct EigenMatrix : public EigenTensor { "between 0 and %d, but received number is %d.", rank, num_col_dims)); - return EigenMatrix::From(tensor, - phi::flatten_to_2d(tensor.dims(), num_col_dims)); + return EigenMatrix::From( + tensor, common::flatten_to_2d(tensor.dims(), num_col_dims)); } }; diff --git a/paddle/fluid/framework/fleet/heter_wrapper.cc b/paddle/fluid/framework/fleet/heter_wrapper.cc index 2cae0721aefa99..a8ce9be92bdf68 100644 --- a/paddle/fluid/framework/fleet/heter_wrapper.cc +++ b/paddle/fluid/framework/fleet/heter_wrapper.cc @@ -95,7 +95,7 @@ void HeterWrapper::SerializeToReq(const std::string& varname, req_var->set_data_type(static_cast( framework::TransToProtoVarType(tensor->dtype()))); - for (auto& dim : phi::vectorize(tensor->dims())) { + for (auto& dim : common::vectorize(tensor->dims())) { req_var->add_dims(dim); } const framework::LoD lod = tensor->lod(); @@ -154,7 +154,7 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope, for (auto& x : req_var.dims()) { vec_dim.push_back(x); } - tensor->Resize(phi::make_ddim(vec_dim)); + tensor->Resize(common::make_ddim(vec_dim)); LoD lod; for (int i = 0; i < req_var.lod_level(); ++i) { @@ -199,7 +199,7 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope, for (auto& x : req_var.dims()) { vec_dim.push_back(x); } - tensor->Resize(phi::make_ddim(vec_dim)); + tensor->Resize(common::make_ddim(vec_dim)); LoD lod; for (int i = 0; i < req_var.lod_level(); ++i) { diff --git a/paddle/fluid/framework/heter_section_worker.cc b/paddle/fluid/framework/heter_section_worker.cc index ea55d5cd6c818a..24be8e04d8d507 100644 --- a/paddle/fluid/framework/heter_section_worker.cc +++ b/paddle/fluid/framework/heter_section_worker.cc @@ -39,7 +39,7 @@ void SetMicroId(paddle::framework::Scope* scope, "the type of microbatch_id should be phi::DenseTensor")); auto* tensor = var->GetMutable(); std::vector dims{1}; - tensor->Resize(phi::make_ddim(dims)); + tensor->Resize(common::make_ddim(dims)); void* tensor_data = tensor->mutable_data( place, framework::TransToPhiDataType(framework::proto::VarType::FP32)); if (platform::is_gpu_place(place)) { diff --git a/paddle/fluid/framework/infershape_utils.cc b/paddle/fluid/framework/infershape_utils.cc index 
4c41bc27f1730e..88f0b496a8e4c6 100644 --- a/paddle/fluid/framework/infershape_utils.cc +++ b/paddle/fluid/framework/infershape_utils.cc @@ -218,7 +218,7 @@ DDim CompatMetaTensor::dims() const { } else if (var->IsType()) { // use tensor array size as dims auto& tensor_array = var->Get(); - return phi::make_ddim({static_cast(tensor_array.size())}); + return common::make_ddim({static_cast(tensor_array.size())}); } else { PADDLE_THROW(platform::errors::Unimplemented( "Currently, only can get dims from DenseTensor or SelectedRows or " @@ -227,9 +227,9 @@ DDim CompatMetaTensor::dims() const { } else { auto* var = PADDLE_GET_CONST(VarDesc*, var_); - return phi::make_ddim(var->GetShape()); - // return var->GetShape().empty() ? phi::make_ddim({0UL}) : - // phi::make_ddim(var->GetShape()); + return common::make_ddim(var->GetShape()); + // return var->GetShape().empty() ? common::make_ddim({0UL}) : + // common::make_ddim(var->GetShape()); } } @@ -316,7 +316,7 @@ void CompatMetaTensor::set_dims(const DDim& dims) { } else { auto* var = PADDLE_GET(VarDesc*, var_); if (var) { - var->SetShape(vectorize(dims)); + var->SetShape(common::vectorize(dims)); } } } diff --git a/paddle/fluid/framework/inplace_op_inference.h b/paddle/fluid/framework/inplace_op_inference.h index 2a1a44e57a30a8..5c5589fd149512 100644 --- a/paddle/fluid/framework/inplace_op_inference.h +++ b/paddle/fluid/framework/inplace_op_inference.h @@ -16,9 +16,9 @@ #include #include +#include "paddle/common/macros.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/type_defs.h" -#include "paddle/phi/core/macros.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 8336340849fb46..bc08f1d72f0f91 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -32,7 +32,7 @@ cc_library( cc_library( cost_model SRCS cost_model.cc - DEPS executor graph profiler proto_desc phi) + DEPS executor graph profiler proto_desc phi common) set(GRAPH_PATTERN_DETECTOR_DEPS graph graph_helper graph_traits) if(WITH_TESTING) @@ -238,7 +238,7 @@ if(WITH_XPU) cc_library( xpu_quant_utils SRCS xpu/quant_utils.cc - DEPS pass phi) + DEPS pass phi common) cc_library( xpu_pass_utils SRCS xpu/pass_utils.cc @@ -541,7 +541,8 @@ if(WITH_MKLDNN) concat_and_split naive_executor device_context - phi) + phi + common) if(WITH_GPU OR WITH_ROCM) set(TEST_CONV_BN_PASS_DEPS ${TEST_CONV_BN_PASS_DEPS} depthwise_conv) endif() diff --git a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc index 9764f78744974e..5e82534ba67452 100644 --- a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc @@ -264,12 +264,12 @@ void PrepareParameters(Graph* graph, const Param& param, ir::Node* lstm_op) { platform::errors::InvalidArgument( "phi::DenseTensor attention bias dimension size(%d) must be 1.", attention_bias_t->dims().size())); - attention_bias_t->Resize(phi::make_ddim({1, attention_bias_t->dims()[0]})); + attention_bias_t->Resize(common::make_ddim({1, attention_bias_t->dims()[0]})); auto* attention_scalar_bias_t = scope.FindVar(param.AttentionScalarBias)->GetMutable(); attention_scalar_bias_t->Resize( - phi::make_ddim({1, attention_scalar_bias_t->dims()[0]})); + common::make_ddim({1, attention_scalar_bias_t->dims()[0]})); PrepareLSTMWeight(W_forget_w0_t, W_forget_w1_t, @@ -296,7 +296,7 @@ void PrepareLSTMWeight(const 
phi::DenseTensor& W_forget_w0, phi::DenseTensor* out) { int D = static_cast(W_forget_w0.dims()[0]); int M = static_cast(W_forget_w1.dims()[0]); - out->Resize(phi::make_ddim({D + M, 4 * D})); + out->Resize(common::make_ddim({D + M, 4 * D})); VLOG(3) << "LSTMWeight resized to " << out->dims(); float* out_data = out->mutable_data(platform::CPUPlace()); @@ -343,7 +343,7 @@ void PrepareLSTMBias(const phi::DenseTensor& B_forget, "phi::DenseTensor B forget dimension size(%d) must be 1.", B_forget.dims().size())); int D = static_cast(B_forget.dims()[0]); - out->Resize(phi::make_ddim({1, 4 * D})); + out->Resize(common::make_ddim({1, 4 * D})); auto* out_data = out->mutable_data(platform::CPUPlace()); for (size_t i = 0; i < tensors.size(); i++) { memcpy(out_data + D * i, tensors[i], D * sizeof(float)); diff --git a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc index d29ef0f9ad1fad..61080c52c94bac 100644 --- a/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc +++ b/paddle/fluid/framework/ir/auto_mixed_precision_pass.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/framework/ir/auto_mixed_precision_pass.h" +#include "paddle/common/errors.h" #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/operator.h" #include "paddle/phi/common/bfloat16.h" @@ -21,7 +22,6 @@ #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #ifdef PADDLE_WITH_CUSTOM_DEVICE #include "paddle/phi/backends/device_manager.h" #endif diff --git a/paddle/fluid/framework/ir/conv2d_fusion_layout_transfer_pass.cc b/paddle/fluid/framework/ir/conv2d_fusion_layout_transfer_pass.cc index 7d3c105f749387..2640bd9cd74ee3 100644 --- a/paddle/fluid/framework/ir/conv2d_fusion_layout_transfer_pass.cc +++ b/paddle/fluid/framework/ir/conv2d_fusion_layout_transfer_pass.cc @@ -18,9 +18,9 @@ #include #include "paddle/fluid/framework/ir/cutlass_teller.h" +#include "paddle/common/layout.h" #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/ir/graph_helper.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" diff --git a/paddle/fluid/framework/ir/conv2d_trans_filter_dilations_nxn_to_1x1_pass.cc b/paddle/fluid/framework/ir/conv2d_trans_filter_dilations_nxn_to_1x1_pass.cc index 7ba36c11313859..af3187d9d64bbb 100644 --- a/paddle/fluid/framework/ir/conv2d_trans_filter_dilations_nxn_to_1x1_pass.cc +++ b/paddle/fluid/framework/ir/conv2d_trans_filter_dilations_nxn_to_1x1_pass.cc @@ -191,7 +191,7 @@ void Conv2dTransFilterDilationsNxNTo1x1Pass::conv2d_dilation_trans( VarDesc new_weights_desc(new_weights_name); new_weights_desc.SetPersistable(true); - new_weights_desc.SetShape(vectorize(new_weights->dims())); + new_weights_desc.SetShape(common::vectorize(new_weights->dims())); new_weights_desc.SetDataType( framework::TransToProtoVarType(new_weights->dtype())); auto* new_weights_node = graph->CreateVarNode(&new_weights_desc); diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc index aa15b2696d7a12..335d7034ca6496 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc @@ -167,7 +167,7 @@ void recompute_bias_and_weights(const Scope* scope, } } } else { - auto weights_shape_2d = phi::flatten_to_2d(weights_shape, 1); + auto weights_shape_2d = 
common::flatten_to_2d(weights_shape, 1); EigenMatrixArrayMap weights_array_2d( weights_data, weights_shape_2d[0], weights_shape_2d[1]); @@ -376,7 +376,7 @@ void ConvBNFusePass::ApplyImpl(ir::Graph* graph) const { if (!mkldnn_with_bias) { VarDesc eltwise_y_in_desc( patterns::PDNodeName("fuse_conv_bn", conv_type() + "_eltwise_y_in")); - eltwise_y_in_desc.SetShape(phi::vectorize(bn_bias_tensor->dims())); + eltwise_y_in_desc.SetShape(common::vectorize(bn_bias_tensor->dims())); eltwise_y_in_desc.SetDataType( framework::TransToProtoVarType(bn_bias_tensor->dtype())); eltwise_y_in_desc.SetLoDLevel(bn_bias->Var()->GetLoDLevel()); @@ -674,7 +674,8 @@ void ConvEltwiseAddBNFusePass::ApplyImpl(ir::Graph* graph) const { // Create eltwise_y (conv bias) variable VarDesc eltwise_y_in_desc(patterns::PDNodeName( name_scope_, "eltwise_y_in" + std::to_string(found_conv_bn_count))); - eltwise_y_in_desc.SetShape(phi::vectorize(eltwise_y_in_tensor->dims())); + eltwise_y_in_desc.SetShape( + common::vectorize(eltwise_y_in_tensor->dims())); eltwise_y_in_desc.SetDataType( framework::TransToProtoVarType(eltwise_y_in_tensor->dtype())); eltwise_y_in_desc.SetLoDLevel(eltwise_y_in->Var()->GetLoDLevel()); diff --git a/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.cc index cd8312214af8d4..75ac438a97f856 100644 --- a/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.cc @@ -255,8 +255,8 @@ void FCElementwiseLayerNormFusePass::ApplyImpl(ir::Graph *graph) const { int begin_norm_axis = PADDLE_GET_CONST(int, layer_norm->Op()->GetAttr("begin_norm_axis")); auto layer_norm_x_dims = fc_out->Var()->GetShape(); - auto layer_norm_x_mat_dims = - phi::flatten_to_2d(phi::make_ddim(layer_norm_x_dims), begin_norm_axis); + auto layer_norm_x_mat_dims = common::flatten_to_2d( + common::make_ddim(layer_norm_x_dims), begin_norm_axis); if (fc_w->Var()->GetShape()[1] != layer_norm_x_mat_dims[1]) { return; } diff --git a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc index 99bb9c59206853..9422980a429f21 100644 --- a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc +++ b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.cc @@ -1474,7 +1474,7 @@ inline void QKVWeightsProcess(phi::DenseTensor* wq_tensor, auto* wk_data = wk_tensor->data(); auto* wv_data = wv_tensor->data(); - auto combined_w_dims = phi::make_ddim({3, num_head, dim_head, dim_embed}); + auto combined_w_dims = common::make_ddim({3, num_head, dim_head, dim_embed}); phi::DenseTensor tmp_combined_w_tensor; tmp_combined_w_tensor.Resize(combined_w_dims); @@ -1516,7 +1516,7 @@ inline void QKVBiasProcess(phi::DenseTensor* bq_tensor, auto* bk_data = bk_tensor->data(); auto* bv_data = bv_tensor->data(); - auto combined_bias_dims = phi::make_ddim({3, num_head, dim_head}); + auto combined_bias_dims = common::make_ddim({3, num_head, dim_head}); phi::DenseTensor tmp_combined_bias_tensor; tmp_combined_bias_tensor.Resize(combined_bias_dims); @@ -1590,7 +1590,7 @@ inline void QKVWeightsProcessFuseQKV(phi::DenseTensor* qkv_w_tensor, auto* dev_ctx = static_cast( platform::DeviceContextPool::Instance().Get(platform::CPUPlace())); auto* qkv_w_data = qkv_w_tensor->data(); - auto transpose_w_dims = phi::make_ddim({3, num_head, dim_head, dim_embed}); + auto transpose_w_dims = common::make_ddim({3, num_head, dim_head, dim_embed}); 
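// Round-trip sketch for the two shape helpers this pass now takes from
// common:: instead of phi::. Assumes make_ddim()/vectorize() keep their old
// signatures: a braced list (or std::vector<int64_t>) in, a DDim out, and
// vectorize() converting back. The concrete dims below are illustrative.
#include <cassert>
#include <cstdint>
#include <vector>
#include "paddle/common/ddim.h"

void RoundTrip() {
  // Mirrors the combined QKV dims built above: {3, num_head, dim_head, dim_embed}.
  auto dims = common::make_ddim({3, 8, 64, 768});
  std::vector<int64_t> shape = common::vectorize(dims);
  assert(shape.size() == 4 && shape[0] == 3 && shape[3] == 768);
}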
phi::DenseTensor tmp_transpose_w_tensor; tmp_transpose_w_tensor.Resize(transpose_w_dims); @@ -1628,7 +1628,7 @@ inline void QKVBiasProcessFuseQKV(phi::DenseTensor* qkv_b_tensor, auto* dev_ctx = static_cast( platform::DeviceContextPool::Instance().Get(platform::CPUPlace())); auto* qkv_b_data = qkv_b_tensor->data(); - auto transpose_b_dims = phi::make_ddim({3, num_head, dim_head}); + auto transpose_b_dims = common::make_ddim({3, num_head, dim_head}); phi::DenseTensor tmp_transpose_b_tensor; tmp_transpose_b_tensor.Resize(transpose_b_dims); diff --git a/paddle/fluid/framework/ir/fusion_group/CMakeLists.txt b/paddle/fluid/framework/ir/fusion_group/CMakeLists.txt index 2357247b37d794..570b081aae95ed 100644 --- a/paddle/fluid/framework/ir/fusion_group/CMakeLists.txt +++ b/paddle/fluid/framework/ir/fusion_group/CMakeLists.txt @@ -6,13 +6,13 @@ if(WITH_GPU OR WITH_ROCM) cc_test( test_code_generator SRCS code_generator_tester.cc - DEPS code_generator phi lod_tensor graph_viz_pass) + DEPS code_generator phi common lod_tensor graph_viz_pass) endif() cc_library( fusion_group_pass SRCS fusion_group_pass.cc elementwise_group_detector.cc - DEPS subgraph_detector fuse_pass_base code_generator phi) + DEPS subgraph_detector fuse_pass_base code_generator phi common) cc_test( test_fusion_group_pass SRCS fusion_group_pass_tester.cc diff --git a/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc b/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc index dd929f5329bae5..9749fb2bfa81c5 100644 --- a/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc +++ b/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc @@ -271,8 +271,8 @@ void TestElementwiseMain( // Prepare CPU tensors which always hold float. std::vector cpu_tensors(ids.size()); - auto dims = - phi::make_ddim({static_cast(256), static_cast(1024)}); + auto dims = common::make_ddim( + {static_cast(256), static_cast(1024)}); for (auto& cpu_tensor : cpu_tensors) { cpu_tensor.mutable_data(dims, paddle::platform::CPUPlace()); } diff --git a/paddle/fluid/framework/ir/ipu/delete_scale_op_pass.cc b/paddle/fluid/framework/ir/ipu/delete_scale_op_pass.cc index 6300c0e32971db..391373578cc24b 100644 --- a/paddle/fluid/framework/ir/ipu/delete_scale_op_pass.cc +++ b/paddle/fluid/framework/ir/ipu/delete_scale_op_pass.cc @@ -14,13 +14,13 @@ #include "paddle/fluid/framework/ir/ipu/delete_scale_op_pass.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc b/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc index 5c14849dd01c36..3967e82ba6e804 100644 --- a/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc +++ b/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc @@ -14,12 +14,12 @@ #include "paddle/fluid/framework/ir/ipu/infer_shape_pass.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/platform/device/ipu/ipu_backend.h" -#include 
"paddle/phi/core/ddim.h" namespace paddle { namespace framework { @@ -74,7 +74,7 @@ void InferShapePass::ApplyImpl(ir::Graph* graph) const { paddle::framework::InitializeVariable(ptr, var_desc->GetType()); auto tensor = ptr->GetMutable(); - tensor->Resize(phi::make_ddim(var_desc->GetShape())); + tensor->Resize(common::make_ddim(var_desc->GetShape())); } // infer shape @@ -94,7 +94,7 @@ void InferShapePass::ApplyImpl(ir::Graph* graph) const { for (int i = 0; i < it->second.size(); i++) { auto output_name = op_desc->Output(it->first)[i]; auto dim = it->second[i]->GetMutable()->dims(); - auto new_shape = phi::vectorize(dim); + auto new_shape = common::vectorize(dim); for (auto output_node : node->outputs) { if (output_node->Name() == output_name) { output_node->Var()->SetShape(new_shape); diff --git a/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc b/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc index fc894c07e0966d..56323c16051367 100644 --- a/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc @@ -337,7 +337,7 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const { // gamma/beta must be a 1-dimensional tensor of size on layer_norm auto layer_norm_x_mat_dims = - phi::flatten_to_2d(phi::make_ddim(x_shape), begin_norm_axis); + common::flatten_to_2d(common::make_ddim(x_shape), begin_norm_axis); auto* gamma_tensor = scope->FindVar(gamma->Name())->GetMutable(); VarDesc new_gamma_desc(patterns::PDNodeName("layer_norm_fuse", "Scale")); @@ -349,7 +349,7 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const { auto* new_gamma_node = g->CreateVarNode(&new_gamma_desc); auto* new_gamma_tensor = scope->Var(new_gamma_node->Name())->GetMutable(); - new_gamma_tensor->Resize(phi::make_ddim({layer_norm_x_mat_dims[1]})); + new_gamma_tensor->Resize(common::make_ddim({layer_norm_x_mat_dims[1]})); memcpy(new_gamma_tensor->mutable_data(platform::CPUPlace()), gamma_tensor->mutable_data(platform::CPUPlace()), layer_norm_x_mat_dims[1] * sizeof(float)); @@ -366,7 +366,7 @@ void LayerNormFusePass::ApplyImpl(Graph* graph) const { auto* new_beta_tensor = scope->Var(new_beta_node->Name())->GetMutable(); - new_beta_tensor->Resize(phi::make_ddim({layer_norm_x_mat_dims[1]})); + new_beta_tensor->Resize(common::make_ddim({layer_norm_x_mat_dims[1]})); memcpy(new_beta_tensor->mutable_data(platform::CPUPlace()), beta_tensor->mutable_data(platform::CPUPlace()), layer_norm_x_mat_dims[1] * sizeof(float)); diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt index 96594cbd022dc5..d0618616619037 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt +++ b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt @@ -43,7 +43,7 @@ if(WITH_CINN) cc_library( share_varinfo_into_cinn_pass SRCS share_varinfo_into_cinn_pass.cc - DEPS pass enforce graph_helper computation_op_handle + DEPS pass enforce common graph_helper computation_op_handle eager_deletion_op_handle) cc_test( share_varinfo_into_cinn_pass_test @@ -81,4 +81,5 @@ cc_library( cc_test( test_reference_count_pass_last_lived_ops SRCS test_reference_count_pass_last_lived_ops.cc - DEPS parallel_executor elementwise_mul_op elementwise_add_op generated_op phi) + DEPS parallel_executor elementwise_mul_op elementwise_add_op generated_op phi + common) diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc index 
1738259d60f004..d63c52060651ef 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc @@ -121,7 +121,7 @@ void ComputePropagateScalesMkldnnPass::ComputeVarScales( phi::DenseTensor tmp_tensor; std::vector reshape_dims = {dims[0], volume}; - tmp_tensor.Resize(phi::make_ddim(reshape_dims)); + tmp_tensor.Resize(common::make_ddim(reshape_dims)); auto* weight_data = weight_tensor->data(); auto* tmp_data = tmp_tensor.mutable_data(phi::CPUPlace()); for (int i = 0; i < weight_tensor->numel(); i++) { diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc index e4a37ad2c7a7ff..7a811aae50e236 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc @@ -146,7 +146,7 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test { auto* wx_var = scope.FindVar(wx_var_names); auto* wx_tensor = wx_var->GetMutable(); - wx_tensor->Resize(phi::make_dim(wx.size(), wx[0].size())); + wx_tensor->Resize(common::make_dim(wx.size(), wx[0].size())); for (size_t i = 0; i < wx.size(); i++) std::copy( begin(wx[i]), @@ -155,7 +155,7 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test { auto* wh_var = scope.FindVar(wh_var_names); auto* wh_tensor = wh_var->GetMutable(); - wh_tensor->Resize(phi::make_dim(wh.size(), wh[0].size())); + wh_tensor->Resize(common::make_dim(wh.size(), wh[0].size())); for (size_t i = 0; i < wh.size(); i++) std::copy( begin(wh[i]), @@ -280,7 +280,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, get_scales_function) { float max_val = *std::max_element(values.begin(), values.end()); phi::DenseTensor var_tensor; - var_tensor.Resize(phi::make_dim(values.size(), 1)); + var_tensor.Resize(common::make_dim(values.size(), 1)); std::copy(begin(values), end(values), var_tensor.mutable_data(phi::CPUPlace())); @@ -307,7 +307,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, compute_var_scales) { auto* var = scope.FindVar(weight_var_name); auto* weight_tensor = var->GetMutable(); - weight_tensor->Resize(phi::make_dim(1, values.size())); + weight_tensor->Resize(common::make_dim(1, values.size())); std::copy(begin(values), end(values), weight_tensor->mutable_data(phi::CPUPlace())); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc index 9639d3f374bef4..eedb5b3b60bd5e 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc @@ -90,7 +90,7 @@ void recompute_bias_and_weights(const Scope* scope, auto* weights = scope->FindVar(conv_weight->Name())->GetMutable(); auto weights_shape = weights->dims(); - auto weights_shape_2d = phi::flatten_to_2d(weights_shape, 1); + auto weights_shape_2d = common::flatten_to_2d(weights_shape, 1); auto* weights_data = weights->mutable_data(phi::CPUPlace()); EigenMatrixArrayMap weights_array_2d( @@ -266,7 +266,7 @@ void ConvAffineChannelFusePass::FuseConvAffineChannel( VarDesc eltwise_y_in_desc( patterns::PDNodeName(name_scope_, "eltwise_y_in")); // Set shape && datatype manually - eltwise_y_in_desc.SetShape(phi::vectorize(ac_bias_tensor->dims())); + 
eltwise_y_in_desc.SetShape(common::vectorize(ac_bias_tensor->dims())); eltwise_y_in_desc.SetDataType( framework::TransToProtoVarType(ac_bias_tensor->dtype())); eltwise_y_in_desc.SetLoDLevel(ac_bias->Var()->GetLoDLevel()); diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc index 8d8504708f0373..2f1e7e8a53865c 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc @@ -1178,7 +1178,7 @@ void CPUQuantizePass::QuantizeMultiGru(Graph* graph) const { VarDesc scale_var_desc(patterns::PDNodeName("multi_gru", "w_scale")); - scale_var_desc.SetShape(phi::vectorize(scale_tensor_src.dims())); + scale_var_desc.SetShape(common::vectorize(scale_tensor_src.dims())); scale_var_desc.SetDataType(proto::VarType::FP32); scale_var_desc.SetLoDLevel(scale_tensor_src.lod().size()); scale_var_desc.SetPersistable(true); diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc index 08aafa4a60a0e7..a1f74d3423006b 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc @@ -16,10 +16,10 @@ #include +#include "paddle/common/errors.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/phi/core/errors.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc index 697a34904c817e..f9e8722ccf3978 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc @@ -19,11 +19,11 @@ #include #include +#include "paddle/common/errors.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/phi/core/errors.h" #include "paddle/utils/string/pretty_log.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc index 58e2a74ce1d405..72b07fc8934de5 100755 --- a/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/params_quantization_mkldnn_pass_tester.cc @@ -47,7 +47,7 @@ struct TestScope { void CreateTensor(const std::string& var_name, const Data& data) { auto variable = scope.Var(var_name); auto tensor = variable->GetMutable(); - tensor->Resize(phi::make_ddim(data.getShape())); + tensor->Resize(common::make_ddim(data.getShape())); auto dptr = tensor->mutable_data(place); std::copy(data.getData().begin(), data.getData().end(), dptr); } diff --git a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc index 0087886c1c8d7b..e5cd2a9007b381 100644 --- a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc @@ -420,10 +420,10 @@ void QuantDequantMkldnnPass::TransposeWeight(phi::DenseTensor* input) const { out_dim_v.push_back(in_dims[i]); } - const auto out_dims = phi::make_ddim(out_dim_v); + const auto out_dims = 
common::make_ddim(out_dim_v); const int rank = axis.size(); - auto in_stride = phi::stride(in_dims); - auto out_stride = phi::stride(out_dims); + auto in_stride = common::stride(in_dims); + auto out_stride = common::stride(out_dims); const int count = input->numel(); phi::DenseTensor trans_tensor; @@ -490,7 +490,7 @@ void QuantDequantMkldnnPass::ConvertFromINT8ToFP32( } weight_tensor->clear(); // clear int weight - weight_tensor->Resize(phi::make_ddim(phi::vectorize(weight_dims))); + weight_tensor->Resize(common::make_ddim(common::vectorize(weight_dims))); auto* new_weight_data = weight_tensor->mutable_data(phi::CPUPlace()); memcpy(new_weight_data, weight_data.data(), @@ -532,7 +532,7 @@ void QuantDequantMkldnnPass::ConvertFromINT8ToFP32( } } weight_tensor->clear(); // clear int weight - weight_tensor->Resize(phi::make_ddim(phi::vectorize(weight_dims))); + weight_tensor->Resize(common::make_ddim(common::vectorize(weight_dims))); auto* new_weight_data = weight_tensor->mutable_data(phi::CPUPlace()); memcpy(new_weight_data, weight_data.data(), diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc index 85f62c4a293fce..295ef57cfdfead 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc @@ -868,8 +868,8 @@ size_t BalanceVarSSAGraphBuilder::GetAppropriateDeviceID( PADDLE_ENFORCE_NOT_NULL(var_desc, platform::errors::NotFound( "Can not find Var(%s) in Var Desc.", var_name)); - auto dim = phi::make_ddim(var_desc->GetShape()); - int64_t numel = phi::product(dim); + auto dim = common::make_ddim(var_desc->GetShape()); + int64_t numel = common::product(dim); PADDLE_ENFORCE_GT(numel, 0, platform::errors::InvalidArgument( diff --git a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc index 0fd3a71754f6d9..22802dbddd8efe 100644 --- a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc @@ -653,8 +653,8 @@ inline void QKVWeightsProcess(phi::DenseTensor* wq_tensor, auto* bv_data = bv_tensor->mutable_data(platform::CPUPlace()); auto combined_w_dims = - phi::make_ddim({wq_tensor->dims()[0], 3, wq_tensor->dims()[1]}); - auto combined_bias_dims = phi::make_ddim({3, bq_tensor->dims()[0]}); + common::make_ddim({wq_tensor->dims()[0], 3, wq_tensor->dims()[1]}); + auto combined_bias_dims = common::make_ddim({3, bq_tensor->dims()[0]}); phi::DenseTensor tmp_combined_w_tensor; tmp_combined_w_tensor.Resize(combined_w_dims); @@ -1362,8 +1362,8 @@ int MultiHeadMatmulV3FusePass::BuildFusionV3(Graph* graph, auto* bv_data = bv_tensor->mutable_data(platform::CPUPlace()); auto combined_w_dims = - phi::make_ddim({wq_tensor->dims()[0], 3, wq_tensor->dims()[1]}); - auto combined_bias_dims = phi::make_ddim({3, bq_tensor->dims()[0]}); + common::make_ddim({wq_tensor->dims()[0], 3, wq_tensor->dims()[1]}); + auto combined_bias_dims = common::make_ddim({3, bq_tensor->dims()[0]}); // reuse the mul0_w and eltadd_0_b nodes for the combined nodes. 
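// Sketch of the stride() helper used by TransposeWeight above: for a
// row-major DDim it yields, at position i, the product of all extents after
// i, so flat offsets can be remapped across a permutation. Assumes
// common::stride() keeps the phi:: behaviour of returning a DDim of strides.
#include "paddle/common/ddim.h"

int64_t FlatOffset() {
  auto dims = common::make_ddim({2, 3, 4});
  auto stride = common::stride(dims);  // {12, 4, 1}
  // Offset of element (1, 2, 3) in the contiguous buffer: 1*12 + 2*4 + 3*1.
  return 1 * stride[0] + 2 * stride[1] + 3 * stride[2];  // 23
}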
auto* combined_w_desc = mul0_w->Var(); diff --git a/paddle/fluid/framework/ir/multihead_matmul_roformer_fuse_pass.cc b/paddle/fluid/framework/ir/multihead_matmul_roformer_fuse_pass.cc index be5fad23fd6e2d..1f91b6955aadfe 100644 --- a/paddle/fluid/framework/ir/multihead_matmul_roformer_fuse_pass.cc +++ b/paddle/fluid/framework/ir/multihead_matmul_roformer_fuse_pass.cc @@ -449,8 +449,8 @@ int MultiHeadMatmulRoformerFusePass::BuildFusion(Graph* graph, auto* bv_data = bv_tensor->mutable_data(platform::CPUPlace()); auto combined_w_dims = - phi::make_ddim({wq_tensor->dims()[0], 3, wq_tensor->dims()[1]}); - auto combined_bias_dims = phi::make_ddim({3, bq_tensor->dims()[0]}); + common::make_ddim({wq_tensor->dims()[0], 3, wq_tensor->dims()[1]}); + auto combined_bias_dims = common::make_ddim({3, bq_tensor->dims()[0]}); // reuse the mul0_w and eltadd_0_b nodes for the combined nodes. auto* combined_w_desc = mul0_w->Var(); diff --git a/paddle/fluid/framework/ir/pass.h b/paddle/fluid/framework/ir/pass.h index 473890a4b786ba..753fadd242ebc2 100644 --- a/paddle/fluid/framework/ir/pass.h +++ b/paddle/fluid/framework/ir/pass.h @@ -22,10 +22,10 @@ limitations under the License. */ #include #include +#include "paddle/common/macros.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/node.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/phi/core/macros.h" #include "paddle/utils/any.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/pass_test_util.cc b/paddle/fluid/framework/ir/pass_test_util.cc index ea3532135fafbb..ee75794d7ccc42 100644 --- a/paddle/fluid/framework/ir/pass_test_util.cc +++ b/paddle/fluid/framework/ir/pass_test_util.cc @@ -187,7 +187,8 @@ void InitLoDTensorHolder(const Scope& scope, const T* data) { auto var = scope.FindLocalVar(var_name); auto tensor = var->GetMutable(); - auto* tensor_mem_ptr = tensor->mutable_data(phi::make_ddim(dims), place); + auto* tensor_mem_ptr = + tensor->mutable_data(common::make_ddim(dims), place); if (data != nullptr) { std::memcpy(tensor_mem_ptr, data, tensor->memory_size()); } else { diff --git a/paddle/fluid/framework/ir/pass_test_util.h b/paddle/fluid/framework/ir/pass_test_util.h index 44f6c66295466b..8032205fbe81db 100644 --- a/paddle/fluid/framework/ir/pass_test_util.h +++ b/paddle/fluid/framework/ir/pass_test_util.h @@ -18,13 +18,13 @@ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/place.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/split_layernorm_to_math_ops_pass.cc b/paddle/fluid/framework/ir/split_layernorm_to_math_ops_pass.cc index f28b768513f9f5..35e1fe74948f39 100644 --- a/paddle/fluid/framework/ir/split_layernorm_to_math_ops_pass.cc +++ b/paddle/fluid/framework/ir/split_layernorm_to_math_ops_pass.cc @@ -268,7 +268,7 @@ void SplitLayerNormPass::ApplyImpl(Graph* graph) const { pow_y->SetPersistable(true); auto* pow_y_node = graph->CreateVarNode(pow_y); auto* pow_y_tensor = scope->Var(pow_y_name)->GetMutable(); - pow_y_tensor->Resize(phi::make_ddim({1})); + pow_y_tensor->Resize(common::make_ddim({1})); dev_ctx->Alloc(pow_y_tensor); (pow_y_tensor->data())[0] = 2.0f; @@ -312,7 +312,7 @@ void SplitLayerNormPass::ApplyImpl(Graph* graph) const { 
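// flatten_to_2d(), seen in the layer-norm and conv-bn hunks of this patch,
// collapses a DDim into a matrix around an axis: dims before the axis form
// the rows, the rest the columns. A minimal sketch, assuming the common::
// overload matches the old phi::flatten_to_2d(dims, num_col_dims) semantics.
#include "paddle/common/ddim.h"

void FlattenForLayerNorm() {
  auto x = common::make_ddim({8, 16, 768});
  auto mat = common::flatten_to_2d(x, /*num_col_dims=*/2);  // -> [8 * 16, 768]
  // mat[1] is the width the fuse passes compare against the scale/bias length.
}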
add_y->SetPersistable(true); auto* add_y_node = graph->CreateVarNode(add_y); auto* add_y_tensor = scope->Var(add_y_name)->GetMutable(); - add_y_tensor->Resize(phi::make_ddim({1})); + add_y_tensor->Resize(common::make_ddim({1})); dev_ctx->Alloc(add_y_tensor); (add_y_tensor->data())[0] = eps; @@ -364,7 +364,7 @@ void SplitLayerNormPass::ApplyImpl(Graph* graph) const { scope->Var(new_scale_name)->GetMutable(); auto* scale_tensor = scope->Var(layer_norm_scale->Name())->GetMutable(); - new_scale_tensor->Resize(phi::make_ddim(shape_int64)); + new_scale_tensor->Resize(common::make_ddim(shape_int64)); dev_ctx->Alloc(new_scale_tensor); memcpy(new_scale_tensor->data(), scale_tensor->data(), @@ -393,7 +393,7 @@ void SplitLayerNormPass::ApplyImpl(Graph* graph) const { scope->Var(new_bias_name)->GetMutable(); auto* bias_tensor = scope->Var(layer_norm_bias->Name())->GetMutable(); - new_bias_tensor->Resize(phi::make_ddim(shape_int64)); + new_bias_tensor->Resize(common::make_ddim(shape_int64)); dev_ctx->Alloc(new_bias_tensor); memcpy(new_bias_tensor->data(), bias_tensor->data(), diff --git a/paddle/fluid/framework/ir/trt_cross_multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/trt_cross_multihead_matmul_fuse_pass.cc index 3f94c97baa6d8e..42a64e1a54007f 100644 --- a/paddle/fluid/framework/ir/trt_cross_multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_cross_multihead_matmul_fuse_pass.cc @@ -327,7 +327,7 @@ int TrtCrossMultiHeadMatmulFusePass::BuildCrossFusion( auto* wv_data = wv_tensor->data(); // combined_w_dims = [in,2,out] auto combined_w_kv_dims = - phi::make_ddim({wk_tensor->dims()[0], 2, wk_tensor->dims()[1]}); + common::make_ddim({wk_tensor->dims()[0], 2, wk_tensor->dims()[1]}); VLOG(5) << "trt cross attention trt wk_dim in:" << wk_tensor->dims()[0] << "trt cross attention trt wk_dim out:" << wk_tensor->dims()[1]; auto* combined_w_kv_desc = mul1_w->Var(); diff --git a/paddle/fluid/framework/ir/trt_delete_weight_dequant_linear_op_pass.cc b/paddle/fluid/framework/ir/trt_delete_weight_dequant_linear_op_pass.cc index 58ee274e1c62c6..deee6c9bb855bf 100644 --- a/paddle/fluid/framework/ir/trt_delete_weight_dequant_linear_op_pass.cc +++ b/paddle/fluid/framework/ir/trt_delete_weight_dequant_linear_op_pass.cc @@ -334,7 +334,7 @@ void TrtDeleteWeightQuantDequantLinearOpPass::ApplyImpl( "OP'attribute ")); } weight_tensor->clear(); // clear int weight - weight_tensor->Resize(phi::make_ddim(phi::vectorize(w_dims))); + weight_tensor->Resize(common::make_ddim(common::vectorize(w_dims))); float* new_quantized_weight_data = dev_ctx->HostAlloc( weight_tensor, weight_tensor->numel() * sizeof(float)); memcpy(new_quantized_weight_data, diff --git a/paddle/fluid/framework/ir/trt_flash_multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/trt_flash_multihead_matmul_fuse_pass.cc index dd0726dba572ba..200d2e8ad0d384 100644 --- a/paddle/fluid/framework/ir/trt_flash_multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_flash_multihead_matmul_fuse_pass.cc @@ -336,7 +336,7 @@ int TrtFlashMultiHeadMatmulFusePass::BuildFlashFusion( // auto dims = wq_tensor->dims(); // combined_w_dims = [in,3,out] auto combined_w_dims = - phi::make_ddim({wq_tensor->dims()[0], 3, wq_tensor->dims()[1]}); + common::make_ddim({wq_tensor->dims()[0], 3, wq_tensor->dims()[1]}); auto* combined_w_desc = mul0_w->Var(); combined_w_desc->SetShape( {wq_tensor->dims()[0], 3, wq_tensor->dims()[1]}); diff --git a/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc 
b/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc index db62c5dd3789cc..0bee108064d083 100644 --- a/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc @@ -791,8 +791,8 @@ int TrtMultiHeadMatmulV2FusePass::BuildFusionV2(Graph* graph, auto* bv_data = bv_tensor->mutable_data(platform::CPUPlace()); auto combined_w_dims = - phi::make_ddim({wq_tensor->dims()[0], 3, wq_tensor->dims()[1]}); - auto combined_bias_dims = phi::make_ddim({3, bq_tensor->dims()[0]}); + common::make_ddim({wq_tensor->dims()[0], 3, wq_tensor->dims()[1]}); + auto combined_bias_dims = common::make_ddim({3, bq_tensor->dims()[0]}); // reuse the mul0_w and eltadd_0_b nodes for the combined nodes. auto* combined_w_desc = mul0_w->Var(); @@ -1257,8 +1257,8 @@ int TrtMultiHeadMatmulV3FusePass::BuildFusionV3(Graph* graph, auto* bv_data = bv_tensor->mutable_data(platform::CPUPlace()); auto combined_w_dims = - phi::make_ddim({wq_tensor->dims()[0], 3, wq_tensor->dims()[1]}); - auto combined_bias_dims = phi::make_ddim({3, bq_tensor->dims()[0]}); + common::make_ddim({wq_tensor->dims()[0], 3, wq_tensor->dims()[1]}); + auto combined_bias_dims = common::make_ddim({3, bq_tensor->dims()[0]}); // reuse the mul0_w and eltadd_0_b nodes for the combined nodes. auto* combined_w_desc = mul0_w->Var(); diff --git a/paddle/fluid/framework/ir/trt_qk_multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/trt_qk_multihead_matmul_fuse_pass.cc index df1476e9db3454..f9804070226ecf 100644 --- a/paddle/fluid/framework/ir/trt_qk_multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_qk_multihead_matmul_fuse_pass.cc @@ -324,8 +324,8 @@ int TrtQkMultiHeadMatmulFusePass::BuildQkFusion(Graph* graph, // combined_w_dims = [in,2,out] auto combined_w_qk_dims = - phi::make_ddim({wq_tensor->dims()[0], 2, wq_tensor->dims()[1]}); - auto combined_bias_dims = phi::make_ddim({2, bq_tensor->dims()[0]}); + common::make_ddim({wq_tensor->dims()[0], 2, wq_tensor->dims()[1]}); + auto combined_bias_dims = common::make_ddim({2, bq_tensor->dims()[0]}); VLOG(3) << "trt qk attention trt wq_dim in:" << wq_tensor->dims()[0] << "trt qk attention trt wk_dim out:" << wq_tensor->dims()[1]; diff --git a/paddle/fluid/framework/ir/trt_remove_amp_strategy_op_pass.cc b/paddle/fluid/framework/ir/trt_remove_amp_strategy_op_pass.cc index 2b3a702dcd5024..22115ea28d9f69 100644 --- a/paddle/fluid/framework/ir/trt_remove_amp_strategy_op_pass.cc +++ b/paddle/fluid/framework/ir/trt_remove_amp_strategy_op_pass.cc @@ -18,10 +18,10 @@ #include #include +#include "paddle/common/errors.h" #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/node.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/core/errors.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc b/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc index 5a086acd7cac2e..d9907555a17b57 100644 --- a/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc +++ b/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc @@ -18,13 +18,13 @@ #include #include +#include "paddle/common/errors.h" +#include "paddle/common/layout.h" #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/node.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/common/layout.h" -#include "paddle/phi/core/errors.h" namespace paddle { 
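// The layout header that moved here, paddle/common/layout.h, also carries
// DataLayoutToString(), which this patch re-qualifies as common:: in the
// diagnostics elsewhere. A hedged sketch; assumes the DataLayout enum now
// lives in common:: alongside the header and the function returns std::string.
#include <string>
#include "paddle/common/layout.h"

std::string DescribeLayout(common::DataLayout layout) {
  return "tensor layout: " + common::DataLayoutToString(layout);
}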
namespace framework { diff --git a/paddle/fluid/framework/ir/vit_attention_fuse_pass.cc b/paddle/fluid/framework/ir/vit_attention_fuse_pass.cc index 23db75266310c6..382e1c60ee9895 100644 --- a/paddle/fluid/framework/ir/vit_attention_fuse_pass.cc +++ b/paddle/fluid/framework/ir/vit_attention_fuse_pass.cc @@ -103,12 +103,12 @@ void VitAttentionFusePass::ApplyImpl(ir::Graph* graph) const { auto* w_tensor = scope->FindVar(matmul0_in_y->Name())->GetMutable(); auto w_dims = - phi::make_ddim({w_tensor->dims()[0], 3, w_tensor->dims()[1] / 3}); + common::make_ddim({w_tensor->dims()[0], 3, w_tensor->dims()[1] / 3}); w_tensor->Resize(w_dims); auto* b_tensor = scope->FindVar(elementwise0_in_y->Name()) ->GetMutable(); - auto bias_dims = phi::make_ddim({3, b_tensor->dims()[0] / 3}); + auto bias_dims = common::make_ddim({3, b_tensor->dims()[0] / 3}); b_tensor->Resize(bias_dims); desc.SetInput("W", {matmul0_in_y->Name()}); diff --git a/paddle/fluid/framework/ir/xpu/conv1d_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/conv1d_xpu_fuse_pass.cc index 6667cff8db8b14..c845ea657f48b1 100644 --- a/paddle/fluid/framework/ir/xpu/conv1d_xpu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/xpu/conv1d_xpu_fuse_pass.cc @@ -473,7 +473,7 @@ int Conv1dXPUFusePass::ApplyImpl(ir::Graph* graph, scope->GetVar(conv_filter->Name())->GetMutable(); auto filter_dims = filter_t->dims(); auto original_f_dims = - phi::make_ddim({filter_dims[0], filter_dims[1], filter_dims[3]}); + common::make_ddim({filter_dims[0], filter_dims[1], filter_dims[3]}); filter_t->Resize(original_f_dims); filter_dims = original_f_dims; // conv_filter fp16 --> fp32 diff --git a/paddle/fluid/framework/ir/xpu/conv2d_bias_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/conv2d_bias_fuse_pass.cc index 7f53507a85c83e..4ce1b239b9f891 100644 --- a/paddle/fluid/framework/ir/xpu/conv2d_bias_fuse_pass.cc +++ b/paddle/fluid/framework/ir/xpu/conv2d_bias_fuse_pass.cc @@ -224,7 +224,7 @@ void Conv2dBiasFusePass::FoldConv2dBias(ir::Graph* graph) const { ew_bias_add_y_desc->SetShape({y_shape[1]}); auto* ew_bias_add_y_tensor = scope->GetVar(ew_bias_add_y->Name())->GetMutable(); - ew_bias_add_y_tensor->Resize(phi::make_ddim({y_shape[1]})); + ew_bias_add_y_tensor->Resize(common::make_ddim({y_shape[1]})); ew_bias_add_desc->Flush(); found_subgraph_count++; diff --git a/paddle/fluid/framework/ir/xpu/conv2d_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/conv2d_xpu_fuse_pass.cc index bb7fb9f7a7535d..f353a3fc952b39 100644 --- a/paddle/fluid/framework/ir/xpu/conv2d_xpu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/xpu/conv2d_xpu_fuse_pass.cc @@ -501,7 +501,8 @@ void Conv2dXPUFusePass::CreateTheReplicatedWeights( VarDesc replicated_filter_desc(replicated_filter_name); replicated_filter_desc.SetPersistable(true); - replicated_filter_desc.SetShape(vectorize(replicated_filter_tensor.dims())); + replicated_filter_desc.SetShape( + common::vectorize(replicated_filter_tensor.dims())); replicated_filter_desc.SetDataType( framework::TransToProtoVarType(replicated_filter_tensor.dtype())); graph->CreateVarNode(&replicated_filter_desc); diff --git a/paddle/fluid/framework/ir/xpu/fc_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/fc_xpu_fuse_pass.cc index d2d9e57084ef46..00788289402b51 100644 --- a/paddle/fluid/framework/ir/xpu/fc_xpu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/xpu/fc_xpu_fuse_pass.cc @@ -311,7 +311,8 @@ void FcXPUFusePass::CreateTheReplicatedWeights( VarDesc replicated_filter_desc(replicated_w_name); replicated_filter_desc.SetPersistable(true); - 
replicated_filter_desc.SetShape(vectorize(replicated_filter_tensor.dims())); + replicated_filter_desc.SetShape( + common::vectorize(replicated_filter_tensor.dims())); replicated_filter_desc.SetDataType( framework::TransToProtoVarType(replicated_filter_tensor.dtype())); graph->CreateVarNode(&replicated_filter_desc); diff --git a/paddle/fluid/framework/ir/xpu/fused_multi_transformer_xpu_pass.cc b/paddle/fluid/framework/ir/xpu/fused_multi_transformer_xpu_pass.cc index ce3bd1754edb4a..5ee61cf88c73da 100644 --- a/paddle/fluid/framework/ir/xpu/fused_multi_transformer_xpu_pass.cc +++ b/paddle/fluid/framework/ir/xpu/fused_multi_transformer_xpu_pass.cc @@ -566,7 +566,7 @@ int FusedMultiTransformerXPUPass::FusedMultiTransformerXPUQuant( // Update dst var_desc in block VarDesc dst_desc(max_buffer_name); dst_desc.SetPersistable(true); - dst_desc.SetShape(vectorize(max_buffer_tensor.dims())); + dst_desc.SetShape(common::vectorize(max_buffer_tensor.dims())); dst_desc.SetDataType( framework::TransToProtoVarType(max_buffer_tensor.dtype())); max_buffer_node = graph->CreateVarNode(&dst_desc); diff --git a/paddle/fluid/framework/ir/xpu/multi_encoder_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/multi_encoder_xpu_fuse_pass.cc index 905e15ecba265c..0ee61208a1cc3f 100644 --- a/paddle/fluid/framework/ir/xpu/multi_encoder_xpu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/xpu/multi_encoder_xpu_fuse_pass.cc @@ -574,7 +574,7 @@ void MultiEncoderXPUFusePass::PrepareQKVWeight(Graph* graph, // Update qkv_w_int16 var_desc in block VarDesc qkv_w_int16_desc(qkv_w_int16_name); qkv_w_int16_desc.SetPersistable(true); - qkv_w_int16_desc.SetShape(vectorize(qkv_w_int16_t.dims())); + qkv_w_int16_desc.SetShape(common::vectorize(qkv_w_int16_t.dims())); qkv_w_int16_desc.SetDataType( framework::TransToProtoVarType(qkv_w_int16_t.dtype())); *qkv_w_int16 = graph->CreateVarNode(&qkv_w_int16_desc); @@ -586,7 +586,7 @@ void MultiEncoderXPUFusePass::PrepareQKVWeight(Graph* graph, // Update qkv_w_max var_desc in block VarDesc qkv_w_max_desc(qkv_w_max_name); qkv_w_max_desc.SetPersistable(true); - qkv_w_max_desc.SetShape(vectorize(qkv_w_max_t.dims())); + qkv_w_max_desc.SetShape(common::vectorize(qkv_w_max_t.dims())); qkv_w_max_desc.SetDataType(proto::VarType::Type::VarType_Type_FP32); *qkv_w_max = graph->CreateVarNode(&qkv_w_max_desc); auto* block_qkv_w_max_desc = block->Var(qkv_w_max_name); @@ -671,7 +671,7 @@ void MultiEncoderXPUFusePass::PrepareQKVBias(Graph* graph, // Update qkv_bias var_desc in block VarDesc qkv_bias_desc(qkv_bias_name); qkv_bias_desc.SetPersistable(true); - qkv_bias_desc.SetShape(vectorize(qkv_bias_tensor.dims())); + qkv_bias_desc.SetShape(common::vectorize(qkv_bias_tensor.dims())); qkv_bias_desc.SetDataType( framework::TransToProtoVarType(qkv_bias_tensor.dtype())); *qkv_bias = graph->CreateVarNode(&qkv_bias_desc); diff --git a/paddle/fluid/framework/ir/xpu/pass_utils.cc b/paddle/fluid/framework/ir/xpu/pass_utils.cc index 49b7fe7c7ba56d..dbc899c93120ac 100644 --- a/paddle/fluid/framework/ir/xpu/pass_utils.cc +++ b/paddle/fluid/framework/ir/xpu/pass_utils.cc @@ -105,7 +105,7 @@ size_t HashTensor(const phi::DenseTensor& in) { auto in_dims = in.dims(); HashCombine(&ret, phi::DataTypeToString(in.dtype()), - phi::DataLayoutToString(in.layout()), + common::DataLayoutToString(in.layout()), in_dims.size()); for (int i = 0; i < in_dims.size(); i++) { HashCombine(&ret, in_dims[i]); @@ -166,7 +166,7 @@ void PrepareWeight(Graph* graph, // Update dst_weight var_desc in block VarDesc dst_weight_desc(dst_weight_name); 
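// A minimal sketch of the pattern the surrounding XPU-pass hunks apply,
// assuming the post-move helpers in paddle/common/ddim.h; the tensor and
// desc names here are illustrative, not part of this patch:
//   phi::DenseTensor w;
//   w.Resize(common::make_ddim({128, 3, 64}));
//   VarDesc desc("w");
//   desc.SetShape(common::vectorize(w.dims()));  // VarDesc stores the shape as std::vector<int64_t>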
dst_weight_desc.SetPersistable(true); - dst_weight_desc.SetShape(vectorize(dst_weight_tensor.dims())); + dst_weight_desc.SetShape(common::vectorize(dst_weight_tensor.dims())); dst_weight_desc.SetDataType( framework::TransToProtoVarType(dst_weight_tensor.dtype())); *dst_weight = graph->CreateVarNode(&dst_weight_desc); @@ -178,7 +178,8 @@ void PrepareWeight(Graph* graph, // Update dst_weight_max var_desc in block VarDesc dst_weight_max_desc(dst_weight_max_name); dst_weight_max_desc.SetPersistable(true); - dst_weight_max_desc.SetShape(vectorize(dst_weight_max_tensor.dims())); + dst_weight_max_desc.SetShape( + common::vectorize(dst_weight_max_tensor.dims())); dst_weight_max_desc.SetDataType(proto::VarType::Type::VarType_Type_FP32); *dst_weight_max = graph->CreateVarNode(&dst_weight_max_desc); auto* block_dst_weight_max_desc = block->Var(dst_weight_max_name); @@ -226,7 +227,8 @@ void PrepareWeight(Graph* graph, // Update dst_scale_max var_desc in block VarDesc dst_scale_max_desc(dst_scale_max_name); dst_scale_max_desc.SetPersistable(true); - dst_scale_max_desc.SetShape(vectorize(dst_weight_max_tensor.dims())); + dst_scale_max_desc.SetShape( + common::vectorize(dst_weight_max_tensor.dims())); dst_scale_max_desc.SetDataType(proto::VarType::Type::VarType_Type_FP32); *dst_scale_max = graph->CreateVarNode(&dst_scale_max_desc); auto* block_dst_scale_max_desc = block->Var(dst_scale_max_name); @@ -309,7 +311,7 @@ void PrepareBias( // Update dst var_desc in block VarDesc dst_desc(dst_name); dst_desc.SetPersistable(true); - dst_desc.SetShape(vectorize(dst_tensor.dims())); + dst_desc.SetShape(common::vectorize(dst_tensor.dims())); dst_desc.SetDataType(framework::TransToProtoVarType(dst_tensor.dtype())); *dst = graph->CreateVarNode(&dst_desc); auto* block_dst_desc = block->Var(dst_name); diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc index 96cff2521dfe7c..102719079eaae2 100644 --- a/paddle/fluid/framework/lod_tensor.cc +++ b/paddle/fluid/framework/lod_tensor.cc @@ -463,8 +463,8 @@ void MergeLoDTensor(phi::DenseTensor *target, platform::errors::InvalidArgument( "phi::DenseTensor layout does not match, expected layout is %s, " "actual layout is %s.", - phi::DataLayoutToString(new_layout), - phi::DataLayoutToString(t->layout()))); + common::DataLayoutToString(new_layout), + common::DataLayoutToString(t->layout()))); auto tensor_dims = t->dims(); PADDLE_ENFORCE_EQ(tensor_dims.size(), new_dim.size(), diff --git a/paddle/fluid/framework/lod_tensor.h b/paddle/fluid/framework/lod_tensor.h index 68aa8fceee96d4..524c9472b1c0cc 100644 --- a/paddle/fluid/framework/lod_tensor.h +++ b/paddle/fluid/framework/lod_tensor.h @@ -21,10 +21,10 @@ limitations under the License. 
*/ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/mixed_vector.h" diff --git a/paddle/fluid/framework/new_executor/CMakeLists.txt b/paddle/fluid/framework/new_executor/CMakeLists.txt index 6d19aa474c7f75..0ae4e35fdf13e6 100644 --- a/paddle/fluid/framework/new_executor/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/CMakeLists.txt @@ -28,4 +28,4 @@ cc_library( cc_library( staticgraph_executor_statistics SRCS executor_statistics.cc - DEPS enforce glog phi) + DEPS enforce glog phi common) diff --git a/paddle/fluid/framework/new_executor/feed_fetch_utils.cc b/paddle/fluid/framework/new_executor/feed_fetch_utils.cc index dee86a8463d0fa..f5b4b6cceae65c 100644 --- a/paddle/fluid/framework/new_executor/feed_fetch_utils.cc +++ b/paddle/fluid/framework/new_executor/feed_fetch_utils.cc @@ -193,8 +193,8 @@ void MergeTensors(const std::vector& tensors, phi::errors::InvalidArgument( "phi::DenseTensor layout does not match, expected layout is %s, " "actual layout is %s.", - phi::DataLayoutToString(new_layout), - phi::DataLayoutToString(t->layout()))); + common::DataLayoutToString(new_layout), + common::DataLayoutToString(t->layout()))); if (rank > 0) { auto tensor_dims = t->dims(); PADDLE_ENFORCE_EQ(tensor_dims.size(), diff --git a/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt b/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt index 1d0d3edf7081b1..abc8e86fb1663f 100644 --- a/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/instruction/CMakeLists.txt @@ -9,11 +9,11 @@ cc_library( tuple_push_instruction.cc tuple_pop_instruction.cc instruction_util.cc - DEPS pir_adaptor phi framework_proto) + DEPS pir_adaptor phi common framework_proto) if(WITH_CINN AND NOT CINN_ONLY) cc_library( cinn_jit_instruction NOT_FOR_INFER SRCS cinn_jit_instruction.cc - DEPS phi cinnapi cinn_op_dialect cinn_runtime_dialect) + DEPS phi common cinnapi cinn_op_dialect cinn_runtime_dialect) endif() diff --git a/paddle/fluid/framework/new_executor/instruction/cinn_jit_instruction.cc b/paddle/fluid/framework/new_executor/instruction/cinn_jit_instruction.cc index f9124c57874f31..9ff10d0ae7c91c 100644 --- a/paddle/fluid/framework/new_executor/instruction/cinn_jit_instruction.cc +++ b/paddle/fluid/framework/new_executor/instruction/cinn_jit_instruction.cc @@ -18,9 +18,9 @@ #include "paddle/cinn/hlir/dialect/runtime/ir/runtime_dialect.h" #include "paddle/cinn/hlir/framework/instruction.h" #include "paddle/cinn/hlir/framework/pir_compiler.h" +#include "paddle/common/errors.h" #include "paddle/fluid/framework/new_executor/pir_adaptor/pir_adaptor_util.h" #include "paddle/fluid/framework/paddle2cinn/transform_type.h" -#include "paddle/phi/core/errors.h" #if defined(PADDLE_WITH_CUDA) #include "paddle/cinn/runtime/cinn_runtime.h" #endif diff --git a/paddle/fluid/framework/new_executor/interpreter/CMakeLists.txt b/paddle/fluid/framework/new_executor/interpreter/CMakeLists.txt index a0c3a06a02b52d..e2d221d167b139 100644 --- a/paddle/fluid/framework/new_executor/interpreter/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/interpreter/CMakeLists.txt @@ -31,6 +31,7 @@ set(INTERPRETER_DEPS scope glog phi + common ${DEVICE_EVENT_LIBS} glog) diff --git a/paddle/fluid/framework/new_executor/interpreter/job.h 
b/paddle/fluid/framework/new_executor/interpreter/job.h index 952702d6e2f0a5..21acaa54aed0b5 100644 --- a/paddle/fluid/framework/new_executor/interpreter/job.h +++ b/paddle/fluid/framework/new_executor/interpreter/job.h @@ -16,9 +16,9 @@ #include #include +#include "paddle/common/errors.h" +#include "paddle/common/macros.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" -#include "paddle/phi/core/macros.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/new_executor/interpreter/plan.h b/paddle/fluid/framework/new_executor/interpreter/plan.h index beb2c176f94ad8..20dbbfad0e1b20 100644 --- a/paddle/fluid/framework/new_executor/interpreter/plan.h +++ b/paddle/fluid/framework/new_executor/interpreter/plan.h @@ -20,8 +20,8 @@ #include "paddle/fluid/framework/new_executor/interpreter/job.h" +#include "paddle/common/macros.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/phi/core/macros.h" #include "paddle/pir/core/program.h" namespace paddle { diff --git a/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt b/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt index b0ab1826fb4bfb..bbac454694e2f4 100644 --- a/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt @@ -1,11 +1,11 @@ cc_library( workqueue_utils SRCS workqueue_utils.cc events_waiter.cc - DEPS enforce glog) + DEPS enforce glog common) cc_library( workqueue SRCS workqueue.cc - DEPS workqueue_utils enforce glog phi) + DEPS workqueue_utils enforce glog phi common) cc_test( workqueue_test SRCS workqueue_test.cc diff --git a/paddle/fluid/framework/new_executor/workqueue/event_count.h b/paddle/fluid/framework/new_executor/workqueue/event_count.h index c9fd47b92383f8..9f80b02904dad6 100644 --- a/paddle/fluid/framework/new_executor/workqueue/event_count.h +++ b/paddle/fluid/framework/new_executor/workqueue/event_count.h @@ -56,7 +56,7 @@ #include #include "glog/logging.h" -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/no_need_buffer_vars_inference.h b/paddle/fluid/framework/no_need_buffer_vars_inference.h index 37f790a0d3f415..767abb1f07e6c2 100644 --- a/paddle/fluid/framework/no_need_buffer_vars_inference.h +++ b/paddle/fluid/framework/no_need_buffer_vars_inference.h @@ -20,10 +20,10 @@ #include #include +#include "paddle/common/macros.h" #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/phi/core/macros.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index 1846b7c9f0f71b..357cd400ba5b69 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -367,7 +367,7 @@ class CompileTimeInferShapeContext : public InferShapeContext { DDim res; try { auto shape = var->GetShape(); - res = phi::make_ddim(shape); + res = common::make_ddim(shape); } catch (...) { VLOG(5) << "GetDim of variable " << name << " error"; std::rethrow_exception(std::current_exception()); @@ -1319,7 +1319,7 @@ std::vector CompileTimeInferShapeContext::GetRepeatedDims( try { auto shapes = var->GetShapes(); for (const auto &s : shapes) { - res.push_back(phi::make_ddim(s)); + res.push_back(common::make_ddim(s)); } } catch (...) 
{ VLOG(5) << "GetRepeatedDim of variable " << name << " error."; @@ -1330,7 +1330,7 @@ std::vector CompileTimeInferShapeContext::GetRepeatedDims( void CompileTimeInferShapeContext::SetDim(const std::string &name, const DDim &dim) { - block_.FindVarRecursive(name)->SetShape(vectorize(dim)); + block_.FindVarRecursive(name)->SetShape(common::vectorize(dim)); } void CompileTimeInferShapeContext::SetRepeatedDims( @@ -1339,7 +1339,8 @@ void CompileTimeInferShapeContext::SetRepeatedDims( PADDLE_ENFORCE_NOT_NULL( var, platform::errors::NotFound("Variable %s is not found.", name)); std::vector> dim_vec(dims.size()); - std::transform(dims.begin(), dims.end(), dim_vec.begin(), phi::vectorize<>); + std::transform( + dims.begin(), dims.end(), dim_vec.begin(), common::vectorize<>); var->SetShapes(dim_vec); } diff --git a/paddle/fluid/framework/op_desc.h b/paddle/fluid/framework/op_desc.h index 852a09ed2fdc87..fa8f6fe5892134 100644 --- a/paddle/fluid/framework/op_desc.h +++ b/paddle/fluid/framework/op_desc.h @@ -21,11 +21,11 @@ limitations under the License. */ #include #include +#include "paddle/common/macros.h" #include "paddle/fluid/distributed/auto_parallel/dist_attr.h" #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/framework/var_desc.h" -#include "paddle/phi/core/macros.h" #include "paddle/utils/test_macros.h" namespace paddle { diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index 6e33f74f432a62..84ee045918fcd7 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -26,6 +26,7 @@ limitations under the License. */ #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h #include "glog/logging.h" // For VLOG() +#include "paddle/common/macros.h" #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/details/op_registry.h" #include "paddle/fluid/framework/grad_op_desc_maker.h" @@ -35,7 +36,6 @@ limitations under the License. */ #include "paddle/fluid/framework/shape_inference.h" #include "paddle/phi/core/flags.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/macros.h" #include "paddle/utils/test_macros.h" namespace paddle { @@ -178,7 +178,7 @@ inline void RegisterKernelClass(const char* op_type, if (std::is_same::value) { OpKernelType key(ToDataType(std::type_index(typeid(T))), platform::CustomPlace(library_type), - phi::StringToDataLayout(data_layout), + common::StringToDataLayout(data_layout), LibraryType::kPlain, customized_type_value); OperatorWithKernel::AllOpKernels()[op_type][key] = func; @@ -187,7 +187,7 @@ inline void RegisterKernelClass(const char* op_type, #endif OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType(), - phi::StringToDataLayout(data_layout), + common::StringToDataLayout(data_layout), StringToLibraryType(library_type), customized_type_value); OperatorWithKernel::AllOpKernels()[op_type][key] = func; diff --git a/paddle/fluid/framework/op_version_registry.h b/paddle/fluid/framework/op_version_registry.h index 278abe825da979..236a0e2b86187e 100644 --- a/paddle/fluid/framework/op_version_registry.h +++ b/paddle/fluid/framework/op_version_registry.h @@ -20,10 +20,10 @@ limitations under the License. 
*/ #include #include +#include "paddle/common/macros.h" #include "paddle/fluid/framework/op_version_proto.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/phi/common/scalar.h" -#include "paddle/phi/core/macros.h" #include "paddle/utils/none.h" namespace paddle { diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 3484c5cc05940e..4ae5e0ebdf8720 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_transform.h" #include "paddle/fluid/framework/data_type_transform.h" @@ -37,7 +38,6 @@ limitations under the License. */ #include "paddle/phi/common/int_array.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/compat/get_kerneltype_forvar_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/flags.h" #include "paddle/phi/core/kernel_context.h" #include "paddle/phi/core/kernel_factory.h" diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 0bcaaef0b48b28..d51c0ce0f415d0 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -40,12 +40,12 @@ limitations under the License. */ #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/common/macros.h" #include "paddle/phi/core/compat/arg_map_context.h" #include "paddle/phi/core/compat/op_utils.h" #include "paddle/phi/core/flags.h" #include "paddle/phi/core/kernel_context.h" #include "paddle/phi/core/kernel_factory.h" -#include "paddle/phi/core/macros.h" #include "paddle/utils/flat_hash_map.h" #include "paddle/utils/test_macros.h" diff --git a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt index 04241179e3c0de..4a0a869b8a2bd8 100644 --- a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt +++ b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt @@ -15,7 +15,8 @@ set(paddle2cinn_deps cinn_framework_proto schedule_desc_proto auto_schedule_proto - parallel_executor) + parallel_executor + common) if(WITH_MKLDNN) set(paddle2cinn ${paddle2cinn} mkldnn) endif() diff --git a/paddle/fluid/framework/paddle2cinn/cinn_cache_key.cc b/paddle/fluid/framework/paddle2cinn/cinn_cache_key.cc index 6fdbbaae9d70c0..00a6c94e4c0ed1 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_cache_key.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_cache_key.cc @@ -21,10 +21,10 @@ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/paddle2cinn/transform_type.h" -#include "paddle/phi/core/ddim.h" #include "paddle/utils/string/string_helper.h" namespace paddle { diff --git a/paddle/fluid/framework/paddle2cinn/cinn_cache_key.h b/paddle/fluid/framework/paddle2cinn/cinn_cache_key.h index d1797ddf6bbd48..601b18464c2c67 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_cache_key.h +++ b/paddle/fluid/framework/paddle2cinn/cinn_cache_key.h @@ -17,10 +17,10 @@ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/reader.h b/paddle/fluid/framework/reader.h 
index 103b9a3ca0d4bd..f926829dc9bd47 100644 --- a/paddle/fluid/framework/reader.h +++ b/paddle/fluid/framework/reader.h @@ -19,9 +19,9 @@ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/platform/place.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/shape_inference.h b/paddle/fluid/framework/shape_inference.h index 72cfa4da2f2456..49603b34255db9 100644 --- a/paddle/fluid/framework/shape_inference.h +++ b/paddle/fluid/framework/shape_inference.h @@ -17,10 +17,10 @@ limitations under the License. */ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/framework/variable.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/type_defs.h" #include "paddle/utils/small_vector.h" diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index e92734a1e35dd4..18e5e87437ae2f 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -466,7 +466,7 @@ void TensorToStream(std::ostream& os, // void* protobuf message proto::VarType::TensorDesc desc; desc.set_data_type(framework::TransToProtoVarType(tensor.dtype())); - auto dims = phi::vectorize(tensor.dims()); + auto dims = common::vectorize(tensor.dims()); auto* pb_dims = desc.mutable_dims(); pb_dims->Resize(static_cast(dims.size()), 0); std::copy(dims.begin(), dims.end(), pb_dims->begin()); @@ -608,7 +608,7 @@ void TensorFromStream(std::istream& is, platform::errors::InvalidArgument("Cannot parse tensor desc")); } { // read tensor - tensor->Resize(phi::make_ddim(shape)); + tensor->Resize(common::make_ddim(shape)); size_t seekg = seek * framework::SizeOfType(desc.data_type()); is.seekg(seekg, is.cur); // NOLINT @@ -621,7 +621,7 @@ void TensorFromStream(std::istream& is, #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \ defined(PADDLE_WITH_XPU) || defined(PADDLE_WITH_CUSTOM_DEVICE) phi::DenseTensor cpu_tensor; - cpu_tensor.Resize(phi::make_ddim(shape)); + cpu_tensor.Resize(common::make_ddim(shape)); framework::VisitDataType( desc.data_type(), DeserializedDataFunctor(&buf, &cpu_tensor, ctx.GetPlace())); @@ -684,7 +684,7 @@ void TensorFromStream(std::istream& is, std::vector dims; dims.reserve(static_cast(desc.dims().size())); std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims)); - tensor->Resize(phi::make_ddim(dims)); + tensor->Resize(common::make_ddim(dims)); void* buf = nullptr; phi::CPUContext ctx; size_t size = tensor->numel() * framework::SizeOfType(desc.data_type()); @@ -694,7 +694,7 @@ void TensorFromStream(std::istream& is, #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \ defined(PADDLE_WITH_XPU) || defined(PADDLE_WITH_CUSTOM_DEVICE) phi::DenseTensor cpu_tensor; - cpu_tensor.Resize(phi::make_ddim(dims)); + cpu_tensor.Resize(common::make_ddim(dims)); framework::VisitDataType( desc.data_type(), DeserializedDataFunctor(&buf, &cpu_tensor, ctx.GetPlace())); @@ -802,14 +802,14 @@ void TensorFromDLPack(const ::DLTensor& dl_tensor, phi::DenseTensor* dst) { dl_tensor.shape + dl_tensor.ndim, std::back_inserter(vec)); - framework::DDim vddim = phi::make_ddim(vec); + framework::DDim vddim = common::make_ddim(vec); dst->Resize(vddim); ::DLDataType type = dl_tensor.dtype; void* dst_ptr = GetDstPtrByDLDataType(type, dst, dst_place); auto src_ptr = static_cast(dl_tensor.data); - 
auto size = phi::product(vddim) * type.bits / 8; + auto size = common::product(vddim) * type.bits / 8; if (dl_tensor.device.device_type == kDLCPU) { memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); @@ -841,12 +841,12 @@ void TensorFromDLPack(const DLManagedTensor* src, phi::DenseTensor* dst) { src->dl_tensor.shape + src->dl_tensor.ndim, std::back_inserter(vec)); - framework::DDim vddim = phi::make_ddim(vec); + framework::DDim vddim = common::make_ddim(vec); dst->Resize(vddim); ::DLDataType type = src->dl_tensor.dtype; auto src_ptr = static_cast(src->dl_tensor.data); - auto size = phi::product(vddim) * type.bits / 8; + auto size = common::product(vddim) * type.bits / 8; if (src->dl_tensor.device.device_type == kDLCPU) { platform::CPUPlace dst_place = platform::CPUPlace(); @@ -973,7 +973,7 @@ TEST_API std::ostream& operator<<(std::ostream& os, const phi::DenseTensor& t) { os << " - lod: " << t.lod() << "\n"; } os << " - shape: [" << t.dims() << "]\n"; - os << " - layout: " << phi::DataLayoutToString(t.layout()) << "\n"; + os << " - layout: " << common::DataLayoutToString(t.layout()) << "\n"; if (!t.initialized()) { os << "uninited\n"; diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt index e6e3c01be624a4..b6d846e9a0c12d 100644 --- a/paddle/fluid/imperative/CMakeLists.txt +++ b/paddle/fluid/imperative/CMakeLists.txt @@ -1,11 +1,11 @@ cc_library( imperative_flag SRCS flags.cc - DEPS phi) + DEPS phi common) cc_library( var_helper SRCS var_helper.cc - DEPS tensor phi) + DEPS tensor phi common) if(WITH_XPU) cc_library( prepared_operator @@ -21,6 +21,7 @@ if(WITH_XPU) data_transform nan_inf_utils phi + common var_helper profiler place) @@ -38,6 +39,7 @@ else() data_transform nan_inf_utils phi + common var_helper profiler place) @@ -45,14 +47,19 @@ endif() cc_library( layer SRCS layer.cc - DEPS prepared_operator phi imperative_flag variable_helper op_registry + DEPS prepared_operator + phi + common + imperative_flag + variable_helper + op_registry var_helper) add_subdirectory(jit) if(WITH_GPU) cc_library( layout_autotune SRCS layout_autotune.cc - DEPS op_info phi) + DEPS op_info phi common) else() cc_library( layout_autotune @@ -75,19 +82,20 @@ cc_library( var_helper layout_autotune ops_extra_info - phi) + phi + common) cc_library( basic_engine SRCS basic_engine.cc - DEPS layer gradient_accumulator phi) + DEPS layer gradient_accumulator phi common) cc_library( engine SRCS basic_engine.cc partial_grad_engine.cc - DEPS layer gradient_accumulator phi) + DEPS layer gradient_accumulator phi common) cc_library( imperative_profiler SRCS profiler.cc - DEPS phi) + DEPS phi common) if(NOT WIN32) if(WITH_NCCL OR WITH_RCCL) cc_library( @@ -150,7 +158,7 @@ if(NOT WIN32) cc_library( data_loader SRCS data_loader.cc - DEPS enforce) + DEPS enforce common) endif() if(WITH_GLOO) cc_library( @@ -173,4 +181,10 @@ endif() cc_library( gradient_accumulator SRCS gradient_accumulator.cc - DEPS operator lod_tensor selected_rows_utils var_type_traits layer phi) + DEPS operator + lod_tensor + selected_rows_utils + var_type_traits + layer + phi + common) diff --git a/paddle/fluid/imperative/all_reduce.cc b/paddle/fluid/imperative/all_reduce.cc index d5f03924e28c17..c4bb42e4c22bb4 100644 --- a/paddle/fluid/imperative/all_reduce.cc +++ b/paddle/fluid/imperative/all_reduce.cc @@ -146,7 +146,7 @@ static void AllReduce(const phi::SelectedRows &src, auto *dst_tensor = dst->mutable_value(); auto dims = src_tensor.dims(); dims[0] = rows_num; - auto feature_size = phi::product(dims) 
/ dims[0]; + auto feature_size = common::product(dims) / dims[0]; dst_tensor->Resize(dims); auto *dst_tensor_ptr = dst_tensor->mutable_data(place, src_tensor.dtype()); const auto *src_tensor_ptr = src_tensor.data(); diff --git a/paddle/fluid/imperative/gloo_context.cc b/paddle/fluid/imperative/gloo_context.cc index 16de1baf72da96..4e0df45e840f25 100644 --- a/paddle/fluid/imperative/gloo_context.cc +++ b/paddle/fluid/imperative/gloo_context.cc @@ -165,7 +165,7 @@ void GLOOParallelContext::AllReduce(const phi::SelectedRows &src, auto *dst_tensor = dst->mutable_value(); auto dims = src_tensor.dims(); dims[0] = rows_num; - auto feature_size = phi::product(dims) / dims[0]; + auto feature_size = common::product(dims) / dims[0]; dst_tensor->Resize(dims); std::vector element_nums = rows_num_vector; diff --git a/paddle/fluid/imperative/infer_shape_context.h b/paddle/fluid/imperative/infer_shape_context.h index 80c52ab4ac10f8..e4aeb477db8ad1 100644 --- a/paddle/fluid/imperative/infer_shape_context.h +++ b/paddle/fluid/imperative/infer_shape_context.h @@ -17,13 +17,13 @@ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/shape_inference.h" #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/imperative/var_helper.h" #include "paddle/fluid/imperative/variable_wrapper.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_factory.h" namespace paddle { diff --git a/paddle/fluid/imperative/jit/program_desc_tracer.cc b/paddle/fluid/imperative/jit/program_desc_tracer.cc index deda1ff572a704..86a38f3942aaa7 100644 --- a/paddle/fluid/imperative/jit/program_desc_tracer.cc +++ b/paddle/fluid/imperative/jit/program_desc_tracer.cc @@ -264,7 +264,7 @@ void ProgramDescTracer::InsertVarIfNotExist( if (inner_var.IsType()) { const auto &tensor = inner_var.Get(); new_var_desc->SetType(framework::proto::VarType::LOD_TENSOR); - new_var_desc->SetShape(phi::vectorize(tensor.dims())); + new_var_desc->SetShape(common::vectorize(tensor.dims())); new_var_desc->SetLoDLevel(static_cast(tensor.lod().size())); if (tensor.IsInitialized()) { new_var_desc->SetDataType(framework::TransToProtoVarType(tensor.dtype())); diff --git a/paddle/fluid/imperative/layout_autotune.cc b/paddle/fluid/imperative/layout_autotune.cc index 18baaf98fdf11c..4075be6491ff19 100644 --- a/paddle/fluid/imperative/layout_autotune.cc +++ b/paddle/fluid/imperative/layout_autotune.cc @@ -14,12 +14,12 @@ #include "paddle/fluid/imperative/layout_autotune.h" +#include "paddle/common/errors.h" #include "paddle/fluid/eager/api/utils/global_utils.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/imperative/layout_transformer.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" namespace paddle { namespace imperative { @@ -207,7 +207,7 @@ paddle::imperative::NameVarMap AutoTuneLayout( VLOG(3) << "Tune the layout from " << PADDLE_GET_CONST(std::string, (*attrs)["data_format"]) << " to " - << phi::DataLayoutToString( + << common::DataLayoutToString( LayoutAutoTune::Instance().GetDesiredLayout()); } } diff --git a/paddle/fluid/imperative/layout_autotune.h b/paddle/fluid/imperative/layout_autotune.h index 4b08a34f943f43..bee6529493fecd 100644 --- a/paddle/fluid/imperative/layout_autotune.h +++ b/paddle/fluid/imperative/layout_autotune.h @@ -18,9 +18,9 @@ #include #include +#include "paddle/common/layout.h" #include 
"paddle/fluid/framework/type_defs.h" #include "paddle/fluid/imperative/tracer.h" -#include "paddle/phi/common/layout.h" namespace paddle { namespace imperative { diff --git a/paddle/fluid/imperative/layout_transformer.h b/paddle/fluid/imperative/layout_transformer.h index 61bd4f9dfe2b8f..5827e824dad90e 100644 --- a/paddle/fluid/imperative/layout_transformer.h +++ b/paddle/fluid/imperative/layout_transformer.h @@ -13,12 +13,12 @@ // limitations under the License. #pragma once +#include "paddle/common/errors.h" #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/imperative/layout_autotune.h" #include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/imperative/var_helper.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/tensor_utils.h" namespace paddle { namespace imperative { @@ -62,10 +62,10 @@ std::shared_ptr TraceTransposeOp( tracer->TraceOp("transpose2", ins, outs, std::move(attrs)); paddle::imperative::SetDataLayout(out, layout); VLOG(4) << "Transpose " << paddle::imperative::GetNameFromVar(var) << "[" - << phi::DataLayoutToString(paddle::imperative::GetDataLayout(var)) + << common::DataLayoutToString(paddle::imperative::GetDataLayout(var)) << "]" << " to " << paddle::imperative::GetNameFromVar(out) << "[" - << phi::DataLayoutToString(paddle::imperative::GetDataLayout(out)) + << common::DataLayoutToString(paddle::imperative::GetDataLayout(out)) << "]"; return out; } @@ -102,7 +102,7 @@ class LayoutTransformer { } } VLOG(3) << "Optimze Layout agnostic op: " << type_ << " " - << phi::DataLayoutToString(in_layout); + << common::DataLayoutToString(in_layout); if (in_layout != DataLayout::UNDEFINED) { SetVarsLayout(outs, in_layout); } @@ -184,8 +184,8 @@ class HeavilyLayoutSensitiveOpTransformer : public LayoutTransformer { // Step 1: Adjust the data_layout attr to the desired layout auto desired_layout = LayoutAutoTune::Instance().GetDesiredLayout(); - std::string desired_layout_str = - phi::DataLayoutToString(LayoutAutoTune::Instance().GetDesiredLayout()); + std::string desired_layout_str = common::DataLayoutToString( + LayoutAutoTune::Instance().GetDesiredLayout()); if (attrs->find("data_format") != attrs->end() && PADDLE_GET_CONST(std::string, (*attrs)["data_format"]) != desired_layout_str) { @@ -251,10 +251,10 @@ class LightlyLayoutSensitiveOpTransformer : public LayoutTransformer { for (auto& var : pair.second) { if (var != nullptr) { VLOG(3) << "Tune the layout from " - << phi::DataLayoutToString( + << common::DataLayoutToString( paddle::imperative::GetDataLayout(var)) << " to " - << phi::DataLayoutToString( + << common::DataLayoutToString( LayoutAutoTune::Instance().GetDesiredLayout()); } if (var != nullptr && diff --git a/paddle/fluid/imperative/reducer.cc b/paddle/fluid/imperative/reducer.cc index b03aadd4dc6aa2..4bbc52662fc96e 100644 --- a/paddle/fluid/imperative/reducer.cc +++ b/paddle/fluid/imperative/reducer.cc @@ -841,7 +841,7 @@ void Reducer::MarkGroupReady(size_t group_index) { UNUSED const int run_order = next_group_ % nrings_; auto *tensor = group.dense_contents_.GetMutable(); - tensor->Resize(phi::make_ddim({group.all_length_})) + tensor->Resize(common::make_ddim({group.all_length_})) .mutable_data(place_, framework::TransToPhiDataType(group.dtype_)); // For CUDA or XPU, compute_stream --> comm_stream. 
diff --git a/paddle/fluid/imperative/variable_wrapper.h b/paddle/fluid/imperative/variable_wrapper.h index d4438e8b47b970..d18750a26f0337 100644 --- a/paddle/fluid/imperative/variable_wrapper.h +++ b/paddle/fluid/imperative/variable_wrapper.h @@ -19,13 +19,13 @@ #include #include +#include "paddle/common/layout.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_kernel_type.h" #include "paddle/fluid/framework/string_array.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/imperative/hooks.h" #include "paddle/fluid/imperative/op_base.h" -#include "paddle/phi/common/layout.h" namespace paddle { namespace imperative { diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 81b0abe570e77d..f8bd690c885ea3 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -69,8 +69,14 @@ if(WIN32 AND WITH_GPU) cc_library(paddle_inference DEPS ${fluid_modules} ${STATIC_INFERENCE_API} ${utils_modules}) else() - create_static_lib(paddle_inference ${phi_modules} ${fluid_modules} - ${ir_targets} ${STATIC_INFERENCE_API} ${utils_modules}) + create_static_lib( + paddle_inference + ${phi_modules} + ${fluid_modules} + ${ir_targets} + ${STATIC_INFERENCE_API} + ${utils_modules} + common_static) endif() if(NOT APPLE) @@ -103,7 +109,7 @@ list(REMOVE_ITEM fluid_modules cinn_op_dialect) # shared library to prune library size. list(REMOVE_ITEM fluid_modules ${not_infer_modules}) -set(SHARED_INFERENCE_DEPS phi ${fluid_modules} analysis_predictor +set(SHARED_INFERENCE_DEPS phi common ${fluid_modules} analysis_predictor ${utils_modules}) if(NOT WIN32) list(APPEND SHARED_INFERENCE_DEPS ${ir_targets}) @@ -134,7 +140,7 @@ target_link_libraries(paddle_inference_shared ${os_dependency_modules}) if(WIN32) set_property(TARGET paddle_inference_shared PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON) - target_link_libraries(paddle_inference_shared phi) + target_link_libraries(paddle_inference_shared phi common) endif() set_target_properties(paddle_inference_shared PROPERTIES LINK_FLAGS "-Wl,-rpath,'$ORIGIN'") diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index 1ecc067f3b90ee..122dbbda8fabdd 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -22,13 +22,13 @@ #include #include +#include "paddle/common/errors.h" #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/inference/analysis/argument.h" #include "paddle/fluid/string/pretty_log.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/core/errors.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index c5f00cb08355f6..ea648d8574c94b 100755 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -39,7 +39,7 @@ if(WITH_CRYPTO) list(APPEND paddle_inference_api_deps framework_io) endif() if(WITH_CUSTOM_DEVICE) - set(paddle_inference_api_deps ${paddle_inference_api_deps} phi) + set(paddle_inference_api_deps ${paddle_inference_api_deps} phi common) endif() if(WIN32) @@ -61,7 +61,7 @@ cc_library( table_printer utf8proc) if(WIN32) - target_link_libraries(paddle_inference_api phi) + target_link_libraries(paddle_inference_api phi common) endif() set(inference_deps diff 
--git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 32083da55e6542..c821aad73459a8 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -236,7 +236,7 @@ phi::Backend ConvertBackend(paddle_infer::PlaceType backend) { bool PaddleTensorToDenseTensor(const PaddleTensor &pt, phi::DenseTensor *t, const platform::Place &place) { - framework::DDim ddim = phi::make_ddim(pt.shape); + framework::DDim ddim = common::make_ddim(pt.shape); void *input_ptr = nullptr; if (pt.dtype == PaddleDType::INT64) { input_ptr = t->mutable_data(ddim, place); @@ -254,7 +254,7 @@ bool PaddleTensorToDenseTensor(const PaddleTensor &pt, } // NOTE(Aurelius84): Some kernels support zero shape input // without memory holder, we should skip enforce logic. - bool has_zero_dim = (phi::product(ddim) == 0); + bool has_zero_dim = (common::product(ddim) == 0); VLOG(3) << "Found zero dim: " << has_zero_dim << " from input with ddim: " << ddim; if (!has_zero_dim) { @@ -1138,7 +1138,7 @@ void AnalysisPredictor::MkldnnPreSet( #ifdef PADDLE_WITH_DNNL std::vector> inputs_shape; for (const auto &input : inputs) { - inputs_shape.emplace_back(phi::vectorize(input.dims())); + inputs_shape.emplace_back(common::vectorize(input.dims())); } MkldnnPreSet(inputs_shape); #endif @@ -1416,7 +1416,7 @@ template void AnalysisPredictor::GetFetchOne(const phi::DenseTensor &fetch, PaddleTensor *output) { // set shape. - auto shape = phi::vectorize(fetch.dims()); + auto shape = common::vectorize(fetch.dims()); output->shape.assign(shape.begin(), shape.end()); // set data. int num_elems = inference::VecReduceToInt(shape); diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index c3f50fd6f6bb39..d886885edb5ba5 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -213,7 +213,7 @@ bool NativePaddlePredictor::SetFeed(const std::vector &inputs, for (size_t i = 0; i < inputs.size(); ++i) { auto &input = feed_tensors_[i]; - framework::DDim ddim = phi::make_ddim(inputs[i].shape); + framework::DDim ddim = common::make_ddim(inputs[i].shape); void *input_ptr = nullptr; if (inputs[i].dtype == PaddleDType::INT64) { input_ptr = input.mutable_data(ddim, place_); @@ -299,7 +299,7 @@ template void NativePaddlePredictor::GetFetchOne(const phi::DenseTensor &fetch, PaddleTensor *output) { // set shape. - auto shape = phi::vectorize(fetch.dims()); + auto shape = common::vectorize(fetch.dims()); output->shape.assign(shape.begin(), shape.end()); // set data. const T *data = fetch.data(); diff --git a/paddle/fluid/inference/api/api_impl.h b/paddle/fluid/inference/api/api_impl.h index e86dbe14d1746e..75bc0b3cafa17a 100644 --- a/paddle/fluid/inference/api/api_impl.h +++ b/paddle/fluid/inference/api/api_impl.h @@ -21,6 +21,7 @@ limitations under the License. */ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/naive_executor.h" @@ -31,7 +32,6 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" -#include "paddle/phi/core/ddim.h" namespace paddle { diff --git a/paddle/fluid/inference/api/details/CMakeLists.txt b/paddle/fluid/inference/api/details/CMakeLists.txt index 105ff16747dfd0..b7eb0030155b72 100644 --- a/paddle/fluid/inference/api/details/CMakeLists.txt +++ b/paddle/fluid/inference/api/details/CMakeLists.txt @@ -21,20 +21,20 @@ if(WITH_ONNXRUNTIME) cc_library( zero_copy_tensor SRCS zero_copy_tensor.cc - DEPS scope lod_tensor enforce onnxruntime) + DEPS scope lod_tensor enforce onnxruntime common) cc_library( zero_copy_tensor_dummy SRCS zero_copy_tensor_dummy.cc - DEPS onnxruntime phi) + DEPS onnxruntime phi common) else() cc_library( zero_copy_tensor SRCS zero_copy_tensor.cc - DEPS scope lod_tensor enforce) + DEPS scope lod_tensor enforce common) cc_library( zero_copy_tensor_dummy SRCS zero_copy_tensor_dummy.cc - DEPS phi) + DEPS phi common) endif() cc_test( diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index cbd74f644d40d2..eee3a707a03b14 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -59,7 +59,7 @@ void Tensor::Reshape(const std::vector &shape) { paddle::platform::errors::PreconditionNotMet( "No tensor called [%s] in the runtime scope", name_)); auto *tensor = var->GetMutable(); - tensor->Resize(phi::make_ddim(shape)); + tensor->Resize(common::make_ddim(shape)); } void Tensor::ReshapeStrings(const size_t &shape) { @@ -337,7 +337,7 @@ void Tensor::ShareExternalData(const T *data, std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()) * sizeof(T); phi::DenseTensorMeta meta( - DataTypeInfo().TYPE, phi::make_ddim(shape), LayoutConvert(layout)); + DataTypeInfo().TYPE, common::make_ddim(shape), LayoutConvert(layout)); if (place == PlaceType::kCPU) { phi::DenseTensor dtensor( std::make_shared( @@ -733,18 +733,19 @@ std::vector Tensor::shape() const { // at last nhwC, so for dim==2 these layouts are the same and nothing should // be done. Similarly for dim==1 when you have just one possible // combination. 
- if (tensor->dims().size() < 3) return phi::vectorize(tensor->dims()); + if (tensor->dims().size() < 3) + return common::vectorize(tensor->dims()); if (out_layout == phi::DataLayout::kNHWC || out_layout == phi::DataLayout::kNDHWC) { - auto dims = phi::vectorize(tensor->dims()); + auto dims = common::vectorize(tensor->dims()); std::rotate(dims.begin() + 1, dims.begin() + 2, dims.end()); return dims; } else { - return phi::vectorize(tensor->dims()); + return common::vectorize(tensor->dims()); } } #endif - return phi::vectorize(tensor->dims()); + return common::vectorize(tensor->dims()); } void Tensor::SetLoD(const std::vector> &x) { diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.cc b/paddle/fluid/inference/api/mkldnn_quantizer.cc index 1b604b544b9475..54a198b4e2f590 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer.cc @@ -428,7 +428,7 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor( auto dims = var_tensor.dims(); constexpr int num_col_dims = 1; - auto flattened_dims = phi::flatten_to_2d(dims, num_col_dims); + auto flattened_dims = common::flatten_to_2d(dims, num_col_dims); ConstEigenMatrixArrayMap eigen_tensor_mat{ var_tensor.data(), flattened_dims[0], flattened_dims[1]}; diff --git a/paddle/fluid/inference/api/onnxruntime_predictor.cc b/paddle/fluid/inference/api/onnxruntime_predictor.cc index 4f8435ca505c0e..25970440469168 100644 --- a/paddle/fluid/inference/api/onnxruntime_predictor.cc +++ b/paddle/fluid/inference/api/onnxruntime_predictor.cc @@ -317,7 +317,7 @@ Ort::Value ONNXRuntimePredictor::GetOrtValue(const ONNXDesc &desc, size_t size = tensor->numel() * framework::SizeOfType(framework::TransToProtoVarType(tensor->dtype())); - std::vector shape = phi::vectorize(tensor->dims()); + std::vector shape = common::vectorize(tensor->dims()); return Ort::Value::CreateTensor(memory_info, static_cast(tensor->data()), size, diff --git a/paddle/fluid/inference/api/resource_manager.cc b/paddle/fluid/inference/api/resource_manager.cc index 2414aaee1b78b5..2806204f4b9406 100644 --- a/paddle/fluid/inference/api/resource_manager.cc +++ b/paddle/fluid/inference/api/resource_manager.cc @@ -20,6 +20,7 @@ #include #include +#include "paddle/common/errors.h" #include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/platform/device/gpu/gpu_types.h" #include "paddle/phi/backends/gpu/forwards.h" @@ -28,7 +29,6 @@ #include "paddle/phi/backends/gpu/gpu_resources.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/allocator.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/generator.h" #include "unsupported/Eigen/CXX11/Tensor" diff --git a/paddle/fluid/inference/capi_exp/CMakeLists.txt b/paddle/fluid/inference/capi_exp/CMakeLists.txt index 3a72f0e880c4b7..97a7910669a108 100644 --- a/paddle/fluid/inference/capi_exp/CMakeLists.txt +++ b/paddle/fluid/inference/capi_exp/CMakeLists.txt @@ -36,6 +36,7 @@ if(APPLE) cryptopp protobuf phi + common pir cblas) endif() diff --git a/paddle/fluid/inference/lite/tensor_utils.cc b/paddle/fluid/inference/lite/tensor_utils.cc index 6de5f9cfa0ca17..9b36b6dc745e85 100644 --- a/paddle/fluid/inference/lite/tensor_utils.cc +++ b/paddle/fluid/inference/lite/tensor_utils.cc @@ -218,7 +218,7 @@ void TensorCopyAsync(paddle::lite_api::Tensor* dst, const platform::Place& dst_place = GetNativePlace(dst->target()); const size_t bytes = static_cast(src.numel()) * phi::SizeOf(src.dtype()); - dst->Resize(phi::vectorize(src.dims())); + 
dst->Resize(common::vectorize(src.dims())); const void* src_data = src.data(); void* dst_data{nullptr}; dst_data = GetLiteTensorDataPtr( @@ -236,7 +236,7 @@ template <> void TensorCopyAsync(phi::DenseTensor* dst, const paddle::lite_api::Tensor& src, const platform::DeviceContext& ctx) { - dst->Resize(phi::make_ddim(src.shape())); + dst->Resize(common::make_ddim(src.shape())); InitDstTensor(dst, src); const platform::Place& src_place = GetNativePlace(src.target()); const platform::Place& dst_place = dst->place(); @@ -254,7 +254,7 @@ void TensorCopyAsync(phi::DenseTensor* dst, template <> void TensorDataShare(paddle::lite_api::Tensor* dst, phi::DenseTensor* src) { - dst->Resize(phi::vectorize(src->dims())); + dst->Resize(common::vectorize(src->dims())); dst->ShareExternalMemory( src->data(), src->memory_size(), GetLiteTargetType(src->place())); dst->SetPrecision( @@ -273,7 +273,7 @@ void TensorDataShare(phi::DenseTensor* dst, paddle::lite_api::Tensor* src) { framework::SizeOfType(GetNativePrecisionType(src->precision())); std::shared_ptr holder(new phi::Allocation( src_raw_data, memory_size, GetNativePlace(src->target()))); - dst->Resize(phi::make_ddim(src->shape())); + dst->Resize(common::make_ddim(src->shape())); SetLoD(dst->mutable_lod(), src->lod()); dst->ResetHolderWithType( holder, diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 1d62d0aec013c6..b5b7bc857c1174 100755 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -150,7 +150,8 @@ if(WIN32) nv_test( test_custom_plugin_creater SRCS test_custom_plugin_creater.cc - DEPS paddle_framework tensorrt_converter phi custom_operator init_phi) + DEPS paddle_framework tensorrt_converter phi common custom_operator + init_phi) elseif(WITH_CINN) nv_test( test_custom_plugin_creater @@ -158,6 +159,7 @@ elseif(WITH_CINN) DEPS paddle_framework tensorrt_converter phi + common custom_operator init_phi fleet_executor @@ -169,6 +171,7 @@ else() DEPS paddle_framework tensorrt_converter phi + common custom_operator init_phi fleet_executor diff --git a/paddle/fluid/inference/tensorrt/convert/bilinear_interp_v2_op.cc b/paddle/fluid/inference/tensorrt/convert/bilinear_interp_v2_op.cc index 77153d8ade56db..4cd7378c17b443 100644 --- a/paddle/fluid/inference/tensorrt/convert/bilinear_interp_v2_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/bilinear_interp_v2_op.cc @@ -33,7 +33,7 @@ class BilinearInterpolateV2OpConverter : public OpConverter { auto input = engine_->GetITensor(input_name); - auto data_layout = phi::StringToDataLayout( + auto data_layout = common::StringToDataLayout( PADDLE_GET_CONST(std::string, op_desc.GetAttr("data_layout"))); auto interp_method = PADDLE_GET_CONST(std::string, op_desc.GetAttr("interp_method")); diff --git a/paddle/fluid/inference/tensorrt/convert/dropout_op.cc b/paddle/fluid/inference/tensorrt/convert/dropout_op.cc index c0d93a7588b4b2..bec18da482e41a 100644 --- a/paddle/fluid/inference/tensorrt/convert/dropout_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/dropout_op.cc @@ -49,7 +49,7 @@ class DropoutOpConverter : public OpConverter { platform::CPUPlace cpu_place; std::unique_ptr weight_tensor(new phi::DenseTensor()); - weight_tensor->Resize(phi::make_ddim({1})); + weight_tensor->Resize(common::make_ddim({1})); auto* weight_data = weight_tensor->mutable_data(platform::CPUPlace()); weight_data[0] = 1 - dropout_prob; diff --git 
a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc index 198a164894c0b1..f5f7e53cf4e0d4 100644 --- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc @@ -33,7 +33,7 @@ class ElementwiseTensorOpConverter : public OpConverter { if (Y_v && !engine_->with_dynamic_shape()) { // Y is weight auto* Y_t = Y_v->GetMutable(); - std::vector dims_y = phi::vectorize(Y_t->dims()); + std::vector dims_y = common::vectorize(Y_t->dims()); auto y_weight = engine_->GetTrtWeight(op_desc.Input("Y").front(), *Y_t); nvinfer1::Dims trt_dims_y; diff --git a/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc b/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc index dba8086f2952e7..6ccb22e072f1b2 100644 --- a/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc +++ b/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc @@ -9,13 +9,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/common/ddim.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/convert/utils.h" #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/inference/tensorrt/helper.h" #include "paddle/fluid/inference/tensorrt/plugin/many_emb_layernorm_plugin.h" #include "paddle/fluid/inference/tensorrt/plugin/many_emb_layernorm_varseqlen_plugin.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace inference { @@ -54,8 +54,8 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter { framework::DDim bias_dims, scale_dims; TensorRTEngine::Weight bias_weight, scale_weight; - int64_t bias_size = phi::product(bias_dims); - int64_t scale_size = phi::product(scale_dims); + int64_t bias_size = common::product(bias_dims); + int64_t scale_size = common::product(scale_dims); nvinfer1::ILayer* layer = nullptr; bool enable_int8 = op_desc.HasAttr("enable_int8"); @@ -81,8 +81,8 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter { } bias_weight = GetWeight(op_desc.Input("Bias").front(), &bias_dims); scale_weight = GetWeight(op_desc.Input("Scale").front(), &scale_dims); - bias_size = phi::product(bias_dims); - scale_size = phi::product(scale_dims); + bias_size = common::product(bias_dims); + scale_size = common::product(scale_dims); // other_id(except pos_id) engine_->SetITensor("word_id", input_ids[1]); @@ -189,8 +189,8 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter { } bias_weight = GetWeight(op_desc.Input("Bias").front(), &bias_dims); scale_weight = GetWeight(op_desc.Input("Scale").front(), &scale_dims); - bias_size = phi::product(bias_dims); - scale_size = phi::product(scale_dims); + bias_size = common::product(bias_dims); + scale_size = common::product(scale_dims); int output_fp16 = static_cast((engine_->WithFp16() == 1) ? 
1 : 0); if (enable_int8) { diff --git a/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc b/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc index f6f476dc204851..4da409f9097337 100644 --- a/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc @@ -36,7 +36,7 @@ class FillConstantOpConverter : public OpConverter { str_value = std::to_string(value); } std::unique_ptr out_tensor(new phi::DenseTensor()); - out_tensor->Resize(phi::make_ddim(shape)); + out_tensor->Resize(common::make_ddim(shape)); nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT; void* trt_data = nullptr; size_t trt_num; diff --git a/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc b/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc index 9f14c8c1b64fb8..e811827a7296c1 100644 --- a/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc +++ b/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc @@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/common/errors.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/helper.h" #include "paddle/fluid/inference/tensorrt/plugin/generic_plugin.h" #include "paddle/fluid/inference/tensorrt/plugin_arg_mapping_context.h" #include "paddle/phi/api/ext/op_meta_info.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc b/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc index d14be87e3ffd94..85a085aa221c41 100644 --- a/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc @@ -47,7 +47,7 @@ class LeakyReluOpConverter : public OpConverter { #else platform::CPUPlace place; std::unique_ptr alpha_tensor(new phi::DenseTensor()); - alpha_tensor->Resize(phi::make_ddim({2})); + alpha_tensor->Resize(common::make_ddim({2})); float* alpha_data = alpha_tensor->mutable_data(place); alpha_data[0] = alpha; alpha_data[1] = 1.f - alpha; diff --git a/paddle/fluid/inference/tensorrt/convert/nearest_interp_op.cc b/paddle/fluid/inference/tensorrt/convert/nearest_interp_op.cc index 84c2cc54f955d2..4c0b1a027640bc 100644 --- a/paddle/fluid/inference/tensorrt/convert/nearest_interp_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/nearest_interp_op.cc @@ -33,7 +33,7 @@ class NearestInterpolateOpConverter : public OpConverter { auto data_layout = !op_desc.HasAttr("data_layout") ? 
phi::DataLayout::kNCHW - : phi::StringToDataLayout(PADDLE_GET_CONST( + : common::StringToDataLayout(PADDLE_GET_CONST( std::string, op_desc.GetAttr("data_layout"))); auto interp_method = PADDLE_GET_CONST(std::string, op_desc.GetAttr("interp_method")); diff --git a/paddle/fluid/inference/tensorrt/convert/nearest_interp_v2_op.cc b/paddle/fluid/inference/tensorrt/convert/nearest_interp_v2_op.cc index 997a467077043b..6f33a710469776 100644 --- a/paddle/fluid/inference/tensorrt/convert/nearest_interp_v2_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/nearest_interp_v2_op.cc @@ -31,7 +31,7 @@ class NearestInterpolateV2OpConverter : public OpConverter { auto input = engine_->GetITensor(input_name); auto inputs = op_desc.Inputs(); - auto data_layout = phi::StringToDataLayout( + auto data_layout = common::StringToDataLayout( PADDLE_GET_CONST(std::string, op_desc.GetAttr("data_layout"))); auto interp_method = PADDLE_GET_CONST(std::string, op_desc.GetAttr("interp_method")); diff --git a/paddle/fluid/inference/tensorrt/convert/preln_emb_eltwise_layernorm.cc b/paddle/fluid/inference/tensorrt/convert/preln_emb_eltwise_layernorm.cc index afeacd5cd6b981..529175c7de81a8 100644 --- a/paddle/fluid/inference/tensorrt/convert/preln_emb_eltwise_layernorm.cc +++ b/paddle/fluid/inference/tensorrt/convert/preln_emb_eltwise_layernorm.cc @@ -63,8 +63,8 @@ class PrelnEmbEltwiseLayerNormOpConverter : public OpConverter { framework::DDim bias_dims, scale_dims; TensorRTEngine::Weight bias_weight, scale_weight; - int64_t bias_size = phi::product(bias_dims); - int64_t scale_size = phi::product(scale_dims); + int64_t bias_size = common::product(bias_dims); + int64_t scale_size = common::product(scale_dims); std::vector id_names = op_desc.Input("Ids"); std::vector emb_names = op_desc.Input("Embs"); @@ -135,8 +135,8 @@ class PrelnEmbEltwiseLayerNormOpConverter : public OpConverter { } bias_weight = GetWeight(op_desc.Input("Bias").front(), &bias_dims); scale_weight = GetWeight(op_desc.Input("Scale").front(), &scale_dims); - bias_size = phi::product(bias_dims); - scale_size = phi::product(scale_dims); + bias_size = common::product(bias_dims); + scale_size = common::product(scale_dims); // other_id(except pos_id) engine_->SetITensor("word_id", input_ids[1]); diff --git a/paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc b/paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc index d8ffe4da595bdd..9091cfd10e3e8f 100644 --- a/paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc +++ b/paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc @@ -53,10 +53,10 @@ class PrelnResidualBiasOpConverter : public OpConverter { float* ele_bias = has_bias ? get_persistable_data("Bias", &ele_bias_dims) : nullptr; - int bias_size = phi::product(bias_dims); + int bias_size = common::product(bias_dims); - int scale_size = phi::product(scale_dims); - int ele_bias_size = has_bias ? phi::product(ele_bias_dims) : 0; + int scale_size = common::product(scale_dims); + int ele_bias_size = has_bias ? 
common::product(ele_bias_dims) : 0; float epsilon = PADDLE_GET_CONST(float, op_desc.GetAttr("ln_epsilon")); bool with_fp16 = engine_->WithFp16() && !engine_->disable_trt_plugin_fp16(); if (engine_->precision() == phi::DataType::INT8) { diff --git a/paddle/fluid/inference/tensorrt/convert/preln_skip_layernorm.cc b/paddle/fluid/inference/tensorrt/convert/preln_skip_layernorm.cc index f9d4be7c55f1a2..d21247e877cec4 100644 --- a/paddle/fluid/inference/tensorrt/convert/preln_skip_layernorm.cc +++ b/paddle/fluid/inference/tensorrt/convert/preln_skip_layernorm.cc @@ -57,8 +57,8 @@ class PrelnSkipLayerNormOpConverter : public OpConverter { framework::DDim bias_dims, scale_dims; auto* bias = get_persistable_data("Bias", &bias_dims); auto* scale = get_persistable_data("Scale", &scale_dims); - int bias_size = phi::product(bias_dims); - int scale_size = phi::product(scale_dims); + int bias_size = common::product(bias_dims); + int scale_size = common::product(scale_dims); nvinfer1::ILayer* layer = nullptr; diff --git a/paddle/fluid/inference/tensorrt/convert/prompt_tuning_emb_eltwise_layernorm.cc b/paddle/fluid/inference/tensorrt/convert/prompt_tuning_emb_eltwise_layernorm.cc index f5fc773135c565..9fc4c96ab7b93f 100644 --- a/paddle/fluid/inference/tensorrt/convert/prompt_tuning_emb_eltwise_layernorm.cc +++ b/paddle/fluid/inference/tensorrt/convert/prompt_tuning_emb_eltwise_layernorm.cc @@ -9,12 +9,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/common/ddim.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/convert/utils.h" #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/inference/tensorrt/helper.h" #include "paddle/fluid/inference/tensorrt/plugin/prompt_tuning_emb_layernorm_varseqlen_plugin.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace inference { @@ -55,8 +55,8 @@ class PromptTuningEmbEltwiseLayerNormOpConverter : public OpConverter { framework::DDim bias_dims, scale_dims; TensorRTEngine::Weight bias_weight, scale_weight; - int64_t bias_size = phi::product(bias_dims); - int64_t scale_size = phi::product(scale_dims); + int64_t bias_size = common::product(bias_dims); + int64_t scale_size = common::product(scale_dims); bool enable_int8 = op_desc.HasAttr("enable_int8"); std::vector id_names = op_desc.Input("Ids"); @@ -80,8 +80,8 @@ class PromptTuningEmbEltwiseLayerNormOpConverter : public OpConverter { } bias_weight = GetWeight(op_desc.Input("Bias").front(), &bias_dims); scale_weight = GetWeight(op_desc.Input("Scale").front(), &scale_dims); - bias_size = phi::product(bias_dims); - scale_size = phi::product(scale_dims); + bias_size = common::product(bias_dims); + scale_size = common::product(scale_dims); // other_id(except pos_id) engine_->SetITensor("word_id", input_ids[1]); diff --git a/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc b/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc index b653df0bca83bc..7ef6d1f3241d8b 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc @@ -59,7 +59,7 @@ TEST(OpConverter, ConvertBlock) { std::vector dim_vec = {3, 2, 3, 3}; auto* x = scope.Var("conv2d-Y"); auto* x_tensor = x->GetMutable(); - x_tensor->Resize(phi::make_ddim(dim_vec)); + x_tensor->Resize(common::make_ddim(dim_vec)); 
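// Only the namespace of the DDim factory changes in hunks like the one
// above: Resize still receives the same DDim, now built by
// common::make_ddim instead of phi::make_ddim.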
x_tensor->mutable_data(platform::CUDAPlace(0)); OpTeller::Global().SetOpConverterType(conv2d_op, OpConverterType::Default); diff --git a/paddle/fluid/inference/tensorrt/convert/test_split_op.cc b/paddle/fluid/inference/tensorrt/convert/test_split_op.cc index 1d23aeedc5a8d7..738097190767a3 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_split_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_split_op.cc @@ -28,20 +28,20 @@ void TensorRTSplitTest(const std::vector<int> &in_shape, framework::Scope scope; TRTConvertValidation validator(BatchSize + 1, parameters, scope, 10000); auto make_dim = [](const std::vector<int> &shape) { nvinfer1::Dims3 dim; dim.c() = shape[0]; dim.h() = shape[1]; dim.w() = shape[2]; return dim; }; validator.DeclInputVar("split_input", make_dim(in_shape)); std::vector<std::string> output_vars; for (size_t i = 0; i < sections.size(); ++i) { auto out_shape = in_shape; out_shape[Axis - 1] = sections[i]; std::string output_name = "split_out" + std::to_string(i); validator.DeclOutputVar(output_name, make_dim(out_shape)); output_vars.push_back(output_name); } diff --git a/paddle/fluid/inference/tensorrt/convert/ut_helper.h b/paddle/fluid/inference/tensorrt/convert/ut_helper.h index 8a41d564a09da3..8901d0a43fd41b 100644 --- a/paddle/fluid/inference/tensorrt/convert/ut_helper.h +++ b/paddle/fluid/inference/tensorrt/convert/ut_helper.h @@ -131,7 +131,7 @@ class TRTConvertValidation { auto* x = scope_.Var(name); auto* x_tensor = x->GetMutable(); - x_tensor->Resize(phi::make_ddim(dim_vec)); + x_tensor->Resize(common::make_ddim(dim_vec)); RandomizeTensor(x_tensor, place_, ctx); } // Declare a variable in a fluid Scope.
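These converter-test hunks are mechanical renames: with DDim moved under paddle/common, common::make_ddim and common::product are drop-in replacements for the old phi:: spellings. A minimal sketch of the two helpers as the hunks use them, assuming paddle/common/ddim.h keeps the old overload set (the function name below is illustrative only):

    #include <vector>
    #include "paddle/common/ddim.h"

    int64_t ExampleShapeMath() {
      // Build a DDim from a vector, exactly as the old phi::make_ddim did.
      common::DDim dims = common::make_ddim(std::vector<int64_t>{3, 2, 3, 3});
      // product multiplies all extents: 3 * 2 * 3 * 3 = 54 here. The next
      // hunk divides such a count by max_batch_size_ for a per-batch size.
      return common::product(dims);
    }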
@@ -226,7 +226,7 @@ class TRTConvertValidation { size_t fluid_out_size = fluid_outs[index].size(); if (if_add_batch_ == true) { fluid_out_size = - batch_size * (phi::product(tensor->dims()) / max_batch_size_); + batch_size * (common::product(tensor->dims()) / max_batch_size_); } for (size_t i = 0; i < fluid_out_size; i++) { diff --git a/paddle/fluid/inference/tensorrt/dynamic_shape_infermeta_factory.h b/paddle/fluid/inference/tensorrt/dynamic_shape_infermeta_factory.h index 0196d81754fdd9..599b8557f8ad8a 100644 --- a/paddle/fluid/inference/tensorrt/dynamic_shape_infermeta_factory.h +++ b/paddle/fluid/inference/tensorrt/dynamic_shape_infermeta_factory.h @@ -17,10 +17,10 @@ #include #include +#include "paddle/common/macros.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/platform/macros.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/macros.h" #include "paddle/utils/flat_hash_map.h" namespace paddle { diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 983b19ca4a8a12..dba6582eb36538 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -741,7 +741,7 @@ struct SimpleOpTypeSetTeller : public Teller { if (op_type == "affine_channel") { if (!desc.HasAttr("data_layout")) return false; - auto data_layout = phi::StringToDataLayout( + auto data_layout = common::StringToDataLayout( PADDLE_GET_CONST(std::string, desc.GetAttr("data_layout"))); if (data_layout != phi::DataLayout::kNCHW) return false; @@ -816,7 +816,7 @@ struct SimpleOpTypeSetTeller : public Teller { if (!desc.HasAttr(attr)) return false; } if (desc.HasAttr("data_layout")) { - auto data_layout = phi::StringToDataLayout( + auto data_layout = common::StringToDataLayout( PADDLE_GET_CONST(std::string, desc.GetAttr("data_layout"))); if (data_layout != phi::DataLayout::kNCHW && data_layout != phi::DataLayout::kNHWC) @@ -861,7 +861,7 @@ struct SimpleOpTypeSetTeller : public Teller { for (auto const& attr : attrs) { if (!desc.HasAttr(attr)) return false; } - auto data_layout = phi::StringToDataLayout( + auto data_layout = common::StringToDataLayout( PADDLE_GET_CONST(std::string, desc.GetAttr("data_layout"))); if (data_layout != phi::DataLayout::kNCHW && data_layout != phi::DataLayout::kNHWC) @@ -928,7 +928,7 @@ struct SimpleOpTypeSetTeller : public Teller { } } - auto data_layout = phi::StringToDataLayout( + auto data_layout = common::StringToDataLayout( PADDLE_GET_CONST(std::string, desc.GetAttr("data_layout"))); if (data_layout != phi::DataLayout::kNCHW && data_layout != phi::DataLayout::kNHWC) { diff --git a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt index bfc9e6b9072daf..9f45d49aa420f2 100644 --- a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt @@ -57,7 +57,11 @@ endif() nv_library( tensorrt_plugin SRCS ${TRT_FILES} - DEPS enforce tensorrt_engine tensor bert_encoder_functor + DEPS enforce + tensorrt_engine + tensor + bert_encoder_functor + common tensorrt_dynamic_shape_infermeta_factory tensorrt_plugin_arg_mapping_context) diff --git a/paddle/fluid/inference/tensorrt/plugin/elementwiseadd_transpose_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/elementwiseadd_transpose_op_plugin.cu index d2f373bca07de8..aee768b5df4b48 100644 --- a/paddle/fluid/inference/tensorrt/plugin/elementwiseadd_transpose_op_plugin.cu +++ 
b/paddle/fluid/inference/tensorrt/plugin/elementwiseadd_transpose_op_plugin.cu @@ -131,7 +131,7 @@ void ElementwiseAddTransposePluginDynamic::configurePlugin( if (x_numel <= 0) { return; } - ele_out_tensor_.Resize(phi::make_ddim(x_shape)); + ele_out_tensor_.Resize(common::make_ddim(x_shape)); paddle::platform::DeviceContextPool &pool = paddle::platform::DeviceContextPool::Instance(); platform::CUDAPlace place(platform::GetCurrentDeviceId()); @@ -139,20 +139,20 @@ void ElementwiseAddTransposePluginDynamic::configurePlugin( const phi::GPUContext &dev_ctx = *device_context; if (x_type == nvinfer1::DataType::kFLOAT) { - x_meta_ = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim(x_shape)); - y_meta_ = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim(y_shape)); - out_meta_ = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim(out_shape)); + x_meta_ = phi::DenseTensorMeta(phi::DataType::FLOAT32, + common::make_ddim(x_shape)); + y_meta_ = phi::DenseTensorMeta(phi::DataType::FLOAT32, + common::make_ddim(y_shape)); + out_meta_ = phi::DenseTensorMeta(phi::DataType::FLOAT32, + common::make_ddim(out_shape)); dev_ctx.template Alloc(&ele_out_tensor_, x_numel * sizeof(float)); } else if (x_type == nvinfer1::DataType::kHALF) { - x_meta_ = - phi::DenseTensorMeta(phi::DataType::FLOAT16, phi::make_ddim(x_shape)); - y_meta_ = - phi::DenseTensorMeta(phi::DataType::FLOAT16, phi::make_ddim(y_shape)); - out_meta_ = - phi::DenseTensorMeta(phi::DataType::FLOAT16, phi::make_ddim(out_shape)); + x_meta_ = phi::DenseTensorMeta(phi::DataType::FLOAT16, + common::make_ddim(x_shape)); + y_meta_ = phi::DenseTensorMeta(phi::DataType::FLOAT16, + common::make_ddim(y_shape)); + out_meta_ = phi::DenseTensorMeta(phi::DataType::FLOAT16, + common::make_ddim(out_shape)); dev_ctx.template Alloc( &ele_out_tensor_, x_numel * sizeof(phi::dtype::float16)); } diff --git a/paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu index c56f8da2044737..ba91d96b7b59a2 100644 --- a/paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/generic_plugin.cu @@ -593,7 +593,7 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc, for (int k = 0; k < input_shape.size(); k++) input_numel *= input_shape[k]; auto data_type_and_size = nvType2PhiType(input_desc[i].type); phi::DenseTensorMeta input_meta(data_type_and_size.first, - phi::make_ddim(input_shape)); + common::make_ddim(input_shape)); std::shared_ptr input_alloc( new phi::Allocation((void*)(inputs[i]), // NOLINT input_numel * data_type_and_size.second, @@ -617,7 +617,7 @@ int GenericPlugin::enqueue(const nvinfer1::PluginTensorDesc* input_desc, auto data_type_and_size = nvType2PhiType(output_desc[i].type); phi::DenseTensorMeta output_meta(data_type_and_size.first, - phi::make_ddim(output_shape)); + common::make_ddim(output_shape)); std::shared_ptr output_alloc( new phi::Allocation(reinterpret_cast(outputs[i]), output_numel * data_type_and_size.second, diff --git a/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.cu index 4d5517ef111ed4..c9e56f1d63823d 100644 --- a/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.cu @@ -18,8 +18,8 @@ limitations under the License. 
*/ #include "paddle/phi/kernels/group_norm_kernel.h" #include +#include "paddle/common/layout.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { @@ -395,7 +395,7 @@ int GroupNormPlugin::enqueue(int batch_size, for (int i = 0; i < input_dims.nbDims; i++) { input_shape.push_back(input_dims.d[i]); } - const auto input_ddim = phi::make_ddim(input_shape); + const auto input_ddim = common::make_ddim(input_shape); int C = input_shape[1]; @@ -578,7 +578,7 @@ int GroupNormPluginDynamic::enqueue( input_shape.push_back(input_dims.d[i]); } - const auto input_ddim = phi::make_ddim(input_shape); + const auto input_ddim = common::make_ddim(input_shape); int C = input_shape[1]; int image_size = input_shape[2] * input_shape[3]; diff --git a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu index 82e24bea09aaca..fe666415c6c00d 100644 --- a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu @@ -75,8 +75,8 @@ int InstanceNormPlugin::enqueue(int batch_size, int h = input_dims.d[1]; int w = input_dims.d[2]; - scale_t.Resize(phi::make_ddim({batch_size, c})); - bias_t.Resize(phi::make_ddim({batch_size, c})); + scale_t.Resize(common::make_ddim({batch_size, c})); + bias_t.Resize(common::make_ddim({batch_size, c})); int device_id; cudaGetDevice(&device_id); float *scale_d = scale_t.mutable_data(platform::CUDAPlace(device_id)); @@ -170,8 +170,8 @@ int InstanceNormPluginDynamic::enqueue( int h = input_dims.d[2]; int w = input_dims.d[3]; - scale_t.Resize(phi::make_ddim({n, c})); - bias_t.Resize(phi::make_ddim({n, c})); + scale_t.Resize(common::make_ddim({n, c})); + bias_t.Resize(common::make_ddim({n, c})); int device_id; cudaGetDevice(&device_id); float *scale_d = scale_t.mutable_data(platform::CUDAPlace(device_id)); diff --git a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu index 09a93d661bd351..da26780c975a11 100644 --- a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu @@ -107,8 +107,8 @@ int LayerNormPlugin::enqueue(int batch_size, for (int i = 0; i < input_dims.nbDims; i++) { input_shape.push_back(input_dims.d[i]); } - const auto input_ddim = phi::make_ddim(input_shape); - auto matrix_dim = phi::flatten_to_2d(input_ddim, begin_norm_axis); + const auto input_ddim = common::make_ddim(input_shape); + auto matrix_dim = common::flatten_to_2d(input_ddim, begin_norm_axis); int feature_size = static_cast(matrix_dim[1]); PADDLE_ENFORCE_EQ(feature_size, scale_.size(), @@ -127,8 +127,8 @@ int LayerNormPlugin::enqueue(int batch_size, int device_id; cudaGetDevice(&device_id); - mean_t.Resize(phi::make_ddim({batched_mean_shape})); - variance_t.Resize(phi::make_ddim({batched_variance_shape})); + mean_t.Resize(common::make_ddim({batched_mean_shape})); + variance_t.Resize(common::make_ddim({batched_variance_shape})); float *mean_d = mean_t.mutable_data(platform::CUDAPlace(device_id)); float *variance_d = variance_t.mutable_data(platform::CUDAPlace(device_id)); @@ -309,8 +309,8 @@ int LayerNormPluginDynamic::enqueue( "but got:%d", variance_shape_[0])); - const auto input_ddim = phi::make_ddim(input_shape); - auto matrix_dim = phi::flatten_to_2d(input_ddim, begin_norm_axis); + const auto 
input_ddim = phi::make_ddim(input_shape); - auto matrix_dim = phi::flatten_to_2d(input_ddim, begin_norm_axis); + const auto input_ddim = common::make_ddim(input_shape); + auto matrix_dim = common::flatten_to_2d(input_ddim, begin_norm_axis); int feature_size = static_cast<int>(matrix_dim[1]); PADDLE_ENFORCE_EQ(feature_size, scale_.size(), @@ -329,8 +329,8 @@ int LayerNormPluginDynamic::enqueue( int device_id; cudaGetDevice(&device_id); - mean_t.Resize(phi::make_ddim(mean_shape_)); - variance_t.Resize(phi::make_ddim(variance_shape_)); + mean_t.Resize(common::make_ddim(mean_shape_)); + variance_t.Resize(common::make_ddim(variance_shape_)); float *mean_d = mean_t.mutable_data<float>(platform::CUDAPlace(device_id)); float *variance_d = variance_t.mutable_data<float>(platform::CUDAPlace(device_id)); diff --git a/paddle/fluid/inference/tensorrt/plugin/preln_groupnorm_act_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/preln_groupnorm_act_op_plugin.cu index 01a91662c2f251..7ccf5d8a8a1bc7 100644 --- a/paddle/fluid/inference/tensorrt/plugin/preln_groupnorm_act_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/preln_groupnorm_act_op_plugin.cu @@ -16,8 +16,8 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/plugin/preln_groupnorm_act_op_plugin.h" #include +#include "paddle/common/layout.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { diff --git a/paddle/fluid/inference/tensorrt/plugin/skip_groupnorm_act_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/skip_groupnorm_act_op_plugin.cu index 45bd8688da18b4..95c408fa859251 100644 --- a/paddle/fluid/inference/tensorrt/plugin/skip_groupnorm_act_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/skip_groupnorm_act_op_plugin.cu @@ -16,8 +16,8 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/plugin/skip_groupnorm_act_op_plugin.h" #include +#include "paddle/common/layout.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { diff --git a/paddle/fluid/inference/tensorrt/plugin/trans_layernorm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/trans_layernorm_op_plugin.cu index a9177ee2d8f6ae..7fd486c30acc40 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trans_layernorm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/trans_layernorm_op_plugin.cu @@ -337,7 +337,7 @@ int TransLayerNormPluginDynamic::enqueue( std::vector<int> trans_result_shape{ input_shape[0], input_shape[2], input_shape[3], input_shape[1]}; - const auto input_ddim = phi::make_ddim(input_shape); + const auto input_ddim = common::make_ddim(input_shape); int feature_size = static_cast<int>(input_ddim[1]); PADDLE_ENFORCE_EQ(feature_size, scale_.size(), @@ -371,8 +371,8 @@ int TransLayerNormPluginDynamic::enqueue( auto *device_context = static_cast<phi::GPUContext *>(pool.Get(place)); const phi::GPUContext &dev_ctx = *device_context; - mean_t.Resize(phi::make_ddim(mean_shape_)); - variance_t.Resize(phi::make_ddim(variance_shape_)); + mean_t.Resize(common::make_ddim(mean_shape_)); + variance_t.Resize(common::make_ddim(variance_shape_)); float *mean_d = dev_ctx.template Alloc<float>(&mean_t, mean_shape_[0] * sizeof(float)); float *variance_d = dev_ctx.template Alloc<float>( @@ -388,15 +388,15 @@ int TransLayerNormPluginDynamic::enqueue( int trans_result_numel = input_numel; int norm_result_numel = input_numel; phi::DenseTensorMeta input_meta(phi::DataType::FLOAT32, - phi::make_ddim(input_shape)); + common::make_ddim(input_shape)); phi::DenseTensorMeta 
bias_meta(phi::DataType::FLOAT32, - phi::make_ddim({feature_size})); + common::make_ddim({feature_size})); phi::DenseTensorMeta scale_meta(phi::DataType::FLOAT32, - phi::make_ddim({feature_size})); - phi::DenseTensorMeta trans_result_meta(phi::DataType::FLOAT32, - phi::make_ddim(trans_result_shape)); - phi::DenseTensorMeta norm_result_meta(phi::DataType::FLOAT32, - phi::make_ddim(trans_result_shape)); + common::make_ddim({feature_size})); + phi::DenseTensorMeta trans_result_meta( + phi::DataType::FLOAT32, common::make_ddim(trans_result_shape)); + phi::DenseTensorMeta norm_result_meta( + phi::DataType::FLOAT32, common::make_ddim(trans_result_shape)); std::shared_ptr input_alloc(new phi::Allocation( static_cast(const_cast(input)), // NOLINT input_numel * sizeof(float), @@ -446,13 +446,13 @@ int TransLayerNormPluginDynamic::enqueue( if (input_desc[0].format == nvinfer1::PluginFormat::kLINEAR) { VLOG(1) << "TRT Plugin format selected. trans_layernorm-->kLINEAR"; phi::DenseTensorMeta input_meta(phi::DataType::FLOAT16, - phi::make_ddim(input_shape)); + common::make_ddim(input_shape)); std::shared_ptr input_alloc(new phi::Allocation( static_cast(const_cast(input)), // NOLINT input_numel * sizeof(half), place)); phi::DenseTensorMeta trans_result_meta( - phi::DataType::FLOAT16, phi::make_ddim(trans_result_shape)); + phi::DataType::FLOAT16, common::make_ddim(trans_result_shape)); std::shared_ptr trans_result_alloc( new phi::Allocation(static_cast(dst), // NOLINT trans_result_numel * sizeof(half), diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc index 8d2eb4cb4919b4..b565df0ec3d8cd 100644 --- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc @@ -15,11 +15,11 @@ limitations under the License. 
*/ #include #include +#include "paddle/common/layout.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/common/layout.h" #if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000) #include "paddle/fluid/inference/tensorrt/plugin/spmm_plugin.h" #endif @@ -86,7 +86,7 @@ class TensorRTDynamicShapeValueEngineTest : public ::testing::Test { void PrepareInputOutput(const std::vector &input, std::vector output_shape) { paddle::framework::TensorFromVector(input, *ctx_, &input_); - output_.Resize(phi::make_ddim(output_shape)); + output_.Resize(common::make_ddim(output_shape)); } void PrepareShapeInput(const std::vector &input) { paddle::framework::TensorFromVector(input, *ctx_, &shape_); @@ -202,7 +202,7 @@ class TensorRTDynamicEngineTest : public ::testing::Test { void PrepareInputOutput(const std::vector &input, std::vector output_shape) { paddle::framework::TensorFromVector(input, *ctx_, &input_); - output_.Resize(phi::make_ddim(output_shape)); + output_.Resize(common::make_ddim(output_shape)); } void GetOutput(std::vector *output) { @@ -377,7 +377,7 @@ class TensorRTDynamicTestFusedTokenPrune : public ::testing::Test { paddle::framework::TensorFromVector(inputs[i], *ctx_, &inputs_[i]); } for (int i = 0; i < num_outputs; ++i) { - outputs_[i].Resize(phi::make_ddim(output_shapes[i])); + outputs_[i].Resize(common::make_ddim(output_shapes[i])); } } @@ -573,7 +573,7 @@ class TensorRTDynamicTestFusedTokenPruneHalf : public ::testing::Test { paddle::framework::TensorFromVector(inputs[i], *ctx_, &inputs_[i]); } for (int i = 0; i < num_outputs; ++i) { - outputs_[i].Resize(phi::make_ddim(output_shapes[i])); + outputs_[i].Resize(common::make_ddim(output_shapes[i])); } } diff --git a/paddle/fluid/inference/tensorrt/test_engine.cc b/paddle/fluid/inference/tensorrt/test_engine.cc index 256af16fb155fc..8d64d0d0891445 100644 --- a/paddle/fluid/inference/tensorrt/test_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_engine.cc @@ -59,7 +59,7 @@ class TensorRTEngineTest : public ::testing::Test { void PrepareInputOutput(const std::vector &input, std::vector output_shape) { paddle::framework::TensorFromVector(input, *ctx_, &input_); - output_.Resize(phi::make_ddim(output_shape)); + output_.Resize(common::make_ddim(output_shape)); } void GetOutput(std::vector *output) { diff --git a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc index 176738ce6295e0..3cb30da55e407e 100644 --- a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc +++ b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc @@ -37,7 +37,7 @@ TRTInt8Calibrator::TRTInt8Calibrator( std::string input_name = it.first; int data_size = it.second; int num_ele = data_size / sizeof(int16_t); - framework::DDim data_shape = phi::make_ddim({num_ele}); + framework::DDim data_shape = common::make_ddim({num_ele}); temp_tensor.Resize(data_shape); data_tensors_.push_back(temp_tensor); data_buffers_[input_name] = std::pair( diff --git a/paddle/fluid/inference/utils/CMakeLists.txt b/paddle/fluid/inference/utils/CMakeLists.txt index 5804a637574f11..46b74a60ad4449 100644 --- a/paddle/fluid/inference/utils/CMakeLists.txt +++ b/paddle/fluid/inference/utils/CMakeLists.txt @@ -1,7 +1,7 @@ cc_library( benchmark SRCS benchmark.cc - DEPS enforce) + DEPS enforce common) paddle_test(test_benchmark SRCS benchmark_tester.cc DEPS benchmark) 
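The benchmark target gains a common dependency because the enforce machinery's error types now come from paddle/common. A sketch of the include and call pattern these targets rely on, assuming the phi::errors spelling remains an alias for the relocated common::errors (which is what lets existing call sites compile unchanged; the function name below is illustrative):

    #include "paddle/common/errors.h"
    #include "paddle/phi/core/enforce.h"

    void CheckRank(int rank) {
      // Same macro and factory spelling as before; only the home of the
      // error types moved, which is why DEPS gains common.
      PADDLE_ENFORCE_EQ(
          rank,
          2,
          phi::errors::InvalidArgument("expected rank 2, got %d", rank));
    }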
cc_library( infer_io_utils @@ -10,7 +10,7 @@ cc_library( cc_library( model_utils SRCS model_utils.cc - DEPS proto_desc enforce) + DEPS proto_desc enforce common) cc_test_old( infer_io_utils_tester diff --git a/paddle/fluid/ir_adaptor/translator/attribute_translator.cc b/paddle/fluid/ir_adaptor/translator/attribute_translator.cc index ebb58cc0ebf61d..928087c8cb8d8c 100644 --- a/paddle/fluid/ir_adaptor/translator/attribute_translator.cc +++ b/paddle/fluid/ir_adaptor/translator/attribute_translator.cc @@ -17,14 +17,14 @@ #include #include +#include "paddle/common/enforce.h" +#include "paddle/common/layout.h" #include "paddle/fluid/pir/dialect/operator/ir/op_attribute.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/int_array.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/place.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/utils/data_type.h" -#include "paddle/pir/core/enforce.h" #include "paddle/utils/variant.h" namespace paddle { diff --git a/paddle/fluid/ir_adaptor/translator/op_translator.cc b/paddle/fluid/ir_adaptor/translator/op_translator.cc index 3c21bffebabdbb..8db987eb20fd70 100644 --- a/paddle/fluid/ir_adaptor/translator/op_translator.cc +++ b/paddle/fluid/ir_adaptor/translator/op_translator.cc @@ -22,6 +22,7 @@ #include #include +#include "paddle/common/enforce.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/ir_adaptor/translator/attribute_translator.h" #include "paddle/fluid/ir_adaptor/translator/op_compat_info.h" @@ -37,7 +38,6 @@ #include "paddle/pir/core/builder.h" #include "paddle/pir/core/builtin_op.h" #include "paddle/pir/core/builtin_type.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/core/ir_context.h" #include "paddle/pir/core/operation.h" #include "paddle/pir/core/value.h" @@ -980,7 +980,7 @@ pir::OpResult TranslateDropOutStateIn(pir::IrContext* ctx, pir::Builder builder(ctx, block); dialect::FullOp full_op = builder.Build( - phi::vectorize(tensor_type.dims()), + common::vectorize(tensor_type.dims()), 0.0f, dialect::TransToPhiDataType(tensor_type.dtype()), phi::CPUPlace()); @@ -1331,7 +1331,7 @@ ValueInfo GetTensorInfoByVarName(const OpDesc& op_desc, dialect::DenseTensorType tensor_type = type.dyn_cast(); - std::vector shape = phi::vectorize(tensor_type.dims()); + std::vector shape = common::vectorize(tensor_type.dims()); return std::make_tuple(shape, tensor_type, value); } @@ -1416,7 +1416,7 @@ struct MulOpTranscriber : public OpTranscriber { builder.Build(x_value, x_new_shape); pir::OpResult x_new = reshape_op_x.out(); VLOG(6) << "[" << op_desc.Type() << "] x_shape change from " - << x_tensor_type.dims() << " to " << phi::make_ddim(x_new_shape); + << x_tensor_type.dims() << " to " << common::make_ddim(x_new_shape); std::vector y_new_shape( {std::max(std::accumulate(y_shape.begin(), @@ -1434,7 +1434,7 @@ struct MulOpTranscriber : public OpTranscriber { builder.Build(y_value, y_new_shape); pir::OpResult y_new = reshape_op_y.out(); VLOG(6) << "[" << op_desc.Type() << "] y_shape change from " - << y_tensor_type.dims() << " to " << phi::make_ddim(y_new_shape); + << y_tensor_type.dims() << " to " << common::make_ddim(y_new_shape); return {x_new, y_new}; } @@ -1482,7 +1482,7 @@ struct MulOpTranscriber : public OpTranscriber { pir::OpResult out_new = reshape_op_out.out().dyn_cast(); VLOG(6) << "[" << op_desc.Type() << "] out_shape change from " << out_tensor_type.dims() << " to " - << phi::make_ddim(out_new_shape); + << common::make_ddim(out_new_shape); 
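// Note: in the VLOG statements above, common::make_ddim serves only to
// pretty-print a std::vector<int64_t> shape through DDim's stream operator,
// so the log reads like "x_shape change from [M, K] to [M, N]"; no tensor
// is resized here.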
param_map->PushValue(output_name, VariableDefiningInfo(out_new, false, -1)); @@ -1579,7 +1579,7 @@ struct MulGradOpTranscriber : public OpTranscriber { builder.Build(x_value, x_new_shape); pir::OpResult x_new = reshape_op_x.out(); VLOG(6) << "[" << op_desc.Type() << "] x_shape change from " - << x_tensor_type.dims() << " to " << phi::make_ddim(x_new_shape); + << x_tensor_type.dims() << " to " << common::make_ddim(x_new_shape); std::vector y_new_shape( {std::max(std::accumulate(y_shape.begin(), @@ -1597,7 +1597,7 @@ struct MulGradOpTranscriber : public OpTranscriber { builder.Build(y_value, y_new_shape); pir::OpResult y_new = reshape_op_y.out(); VLOG(6) << "[" << op_desc.Type() << "] y_shape change from " - << y_tensor_type.dims() << " to " << phi::make_ddim(y_new_shape); + << y_tensor_type.dims() << " to " << common::make_ddim(y_new_shape); std::vector out_grad_new_shape( {x_new_shape.front(), y_new_shape.back()}); @@ -1607,7 +1607,7 @@ struct MulGradOpTranscriber : public OpTranscriber { pir::OpResult out_grad_new = reshape_op_out_grad.out(); VLOG(6) << "[" << op_desc.Type() << "] out_grad_shape change from " << out_grad_tensor_type.dims() << " to " - << phi::make_ddim(out_grad_new_shape); + << common::make_ddim(out_grad_new_shape); return {x_new, y_new, out_grad_new}; } @@ -1653,7 +1653,7 @@ struct MulGradOpTranscriber : public OpTranscriber { op_desc.Type(), var_name.substr(0, 1)); std::vector shape = var_desc->GetShape(); - DenseTensorTypeStorage::Dim dim = phi::make_ddim(shape); + DenseTensorTypeStorage::Dim dim = common::make_ddim(shape); pir::OpResult value_res = operation->result(idx_in_op); auto reshape_op = builder.Build(value_res, shape); @@ -2016,7 +2016,7 @@ struct ElementwiseTranscriber : public OpTranscriber { x_type); dialect::DenseTensorType x_tensor_type = x_type.dyn_cast(); - std::vector x_shape = phi::vectorize(x_tensor_type.dims()); + std::vector x_shape = common::vectorize(x_tensor_type.dims()); auto y_names = op_desc.Input("Y", true); IR_ENFORCE(y_names.size() == 1, @@ -2047,7 +2047,7 @@ struct ElementwiseTranscriber : public OpTranscriber { y_type); dialect::DenseTensorType y_tensor_type = y_type.dyn_cast(); - std::vector y_shape = phi::vectorize(y_tensor_type.dims()); + std::vector y_shape = common::vectorize(y_tensor_type.dims()); if (axis < 0) { axis += static_cast(x_shape.size()); @@ -2075,7 +2075,8 @@ struct ElementwiseTranscriber : public OpTranscriber { builder.Build(y_value, y_new_shape); y_new = reshape_op.out(); VLOG(6) << "[" << op_desc.Type() << "] y_shape change from " - << y_tensor_type.dims() << " to " << phi::make_ddim(y_new_shape); + << y_tensor_type.dims() << " to " + << common::make_ddim(y_new_shape); } else { auto shape_op = builder.Build(y_value); auto append_shape_op = builder.Build( @@ -2182,7 +2183,7 @@ struct ElementwiseGradTranscriber : public OpTranscriber { return; } - std::vector y_shape = phi::vectorize(y_tensor_type.dims()); + std::vector y_shape = common::vectorize(y_tensor_type.dims()); pir::Builder builder(ctx, operation->GetParent()); auto reshape_op = builder.Build(value, y_shape); param_map->PushValue(y_grad_var_name, @@ -2400,7 +2401,7 @@ struct RandIntOpTranscriber : public OpTranscriber { pir::Type dtype = type_translator[var_type](ctx, *var); paddle::dialect::DenseTensorTypeStorage::Dim dim = - phi::make_ddim(var->GetShape()); + common::make_ddim(var->GetShape()); paddle::dialect::DenseTensorTypeStorage::DataLayout layout = paddle::dialect::DenseTensorTypeStorage::DataLayout::UNDEFINED; 
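// DenseTensorTypeStorage::Dim is the relocated DDim type, which is why
// common::make_ddim(var->GetShape()) can initialize it directly from the
// VarDesc shape; the layout and LoD fields below keep their UNDEFINED and
// empty defaults.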
paddle::dialect::DenseTensorTypeStorage::LoD lod = {}; diff --git a/paddle/fluid/ir_adaptor/translator/program_translator.cc b/paddle/fluid/ir_adaptor/translator/program_translator.cc index 468f1f6b1d0282..9f8f2259550a84 100644 --- a/paddle/fluid/ir_adaptor/translator/program_translator.cc +++ b/paddle/fluid/ir_adaptor/translator/program_translator.cc @@ -17,6 +17,7 @@ #include #include "glog/logging.h" +#include "paddle/common/enforce.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/ir_adaptor/translator/attribute_translator.h" @@ -33,7 +34,6 @@ #include "paddle/pir/core/builtin_attribute.h" #include "paddle/pir/core/builtin_op.h" #include "paddle/pir/core/builtin_type.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/core/operation.h" #include "paddle/pir/core/value.h" #include "paddle/pir/dialect/control_flow/ir/cf_dialect.h" diff --git a/paddle/fluid/ir_adaptor/translator/type_translator.cc b/paddle/fluid/ir_adaptor/translator/type_translator.cc index b251ff5ae45da4..8a9a26373f085d 100644 --- a/paddle/fluid/ir_adaptor/translator/type_translator.cc +++ b/paddle/fluid/ir_adaptor/translator/type_translator.cc @@ -88,7 +88,8 @@ TypeTranslator::TypeTranslator() { pir::Type dtype = this->operator[](var_desc.GetDataType())(ctx, var_desc); - DenseTensorTypeStorage::Dim dim = phi::make_ddim(var_desc.GetShape()); + DenseTensorTypeStorage::Dim dim = + common::make_ddim(var_desc.GetShape()); DenseTensorTypeStorage::DataLayout layout = DenseTensorTypeStorage::DataLayout::UNDEFINED; DenseTensorTypeStorage::LoD lod = {}; @@ -114,7 +115,8 @@ TypeTranslator::TypeTranslator() { pir::Type dtype = this->operator[](var_desc.GetDataType())(ctx, var_desc); - SelectedRowsTypeStorage::Dim dim = phi::make_ddim(var_desc.GetShape()); + SelectedRowsTypeStorage::Dim dim = + common::make_ddim(var_desc.GetShape()); SelectedRowsTypeStorage::DataLayout layout = SelectedRowsTypeStorage::DataLayout::UNDEFINED; SelectedRowsTypeStorage::LoD lod = {}; diff --git a/paddle/fluid/ir_adaptor/translator/utils.cc b/paddle/fluid/ir_adaptor/translator/utils.cc index 7f50115c5c578e..ebba4428220f70 100644 --- a/paddle/fluid/ir_adaptor/translator/utils.cc +++ b/paddle/fluid/ir_adaptor/translator/utils.cc @@ -16,12 +16,12 @@ #include +#include "paddle/common/enforce.h" #include "paddle/fluid/ir_adaptor/translator/op_translator.h" #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h" #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" #include "paddle/pir/core/builtin_attribute.h" #include "paddle/pir/core/builtin_type.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/core/utils.h" namespace paddle { diff --git a/paddle/fluid/jit/layer.cc b/paddle/fluid/jit/layer.cc index d6986b51306ebd..0b2e20f77837a2 100644 --- a/paddle/fluid/jit/layer.cc +++ b/paddle/fluid/jit/layer.cc @@ -14,9 +14,9 @@ #include "paddle/fluid/jit/layer.h" +#include "paddle/common/errors.h" #include "paddle/fluid/framework/variable.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/fluid/jit/compilation_unit.h" #include "paddle/fluid/jit/engine/base_engine.h" diff --git a/paddle/fluid/jit/property.cc b/paddle/fluid/jit/property.cc index 9b0c50a954624c..687468df83a3dc 100644 --- a/paddle/fluid/jit/property.cc +++ b/paddle/fluid/jit/property.cc @@ -18,10 +18,10 @@ limitations under the License. 
*/ #include "glog/logging.h" +#include "paddle/common/errors.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/jit/property.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" namespace paddle { namespace jit { diff --git a/paddle/fluid/memory/CMakeLists.txt b/paddle/fluid/memory/CMakeLists.txt index d0f131ec931156..5b49d927ae6762 100644 --- a/paddle/fluid/memory/CMakeLists.txt +++ b/paddle/fluid/memory/CMakeLists.txt @@ -9,7 +9,7 @@ endif() cc_library( malloc SRCS malloc.cc - DEPS place enforce allocator profiler ${MKLDNN_CTX_DEPS}) + DEPS place enforce common allocator profiler ${MKLDNN_CTX_DEPS}) cc_library( memcpy SRCS memcpy.cc @@ -17,7 +17,7 @@ cc_library( cc_library( stats SRCS stats.cc - DEPS enforce) + DEPS enforce common) cc_library(memory DEPS malloc memcpy stats) cc_test( diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index 21ffde20022afc..ffce57d78f1642 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -1,6 +1,6 @@ include(ExternalProject) -set(ALLOCATOR_DEPS place stats profiler phi device_context) +set(ALLOCATOR_DEPS place stats profiler phi common device_context) set(ALLOCATOR_SRCS allocator.cc cpu_allocator.cc @@ -32,7 +32,7 @@ if(WITH_GPU OR WITH_ROCM) endif() if(WITH_GPU) - list(APPEND ALLOCATOR_DEPS phi) + list(APPEND ALLOCATOR_DEPS phi common) endif() if(CUDA_VERSION VERSION_GREATER_EQUAL 10.2) diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 6af73d8f48958d..d469b8e278f64d 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/memory/allocation/allocator_facade.h" +#include "paddle/common/macros.h" #include "paddle/fluid/memory/allocation/aligned_allocator.h" #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/allocation/allocator_strategy.h" @@ -25,7 +26,6 @@ #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" -#include "paddle/phi/core/macros.h" #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include diff --git a/paddle/fluid/memory/stats.cc b/paddle/fluid/memory/stats.cc index e18646f0e82bf9..fd61c4f2c39524 100644 --- a/paddle/fluid/memory/stats.cc +++ b/paddle/fluid/memory/stats.cc @@ -14,9 +14,9 @@ limitations under the License. 
*/ #include "paddle/fluid/memory/stats.h" +#include "paddle/common/macros.h" #include "paddle/fluid/memory/allocation/spin_lock.h" #include "paddle/fluid/platform/flags.h" -#include "paddle/phi/core/macros.h" PADDLE_DEFINE_EXPORTED_bool( log_memory_stats, diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 60efffc107dccd..fe5fae7bafaebb 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -90,7 +90,7 @@ if(WITH_UNITY_BUILD) include(unity_build_rule.cmake) endif() -set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_utils static_prim_api get_expected_kernel_func) +set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi common phi_utils static_prim_api get_expected_kernel_func) register_operators(EXCLUDES py_func_op dgc_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op lstm_op run_program_op quantize_linear_op recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS} processgroup_comm_utils) @@ -98,8 +98,8 @@ register_operators(EXCLUDES py_func_op dgc_op generated_op1 generated_op2 genera op_library(generated_op UNITY SRCS generated_op1.cc generated_op2.cc generated_op3.cc generated_op4.cc DEPS ${OP_HEADER_DEPS}) op_library(run_program_op DEPS executor_cache ${OP_HEADER_DEPS}) target_link_libraries(run_program_op cuda_graph_with_memory_pool) -op_library(quantize_linear_op DEPS phi) -op_library(save_combine_op DEPS string_array phi) +op_library(quantize_linear_op DEPS phi common) +op_library(save_combine_op DEPS string_array phi common) op_library(load_combine_op DEPS string_array) if (WITH_GPU OR WITH_ROCM) @@ -141,9 +141,9 @@ if (WITH_DGC) endif() cc_library(common_infer_shape_functions SRCS common_infer_shape_functions.cc DEPS operator) -cc_library(ops_extra_info SRCS ops_extra_info.cc DEPS attribute phi) +cc_library(ops_extra_info SRCS ops_extra_info.cc DEPS attribute phi common) -set(COMMON_OP_DEPS ${COMMON_OP_DEPS} phi) +set(COMMON_OP_DEPS ${COMMON_OP_DEPS} phi common) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_utils lod_tensor unpooling lod_rank_table context_project executor static_prim_api) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc static_prim_api static_utils static_global_utils prim_utils) @@ -191,7 +191,7 @@ endif() copy_if_different(${pybind_file} ${pybind_file_final}) if (WITH_CUSTOM_DEVICE) -cc_library(custom_device_common_op_registry SRCS custom_device_common_op_registry.cc DEPS operator phi type_info) +cc_library(custom_device_common_op_registry SRCS custom_device_common_op_registry.cc DEPS operator phi common type_info) endif() if(NOT "${OP_LIST}" STREQUAL "") diff --git a/paddle/fluid/operators/affine_channel_op.cc b/paddle/fluid/operators/affine_channel_op.cc index 137249a30d4553..f44c181cca0977 100644 --- a/paddle/fluid/operators/affine_channel_op.cc +++ b/paddle/fluid/operators/affine_channel_op.cc @@ -70,8 +70,8 @@ class AffineChannelOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputDim("X"); auto scale_dims = ctx->GetInputDim("Scale"); auto b_dims = ctx->GetInputDim("Bias"); - const phi::DataLayout data_layout = - phi::StringToDataLayout(ctx->Attrs().Get("data_layout")); + const phi::DataLayout data_layout = common::StringToDataLayout( + ctx->Attrs().Get("data_layout")); const int64_t C = (data_layout == phi::DataLayout::kNCHW ? 
x_dims[1] @@ -196,7 +196,7 @@ class AffineChannelKernel : public framework::OpKernel { y->mutable_data(ctx.GetPlace()); const phi::DataLayout layout = - phi::StringToDataLayout(ctx.Attr("data_layout")); + common::StringToDataLayout(ctx.Attr("data_layout")); auto dims = x->dims(); int N = static_cast(dims[0]); @@ -243,7 +243,7 @@ class AffineChannelGradKernel : public framework::OpKernel { auto* dbias = ctx.Output(framework::GradVarName("Bias")); const phi::DataLayout layout = - phi::StringToDataLayout(ctx.Attr("data_layout")); + common::StringToDataLayout(ctx.Attr("data_layout")); auto dims = x->dims(); int N = static_cast(dims[0]); diff --git a/paddle/fluid/operators/affine_channel_op.cu b/paddle/fluid/operators/affine_channel_op.cu index 6ec8d77da2c856..a07f311c6125ef 100644 --- a/paddle/fluid/operators/affine_channel_op.cu +++ b/paddle/fluid/operators/affine_channel_op.cu @@ -60,7 +60,7 @@ class AffineChannelCUDAKernel : public framework::OpKernel { y->mutable_data(ctx.GetPlace()); const phi::DataLayout layout = - phi::StringToDataLayout(ctx.Attr("data_layout")); + common::StringToDataLayout(ctx.Attr("data_layout")); auto& dev_ctx = ctx.template device_context(); auto dims = x->dims(); @@ -147,7 +147,7 @@ class AffineChannelGradCUDAKernel : public framework::OpKernel { auto* dbias = ctx.Output(framework::GradVarName("Bias")); const phi::DataLayout layout = - phi::StringToDataLayout(ctx.Attr("data_layout")); + common::StringToDataLayout(ctx.Attr("data_layout")); auto& dev_ctx = ctx.template device_context(); auto dims = dy->dims(); diff --git a/paddle/fluid/operators/affine_channel_op_xpu.cc b/paddle/fluid/operators/affine_channel_op_xpu.cc index 944a516f6c8f43..799bb87cf9892b 100644 --- a/paddle/fluid/operators/affine_channel_op_xpu.cc +++ b/paddle/fluid/operators/affine_channel_op_xpu.cc @@ -37,7 +37,7 @@ class AffineChannelXPUKernel : public framework::OpKernel { y->mutable_data(ctx.GetPlace()); const phi::DataLayout layout = - phi::StringToDataLayout(ctx.Attr("data_layout")); + common::StringToDataLayout(ctx.Attr("data_layout")); auto dims = x->dims(); int N = dims[0]; @@ -99,7 +99,7 @@ class AffineChannelGradXPUKernel : public framework::OpKernel { auto* dbias = ctx.Output(framework::GradVarName("Bias")); const phi::DataLayout layout = - phi::StringToDataLayout(ctx.Attr("data_layout")); + common::StringToDataLayout(ctx.Attr("data_layout")); auto dims = x->dims(); int N = dims[0]; diff --git a/paddle/fluid/operators/array_to_lod_tensor_op.cc b/paddle/fluid/operators/array_to_lod_tensor_op.cc index 2325de03211a30..2c85ec6ea2076b 100644 --- a/paddle/fluid/operators/array_to_lod_tensor_op.cc +++ b/paddle/fluid/operators/array_to_lod_tensor_op.cc @@ -107,11 +107,12 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { platform::Place place = x[0].place(); auto data_type = x[0].dtype(); int64_t batch_size = x[0].dims()[0]; - framework::DDim ins_dims = - rank > 1 ? phi::slice_ddim(x[0].dims(), 1, rank) : phi::make_ddim({0}); + framework::DDim ins_dims = rank > 1 + ? common::slice_ddim(x[0].dims(), 1, rank) + : common::make_ddim({0}); for (size_t i = 1; i < x.size(); ++i) { - auto ins_i_dims = rank > 1 ? phi::slice_ddim(x[i].dims(), 1, rank) - : phi::make_ddim({0}); + auto ins_i_dims = rank > 1 ? 
common::slice_ddim(x[i].dims(), 1, rank) + : common::make_ddim({0}); PADDLE_ENFORCE_EQ( ins_i_dims, ins_dims, @@ -144,9 +145,9 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { data_type)); batch_size += x[i].dims()[0]; } - auto ins_dim_vec = phi::vectorize(ins_dims); + auto ins_dim_vec = common::vectorize(ins_dims); ins_dim_vec.insert(ins_dim_vec.begin(), batch_size); - framework::DDim out_dims = phi::make_ddim(ins_dim_vec); + framework::DDim out_dims = common::make_ddim(ins_dim_vec); out->Resize(out_dims); out->mutable_data(place, data_type); diff --git a/paddle/fluid/operators/assign_value_op.h b/paddle/fluid/operators/assign_value_op.h index f5b74c5441174a..2a6a31ba03004d 100644 --- a/paddle/fluid/operators/assign_value_op.h +++ b/paddle/fluid/operators/assign_value_op.h @@ -126,7 +126,7 @@ class AssignValueKernel : public framework::OpKernel { break; } CopyVectorToTensor(value_name, out, ctx); - out->Resize(phi::make_ddim(shape)); + out->Resize(common::make_ddim(shape)); } }; diff --git a/paddle/fluid/operators/attention_lstm_op.cc b/paddle/fluid/operators/attention_lstm_op.cc index 4ec16e62f2ffad..7986bc8499427a 100644 --- a/paddle/fluid/operators/attention_lstm_op.cc +++ b/paddle/fluid/operators/attention_lstm_op.cc @@ -107,7 +107,7 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { "Expected input(H0)'s dimension is 2. But received %d.", h_dims.size())); if (ctx->IsRuntime() || - (phi::product(c_dims) > 0 && phi::product(h_dims) > 0)) { + (common::product(c_dims) > 0 && common::product(h_dims) > 0)) { PADDLE_ENFORCE_EQ(h_dims, c_dims, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 270e0debbdb1b6..fd05b018bbfb66 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -79,7 +79,7 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { } const DataLayout data_layout = - phi::StringToDataLayout(ctx->Attrs().Get("data_layout")); + common::StringToDataLayout(ctx->Attrs().Get("data_layout")); if (ctx->IsRuntime() && ctx->HasInput("MomentumTensor")) { auto mom = ctx->Inputs("MomentumTensor"); @@ -144,8 +144,9 @@ void BatchNormOp::InferShape(framework::InferShapeContext *ctx) const { bool check = true; if (!ctx->HasInput("Scale") || !ctx->HasInput("Bias") || - ((!ctx->IsRuntime()) && (phi::product(ctx->GetInputDim("Scale")) <= 0 || - phi::product(ctx->GetInputDim("Bias")) <= 0))) { + ((!ctx->IsRuntime()) && + (common::product(ctx->GetInputDim("Scale")) <= 0 || + common::product(ctx->GetInputDim("Bias")) <= 0))) { check = false; } @@ -229,7 +230,7 @@ phi::KernelKey BatchNormOp::GetKernelTypeForVar( auto attrs = Attrs(); auto ar = paddle::framework::AttrReader(attrs); const std::string data_layout = ar.Get("data_layout"); - auto dl = phi::StringToDataLayout(data_layout); + auto dl = common::StringToDataLayout(data_layout); // Some models may have intentionally set "AnyLayout" for pool // op. 
Treat this as NCHW (default data_format value) if (dl != phi::DataLayout::kAnyLayout) { @@ -368,7 +369,7 @@ void BatchNormGradOp::InferShape(framework::InferShapeContext *ctx) const { OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "BatchNormGrad"); const auto x_dims = ctx->GetInputDim("X"); const DataLayout data_layout = - phi::StringToDataLayout(ctx->Attrs().Get("data_layout")); + common::StringToDataLayout(ctx->Attrs().Get("data_layout")); const int C = static_cast( ((ctx->IsRunMKLDNNKernel() == true) || (data_layout == DataLayout::kNCHW) @@ -418,7 +419,7 @@ phi::KernelKey BatchNormGradOp::GetKernelTypeForVar( auto attrs = Attrs(); auto ar = paddle::framework::AttrReader(attrs); const std::string data_layout = ar.Get("data_layout"); - auto dl = phi::StringToDataLayout(data_layout); + auto dl = common::StringToDataLayout(data_layout); // Some models may have intentionally set "AnyLayout" for pool // op. Treat this as NCHW (default data_format value) if (dl != phi::DataLayout::kAnyLayout) { @@ -510,7 +511,7 @@ void BatchNormDoubleGradOp::InferShape( const auto x_dims = ctx->GetInputDim("X"); const DataLayout data_layout = - phi::StringToDataLayout(ctx->Attrs().Get("data_layout")); + common::StringToDataLayout(ctx->Attrs().Get("data_layout")); const int C = static_cast( ((ctx->IsRunMKLDNNKernel() == true) || (data_layout == DataLayout::kNCHW) ? x_dims[1] diff --git a/paddle/fluid/operators/bilateral_slice_op.cc b/paddle/fluid/operators/bilateral_slice_op.cc index 1a6561fc383cc6..111f128fc3cc6b 100644 --- a/paddle/fluid/operators/bilateral_slice_op.cc +++ b/paddle/fluid/operators/bilateral_slice_op.cc @@ -81,7 +81,7 @@ class BilateralSliceOp : public framework::OperatorWithKernel { output_dims.push_back(h); output_dims.push_back(w); - ctx->SetOutputDim("Out", phi::make_ddim(output_dims)); + ctx->SetOutputDim("Out", common::make_ddim(output_dims)); } protected: diff --git a/paddle/fluid/operators/bpr_loss_op.cc b/paddle/fluid/operators/bpr_loss_op.cc index 4a2928338251e1..c628bad0aa3c05 100644 --- a/paddle/fluid/operators/bpr_loss_op.cc +++ b/paddle/fluid/operators/bpr_loss_op.cc @@ -38,10 +38,10 @@ class BprLossOp : public framework::OperatorWithKernel { "Input(X) and Input(Label) shall have the same rank.")); if (ctx->IsRuntime() || - (phi::product(x_dims) > 0 && phi::product(label_dims) > 0)) { + (common::product(x_dims) > 0 && common::product(label_dims) > 0)) { PADDLE_ENFORCE_EQ( - phi::slice_ddim(x_dims, 0, rank - 1), - phi::slice_ddim(label_dims, 0, rank - 1), + common::slice_ddim(x_dims, 0, rank - 1), + common::slice_ddim(label_dims, 0, rank - 1), platform::errors::InvalidArgument( "Input(X) and Input(Label) shall have the same shape " "except the last dimension.")); @@ -93,13 +93,13 @@ class BprLossGradientOp : public framework::OperatorWithKernel { rank, platform::errors::InvalidArgument( "Input(Label) and Input(X) should have the same rank.")); - PADDLE_ENFORCE_EQ(phi::slice_ddim(x_dims, 0, rank - 1), - phi::slice_ddim(label_dims, 0, rank - 1), + PADDLE_ENFORCE_EQ(common::slice_ddim(x_dims, 0, rank - 1), + common::slice_ddim(label_dims, 0, rank - 1), platform::errors::InvalidArgument( "The Input(X) and Input(Label) should have the same " "shape except the last dimension.")); - PADDLE_ENFORCE_EQ(phi::slice_ddim(x_dims, 0, rank - 1), - phi::slice_ddim(dy_dims, 0, rank - 1), + PADDLE_ENFORCE_EQ(common::slice_ddim(x_dims, 0, rank - 1), + common::slice_ddim(dy_dims, 0, rank - 1), platform::errors::InvalidArgument( "The Input(X) and Input(Y@Grad) should have the same " "shape except 
the last dimension.")); diff --git a/paddle/fluid/operators/cinn/CMakeLists.txt b/paddle/fluid/operators/cinn/CMakeLists.txt index 5527aefff3cfa9..d56fd36c55c649 100644 --- a/paddle/fluid/operators/cinn/CMakeLists.txt +++ b/paddle/fluid/operators/cinn/CMakeLists.txt @@ -8,6 +8,7 @@ cc_library( cinn_launch_context SRCS cinn_launch_context.cc DEPS phi + common lod_tensor scope proto_desc diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.cc b/paddle/fluid/operators/cinn/cinn_launch_context.cc index 0700028807fc05..bd32fa2a875dbe 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_context.cc @@ -26,6 +26,7 @@ #include "paddle/cinn/hlir/framework/tensor.h" #include "paddle/cinn/runtime/cinn_runtime.h" #include "paddle/cinn/runtime/intrinsic.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/details/build_strategy.h" #include "paddle/fluid/framework/details/execution_strategy.h" @@ -42,7 +43,6 @@ #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/string/printf.h" -#include "paddle/phi/core/ddim.h" #include "paddle/pir/core/program.h" #include "paddle/pir/core/value.h" #include "paddle/utils/string/string_helper.h" @@ -267,12 +267,12 @@ void CinnLaunchContext::CheckTensorEquivalent( "Variable(%s) not applied in cinn", var_name)); // check dimension auto cinn_tensor = GetCinnTensorOfVar(var_name); - auto cinn_dims = phi::make_ddim(cinn_tensor->shape().data()); + auto cinn_dims = common::make_ddim(cinn_tensor->shape().data()); if (paddle_tensor.dims().size() == 0) { // VLOG when paddle inputs 0D-Tensor VLOG(4) << "Paddle inputs 0D-Tensor, CINN changes 0D-Tensor " << var_name << " to 1D-Tensor"; - PADDLE_ENFORCE_EQ(phi::make_ddim({1}), + PADDLE_ENFORCE_EQ(common::make_ddim({1}), cinn_dims, phi::errors::PreconditionNotMet( "Tensor's shape of variable(%s) are not consistent, " diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.h b/paddle/fluid/operators/cinn/cinn_launch_context.h index 34667bddc423d3..3d0b8d5f64b1d2 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.h +++ b/paddle/fluid/operators/cinn/cinn_launch_context.h @@ -21,10 +21,10 @@ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/platform/place.h" -#include "paddle/phi/core/ddim.h" // type declaration forward struct cinn_buffer_t; diff --git a/paddle/fluid/operators/class_center_sample_op.cu b/paddle/fluid/operators/class_center_sample_op.cu index efac6332c6d29c..ecfae25270f911 100644 --- a/paddle/fluid/operators/class_center_sample_op.cu +++ b/paddle/fluid/operators/class_center_sample_op.cu @@ -604,7 +604,7 @@ void ClassCenterSampleKernel(const Context& dev_ctx, true, &num_classes_per_device); T actual_num_samples = num_classes_per_device.data()[rank + 1]; - sampled_local_class_center->Resize(phi::make_ddim({actual_num_samples})); + sampled_local_class_center->Resize(common::make_ddim({actual_num_samples})); T* sampled_local_class_center_ptr = dev_ctx.template Alloc(sampled_local_class_center); diff --git a/paddle/fluid/operators/collective/CMakeLists.txt b/paddle/fluid/operators/collective/CMakeLists.txt index cef1390ed23907..1c8c8f00217cc5 100644 --- a/paddle/fluid/operators/collective/CMakeLists.txt +++ b/paddle/fluid/operators/collective/CMakeLists.txt @@ -18,7 +18,7 @@ foreach(src ${OPS}) 
endforeach() if(WITH_GLOO) - set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} gloo_wrapper phi) + set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} gloo_wrapper phi common) endif() register_operators( @@ -31,7 +31,8 @@ register_operators( ${COLLECTIVE_DEPS}) if(WITH_NCCL OR WITH_RCCL) - set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} nccl_common collective_helper phi) + set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} nccl_common collective_helper phi + common) op_library(c_gen_nccl_id_op DEPS ${COLLECTIVE_DEPS}) op_library(gen_nccl_id_op DEPS ${COLLECTIVE_DEPS}) endif() diff --git a/paddle/fluid/operators/collective/barrier_op.h b/paddle/fluid/operators/collective/barrier_op.h index 099d6cccb9a039..b05f2de53a0739 100644 --- a/paddle/fluid/operators/collective/barrier_op.h +++ b/paddle/fluid/operators/collective/barrier_op.h @@ -18,10 +18,10 @@ limitations under the License. */ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/ddim.h" #if defined(PADDLE_WITH_GLOO) #include diff --git a/paddle/fluid/operators/collective/c_allgather_op.cc b/paddle/fluid/operators/collective/c_allgather_op.cc index ab5d28b3a9db27..2a0087cd8aa72b 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cc @@ -33,7 +33,7 @@ class CAllGatherOp : public framework::OperatorWithKernel { framework::DDim dim = ctx->GetInputDim("X"); // 0D use stack/unstack while others use concat/split if (dim.size() == 0) { - dim = phi::make_ddim({nranks}); + dim = common::make_ddim({nranks}); } else { dim[0] = dim[0] * nranks; if (dim[0] < 0) dim[0] = -1; diff --git a/paddle/fluid/operators/collective/c_allgather_op.h b/paddle/fluid/operators/collective/c_allgather_op.h index c5373bf1304380..b4aff2c2363ec2 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.h +++ b/paddle/fluid/operators/collective/c_allgather_op.h @@ -18,10 +18,10 @@ limitations under the License. */ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/ddim.h" #if defined(PADDLE_WITH_GLOO) #include diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc index d7cbd5e8653669..4d49bc4990c6ec 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc @@ -74,7 +74,7 @@ class CBroadcastOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast( out->data(), numel, dtype, root, comm->comm(), stream)); VLOG(3) << "rank " << comm->rank() << " invoke Bcast. 
received " - << phi::product(out->dims()); + << common::product(out->dims()); } } diff --git a/paddle/fluid/operators/collective/c_embedding_op.cc b/paddle/fluid/operators/collective/c_embedding_op.cc index 637490e59b2d9d..c40f582085a082 100644 --- a/paddle/fluid/operators/collective/c_embedding_op.cc +++ b/paddle/fluid/operators/collective/c_embedding_op.cc @@ -40,9 +40,9 @@ class CEmbeddingOp : public framework::OperatorWithKernel { table_dims.size(), table_dims)); - auto output_dims = phi::vectorize(ids_dims); + auto output_dims = common::vectorize(ids_dims); output_dims.push_back(table_dims[1]); - ctx->SetOutputDim("Out", phi::make_ddim(output_dims)); + ctx->SetOutputDim("Out", common::make_ddim(output_dims)); if (ctx->GetOutputsVarType("Out")[0] == framework::proto::VarType::LOD_TENSOR) { diff --git a/paddle/fluid/operators/collective/c_reduce_op.h b/paddle/fluid/operators/collective/c_reduce_op.h index ada47430d4b562..20884d1ae8a969 100644 --- a/paddle/fluid/operators/collective/c_reduce_op.h +++ b/paddle/fluid/operators/collective/c_reduce_op.h @@ -19,11 +19,11 @@ limitations under the License. */ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/distributed/comm_context_manager.h" #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \ diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.h b/paddle/fluid/operators/collective/c_reducescatter_op.h index e523293e8e68c4..52af0b9c435412 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.h +++ b/paddle/fluid/operators/collective/c_reducescatter_op.h @@ -18,11 +18,11 @@ limitations under the License. */ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/collective/c_allreduce_op.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/global_gather_op.cc b/paddle/fluid/operators/collective/global_gather_op.cc index a78f40686e9250..de93ca747b4e9f 100644 --- a/paddle/fluid/operators/collective/global_gather_op.cc +++ b/paddle/fluid/operators/collective/global_gather_op.cc @@ -44,7 +44,7 @@ class GlobalGatherOp : public framework::OperatorWithKernel { "The input tensor's dimension must be 2. 
" "But received input's dimension = %d.", ndim_input)); - framework::DDim out_dims = phi::make_ddim({-1, -1}); + framework::DDim out_dims = common::make_ddim({-1, -1}); ctx->SetOutputDim("Out", out_dims); } diff --git a/paddle/fluid/operators/collective/global_gather_op.cu.cc b/paddle/fluid/operators/collective/global_gather_op.cu.cc index d95c194452174e..7a9c02628088fd 100644 --- a/paddle/fluid/operators/collective/global_gather_op.cu.cc +++ b/paddle/fluid/operators/collective/global_gather_op.cu.cc @@ -128,7 +128,7 @@ struct GlobalGatherFunctor { for (auto i = 0; i < local_count_len; ++i) { fwd_count += cpu_local_count_data[i]; } - framework::DDim out_dims = phi::make_ddim({fwd_count, in_feat}); + framework::DDim out_dims = common::make_ddim({fwd_count, in_feat}); int64_t* expert_ptr = new int64_t[n_expert * nranks]; expert_ptr[0] = 0; auto tot_experts = n_expert * nranks; @@ -268,7 +268,7 @@ struct GlobalGatherProcessGroupFunctor { for (auto i = 0; i < local_count_len; ++i) { fwd_count += cpu_local_count_data[i]; } - framework::DDim out_dims = phi::make_ddim({fwd_count, in_feat}); + framework::DDim out_dims = common::make_ddim({fwd_count, in_feat}); int64_t* expert_ptr = new int64_t[n_expert * nranks]; expert_ptr[0] = 0; auto tot_experts = n_expert * nranks; diff --git a/paddle/fluid/operators/collective/global_scatter_op.cc b/paddle/fluid/operators/collective/global_scatter_op.cc index dc6f1fd735baca..095f968306bdc2 100644 --- a/paddle/fluid/operators/collective/global_scatter_op.cc +++ b/paddle/fluid/operators/collective/global_scatter_op.cc @@ -47,7 +47,7 @@ class GlobalScatterOp : public framework::OperatorWithKernel { "But received input's dimension = %d.", ndim_input)); - framework::DDim out_dims = phi::make_ddim({-1, -1}); + framework::DDim out_dims = common::make_ddim({-1, -1}); ctx->SetOutputDim("Out", out_dims); } diff --git a/paddle/fluid/operators/collective/global_scatter_op.cu.cc b/paddle/fluid/operators/collective/global_scatter_op.cu.cc index d8cd6d4be5f54a..6b915d35be0430 100644 --- a/paddle/fluid/operators/collective/global_scatter_op.cu.cc +++ b/paddle/fluid/operators/collective/global_scatter_op.cu.cc @@ -133,7 +133,7 @@ struct GlobalScatterFunctor { for (auto i = 0; i < global_count_len; ++i) { fwd_count += cpu_global_count_data[i]; } - framework::DDim out_dims = phi::make_ddim({fwd_count, in_feat}); + framework::DDim out_dims = common::make_ddim({fwd_count, in_feat}); int64_t* expert_ptr = new int64_t[n_expert * nranks]; expert_ptr[0] = 0; auto tot_experts = n_expert * nranks; @@ -274,7 +274,7 @@ struct GlobalScatterProcessGroupFunctor { for (auto i = 0; i < global_count_len; ++i) { fwd_count += cpu_global_count_data[i]; } - framework::DDim out_dims = phi::make_ddim({fwd_count, in_feat}); + framework::DDim out_dims = common::make_ddim({fwd_count, in_feat}); int64_t* expert_ptr = new int64_t[n_expert * nranks]; expert_ptr[0] = 0; auto tot_experts = n_expert * nranks; diff --git a/paddle/fluid/operators/collective/partial_allgather_op.h b/paddle/fluid/operators/collective/partial_allgather_op.h index 815558d0227eb0..178545f4dd2d3c 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.h +++ b/paddle/fluid/operators/collective/partial_allgather_op.h @@ -18,10 +18,10 @@ limitations under the License. 
@@ -18,10 +18,10 @@ limitations under the License. */
 #include
 #include

+#include "paddle/common/ddim.h"
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/core/ddim.h"

 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/collective/partial_recv_op.cc b/paddle/fluid/operators/collective/partial_recv_op.cc
index 681864e4e1aa4b..5d8a1276a630e7 100644
--- a/paddle/fluid/operators/collective/partial_recv_op.cc
+++ b/paddle/fluid/operators/collective/partial_recv_op.cc
@@ -68,15 +68,15 @@ class PartialRecvOp : public framework::OperatorWithKernel {
                             i,
                             out_shape[i]));
     }
-    auto out_dims = phi::make_ddim(out_shape);
-    int64_t numel = phi::product(out_dims);
+    auto out_dims = common::make_ddim(out_shape);
+    int64_t numel = common::product(out_dims);
     PADDLE_ENFORCE_EQ(
         (numel % num),
         0,
         platform::errors::InvalidArgument(
             "The output numel (%d) must be divisible by num(%d)", numel, num));
-    ctx->SetOutputDim("Out", phi::make_ddim(out_shape));
+    ctx->SetOutputDim("Out", common::make_ddim(out_shape));
   }

 protected:
diff --git a/paddle/fluid/operators/collective/recv_v2_op.cc b/paddle/fluid/operators/collective/recv_v2_op.cc
index 260e676affdc36..40757ca89daa89 100644
--- a/paddle/fluid/operators/collective/recv_v2_op.cc
+++ b/paddle/fluid/operators/collective/recv_v2_op.cc
@@ -63,7 +63,7 @@ class RecvOpV2 : public framework::OperatorWithKernel {
                               i,
                               out_shape[i]));
       }
-      ctx->SetOutputDim("Out", phi::make_ddim(out_shape));
+      ctx->SetOutputDim("Out", common::make_ddim(out_shape));
     }
   }
 }
diff --git a/paddle/fluid/operators/collective/recv_v2_op.cu.cc b/paddle/fluid/operators/collective/recv_v2_op.cu.cc
index b85f37d2144f1d..41c2e70df8c35f 100644
--- a/paddle/fluid/operators/collective/recv_v2_op.cu.cc
+++ b/paddle/fluid/operators/collective/recv_v2_op.cu.cc
@@ -238,7 +238,7 @@ class RecvOpV2CUDAKernel : public framework::OpKernel {
         PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclRecv(
             out->data(), numel, dtype, peer, comm->comm(), stream));
         VLOG(3) << "rank " << comm->rank() << " recv "
-                << phi::product(out_dims) << " from " << peer;
+                << common::product(out_dims) << " from " << peer;
       }
     }
     return;
@@ -277,7 +277,7 @@ class RecvOpV2CUDAKernel : public framework::OpKernel {
       PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclRecv(
           out->data(), numel, dtype, peer, comm->comm(), stream));
       VLOG(3) << "rank " << comm->rank() << " recv "
-              << phi::product(out->dims()) << " from " << peer;
+              << common::product(out->dims()) << " from " << peer;
     }
 #else
     PADDLE_THROW(platform::errors::Unavailable(
diff --git a/paddle/fluid/operators/collective/send_v2_op.cu.cc b/paddle/fluid/operators/collective/send_v2_op.cu.cc
index 523830bbb187bf..86be6908e3cd28 100644
--- a/paddle/fluid/operators/collective/send_v2_op.cu.cc
+++ b/paddle/fluid/operators/collective/send_v2_op.cu.cc
@@ -225,8 +225,8 @@ class SendOpV2CUDAKernel : public framework::OpKernel {
         PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclSend(
             x.data(), numel, dtype, peer, comm->comm(), stream));
       }
-      VLOG(3) << "rank " << comm->rank() << " send " << phi::product(x.dims())
-              << " to " << peer;
+      VLOG(3) << "rank " << comm->rank() << " send "
+              << common::product(x.dims()) << " to " << peer;
     }
     return;
   }
@@ -251,8 +251,8 @@ class SendOpV2CUDAKernel : public framework::OpKernel {
           platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype()));
       PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclSend(
           x->data(), numel, dtype, peer, comm->comm(), stream));
-      VLOG(3) << "rank " << comm->rank() << " send " << phi::product(x->dims())
-              << " to " << peer;
+      VLOG(3) << "rank " << comm->rank() << " send "
+              << common::product(x->dims()) << " to " << peer;
     }
 #else
     PADDLE_THROW(platform::errors::Unavailable(
diff --git a/paddle/fluid/operators/common_infer_shape_functions.cc b/paddle/fluid/operators/common_infer_shape_functions.cc
index fcb58dcb242270..52836ead345a1b 100644
--- a/paddle/fluid/operators/common_infer_shape_functions.cc
+++ b/paddle/fluid/operators/common_infer_shape_functions.cc
@@ -102,7 +102,7 @@ framework::DDim BroadcastTwoDims(const framework::DDim &x_dims,
                                          out_dims_array.data(),
                                          max_dim,
                                          axis);
-  return phi::make_ddim(out_dims_array);
+  return common::make_ddim(out_dims_array);
 }

 }  // namespace details
diff --git a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc
index b9aff315444f52..c04e897aa63665 100644
--- a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc
+++ b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc
@@ -174,7 +174,7 @@ class ReadFromArrayOp : public ArrayOp {
         framework::AttributeMap attrs;
         attrs["dtype"] = framework::TransToProtoVarType(fw_var_tensor.dtype());
-        attrs["shape"] = phi::vectorize(fw_var_tensor.dims());
+        attrs["shape"] = common::vectorize(fw_var_tensor.dims());
         attrs["value"] = 0.0f;

         auto zero_op = framework::OpRegistry::CreateOp(
diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc
index 5c3e1c127606c8..5d4d65aa0a2ca6 100644
--- a/paddle/fluid/operators/controlflow/while_op.cc
+++ b/paddle/fluid/operators/controlflow/while_op.cc
@@ -397,10 +397,10 @@ class WhileGradOp : public framework::OperatorBase {
             auto shape = var_desc->GetShape();
             VLOG(8) << "Found uninitialized tensor " << outside_og_name
dims=" - << phi::make_ddim(shape); + << common::make_ddim(shape); framework::AttributeMap attrs; attrs["dtype"] = var_desc->GetDataType(); - attrs["shape"] = phi::vectorize(phi::make_ddim(shape)); + attrs["shape"] = common::vectorize(common::make_ddim(shape)); attrs["value"] = 0.0f; auto var_name = outside_og_name; @@ -541,7 +541,7 @@ class WhileGradOp : public framework::OperatorBase { framework::AttributeMap attrs; attrs["dtype"] = framework::TransToProtoVarType(inside_tensor.dtype()); - attrs["shape"] = phi::vectorize(inside_tensor.dims()); + attrs["shape"] = common::vectorize(inside_tensor.dims()); attrs["value"] = 0.0f; auto var_name = pg_ig_names[param_id]; diff --git a/paddle/fluid/operators/correlation_op.cc b/paddle/fluid/operators/correlation_op.cc index bedadbd18746c4..4547026e74e913 100644 --- a/paddle/fluid/operators/correlation_op.cc +++ b/paddle/fluid/operators/correlation_op.cc @@ -106,7 +106,7 @@ class CorrelationOp : public framework::OperatorWithKernel { kernel_size, pad_size, max_displacement); - ctx->SetOutputDim("Output", phi::make_ddim(output_shape)); + ctx->SetOutputDim("Output", common::make_ddim(output_shape)); } protected: diff --git a/paddle/fluid/operators/crop_op.cc b/paddle/fluid/operators/crop_op.cc index b615fbd58faeca..03d0b8a0a72be9 100644 --- a/paddle/fluid/operators/crop_op.cc +++ b/paddle/fluid/operators/crop_op.cc @@ -44,16 +44,16 @@ class CropOp : public framework::OperatorWithKernel { for (size_t i = 0; i < shape.size(); ++i) { tensor_shape[i] = static_cast(shape[i]); } - ctx->SetOutputDim("Out", phi::make_ddim(tensor_shape)); + ctx->SetOutputDim("Out", common::make_ddim(tensor_shape)); } else { auto y_dim = ctx->GetInputDim("Y"); - PADDLE_ENFORCE_EQ(phi::arity(x_dim), - phi::arity(y_dim), + PADDLE_ENFORCE_EQ(common::arity(x_dim), + common::arity(y_dim), platform::errors::InvalidArgument( "The number of dimensions (%d) of CropOp's input(X)" " must be equal to that (%d) of input(Y).", - phi::arity(x_dim), - phi::arity(y_dim))); + common::arity(x_dim), + common::arity(y_dim))); ctx->SetOutputDim("Out", y_dim); } } diff --git a/paddle/fluid/operators/crop_op.h b/paddle/fluid/operators/crop_op.h index 0c791f01bd9235..fdb2c538fd8a35 100644 --- a/paddle/fluid/operators/crop_op.h +++ b/paddle/fluid/operators/crop_op.h @@ -89,7 +89,7 @@ void CropFunction(const framework::ExecutionContext& context) { out_dims[0] = x->dims()[0]; } out->mutable_data(out_dims, context.GetPlace()); - auto x_stride = phi::stride(x->dims()); + auto x_stride = common::stride(x->dims()); auto offsets = GetOffsets(context); int64_t offset = 0; for (size_t i = 0; i < offsets.size(); ++i) { diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc index 0eeb917860735b..42ece4219187a5 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ b/paddle/fluid/operators/cross_entropy_op.cc @@ -34,14 +34,14 @@ class CrossEntropyOpBase : public framework::OperatorWithKernel { auto label_dims = ctx->GetInputDim("Label"); int rank = x_dims.size(); - bool contain_unknown_dim = phi::contain_unknown_dim(x_dims) || - phi::contain_unknown_dim(label_dims); + bool contain_unknown_dim = common::contain_unknown_dim(x_dims) || + common::contain_unknown_dim(label_dims); bool check = ctx->IsRuntime() || !contain_unknown_dim; if (check) { PADDLE_ENFORCE_EQ( - phi::slice_ddim(x_dims, 0, rank - 1), - phi::slice_ddim(label_dims, 0, rank - 1), + common::slice_ddim(x_dims, 0, rank - 1), + common::slice_ddim(label_dims, 0, rank - 1), platform::errors::InvalidArgument( 
"Input(X) and Input(Label) shall have the same shape " "except the last dimension. But received: the shape of Input(X) " @@ -166,15 +166,15 @@ class CrossEntropyGradientOpBase : public framework::OperatorWithKernel { dy_dims.size(), label_dims.size())); - bool contain_unknown_dim = - phi::contain_unknown_dim(x_dims) || phi::contain_unknown_dim(dy_dims); + bool contain_unknown_dim = common::contain_unknown_dim(x_dims) || + common::contain_unknown_dim(dy_dims); bool check = ctx->IsRuntime() || !contain_unknown_dim; if (check) { PADDLE_ENFORCE_EQ( - phi::slice_ddim(x_dims, 0, rank - 1), - phi::slice_ddim(dy_dims, 0, rank - 1), + common::slice_ddim(x_dims, 0, rank - 1), + common::slice_ddim(dy_dims, 0, rank - 1), platform::errors::InvalidArgument( "The Input(X) and Input(Y@Grad) should have the same " "shape except the last dimension. but received: " @@ -321,9 +321,9 @@ class CrossEntropyOp2 : public CrossEntropyOpBase { OP_INOUT_CHECK( ctx->HasOutput("MatchX"), "Output", "MatchX", "CrossEntropyOp2"); auto x_dims = ctx->GetInputDim("X"); - auto x_dims_vec = phi::vectorize(x_dims); + auto x_dims_vec = common::vectorize(x_dims); x_dims_vec.push_back(0); - ctx->SetOutputDim("XShape", phi::make_ddim(x_dims_vec)); + ctx->SetOutputDim("XShape", common::make_ddim(x_dims_vec)); x_dims[x_dims.size() - 1] = 1; ctx->SetOutputDim("MatchX", x_dims); ctx->ShareLoD("X", /*->*/ "XShape"); diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h index 6e6617c7bc4cca..d755cb1639572a 100644 --- a/paddle/fluid/operators/cross_entropy_op.h +++ b/paddle/fluid/operators/cross_entropy_op.h @@ -39,10 +39,10 @@ class CrossEntropyOpKernel : public framework::OpKernel { phi::DenseTensor labels_2d, y_2d; if (label_dims.size() < rank) { labels_2d.ShareDataWith(*labels); - labels_2d.Resize({phi::product(label_dims), 1}); + labels_2d.Resize({common::product(label_dims), 1}); y_2d.ShareDataWith(*y); - y_2d.Resize({phi::product(y->dims()), 1}); + y_2d.Resize({common::product(y->dims()), 1}); } else { labels_2d = phi::ReshapeToMatrix(*labels, rank - 1); @@ -250,7 +250,7 @@ class CrossEntropyOpKernel2 : public framework::OpKernel { auto& x_dims = x->dims(); auto feature_size = x_dims[x_dims.size() - 1]; - auto batch_size = phi::product(x->dims()) / feature_size; + auto batch_size = common::product(x->dims()) / feature_size; auto* p_x = x->data(); auto* p_label = label->data(); @@ -283,7 +283,7 @@ class CrossEntropyGradientOpKernel2 : public framework::OpKernel { int64_t ignore_index = ctx.Attr("ignore_index"); int rank = dx->dims().size(); int64_t feature_size = dx->dims()[rank - 1]; - int64_t batch_size = phi::product(dx->dims()) / feature_size; + int64_t batch_size = common::product(dx->dims()) / feature_size; platform::ForRange for_range( ctx.template device_context(), diff --git a/paddle/fluid/operators/ctc_align_op.h b/paddle/fluid/operators/ctc_align_op.h index 127d89b8f25fd0..faa2efab772a6e 100644 --- a/paddle/fluid/operators/ctc_align_op.h +++ b/paddle/fluid/operators/ctc_align_op.h @@ -33,7 +33,7 @@ class CTCAlignKernel : public framework::OpKernel { size_t blank = static_cast(ctx.Attr("blank")); bool merge_repeated = ctx.Attr("merge_repeated"); T* output_data = output->mutable_data(ctx.GetPlace()); - auto input_dims = phi::vectorize(input->dims()); + auto input_dims = common::vectorize(input->dims()); const T* input_data = input->data(); // support tensor input, no lod information diff --git a/paddle/fluid/operators/custom_device_common_op_registry.cc 
diff --git a/paddle/fluid/operators/custom_device_common_op_registry.cc b/paddle/fluid/operators/custom_device_common_op_registry.cc
index f01b0a92d89f82..9573809d6c7ccf 100644
--- a/paddle/fluid/operators/custom_device_common_op_registry.cc
+++ b/paddle/fluid/operators/custom_device_common_op_registry.cc
@@ -678,7 +678,7 @@ class CBroadcastOpCustomDeviceKernel : public framework::OpKernel {
                                 comm->GetXcclComm(),
                                 *stream);
       VLOG(3) << "rank " << comm->GetRank() << " invoke Bcast. received "
-              << phi::product(out->dims());
+              << common::product(out->dims());
     }
     out->set_lod(x->lod());
   }
@@ -956,7 +956,7 @@ class GlobalScatterOpCustomDeviceKernel : public framework::OpKernel {
       for (auto i = 0; i < global_count_len; ++i) {
         fwd_count += cpu_global_count_data[i];
       }
-      framework::DDim out_dims = phi::make_ddim({fwd_count, in_feat});
+      framework::DDim out_dims = common::make_ddim({fwd_count, in_feat});
       int64_t* expert_ptr = new int64_t[n_expert * nranks];
       expert_ptr[0] = 0;
       auto tot_experts = n_expert * nranks;
@@ -1038,7 +1038,7 @@ class GlobalScatterOpCustomDeviceKernel : public framework::OpKernel {
       for (auto i = 0; i < global_count_len; ++i) {
         fwd_count += cpu_global_count_data[i];
       }
-      framework::DDim out_dims = phi::make_ddim({fwd_count, in_feat});
+      framework::DDim out_dims = common::make_ddim({fwd_count, in_feat});
       int64_t* expert_ptr = new int64_t[n_expert * nranks];
       expert_ptr[0] = 0;
       auto tot_experts = n_expert * nranks;
@@ -1170,7 +1170,7 @@ class GlobalGatherOpCustomDeviceKernel : public framework::OpKernel {
       for (auto i = 0; i < local_count_len; ++i) {
         fwd_count += cpu_local_count_data[i];
       }
-      framework::DDim out_dims = phi::make_ddim({fwd_count, in_feat});
+      framework::DDim out_dims = common::make_ddim({fwd_count, in_feat});
       int64_t* expert_ptr = new int64_t[n_expert * nranks];
       expert_ptr[0] = 0;
       auto tot_experts = n_expert * nranks;
@@ -1250,7 +1250,7 @@ class GlobalGatherOpCustomDeviceKernel : public framework::OpKernel {
       for (auto i = 0; i < local_count_len; ++i) {
         fwd_count += cpu_local_count_data[i];
       }
-      framework::DDim out_dims = phi::make_ddim({fwd_count, in_feat});
+      framework::DDim out_dims = common::make_ddim({fwd_count, in_feat});
       int64_t* expert_ptr = new int64_t[n_expert * nranks];
       expert_ptr[0] = 0;
       auto tot_experts = n_expert * nranks;
diff --git a/paddle/fluid/operators/data_norm_op.cc b/paddle/fluid/operators/data_norm_op.cc
index 2e70168876162f..32cc8b49cd007b 100644
--- a/paddle/fluid/operators/data_norm_op.cc
+++ b/paddle/fluid/operators/data_norm_op.cc
@@ -66,8 +66,8 @@ class DataNormOp : public framework::OperatorWithKernel {
     }

     const auto x_dims = ctx->GetInputDim("X");
-    const DataLayout data_layout =
-        phi::StringToDataLayout(ctx->Attrs().Get("data_layout"));
+    const DataLayout data_layout = common::StringToDataLayout(
+        ctx->Attrs().Get("data_layout"));

     PADDLE_ENFORCE_EQ(x_dims.size() >= 2 && x_dims.size() <= 5,
                       true,
@@ -130,7 +130,7 @@ class DataNormOp : public framework::OperatorWithKernel {
     bool check = true;
     if ((!ctx->IsRuntime()) &&
-        (phi::product(scale_dim) <= 0 || phi::product(bias_dim) <= 0)) {
+        (common::product(scale_dim) <= 0 || common::product(bias_dim) <= 0)) {
       check = false;
     }
@@ -272,7 +272,7 @@ class DataNormKernel : public framework::OpKernel {
   void Compute(const framework::ExecutionContext &ctx) const override {
     // const bool is_test = ctx.Attr("is_test");
     const std::string data_layout_str = ctx.Attr("data_layout");
-    const DataLayout data_layout = phi::StringToDataLayout(data_layout_str);
+    const DataLayout data_layout = common::StringToDataLayout(data_layout_str);
     const auto *x = ctx.Input("X");
     const auto &x_dims = x->dims();
@@ -452,8 +452,8 @@ class DataNormGradOp : public framework::OperatorWithKernel {
                    "DataNormGrad");

     const auto x_dims = ctx->GetInputDim("X");
-    const DataLayout data_layout =
-        phi::StringToDataLayout(ctx->Attrs().Get("data_layout"));
+    const DataLayout data_layout = common::StringToDataLayout(
+        ctx->Attrs().Get("data_layout"));
     const int C = static_cast(data_layout == DataLayout::kNCHW
                                   ? x_dims[1]
                                   : x_dims[x_dims.size() - 1]);
@@ -516,7 +516,7 @@ class DataNormGradKernel : public framework::OpKernel {
     const auto *means = ctx.Input("Means");
     const std::string data_layout_str = ctx.Attr("data_layout");
-    const DataLayout data_layout = phi::StringToDataLayout(data_layout_str);
+    const DataLayout data_layout = common::StringToDataLayout(data_layout_str);

     // Get the size for each dimension.
     // NCHW [batch_size, in_channels, in_height, in_width]
diff --git a/paddle/fluid/operators/dequantize_abs_max_op.h b/paddle/fluid/operators/dequantize_abs_max_op.h
index a403d974a98637..5b07dfb2a9b001 100644
--- a/paddle/fluid/operators/dequantize_abs_max_op.h
+++ b/paddle/fluid/operators/dequantize_abs_max_op.h
@@ -16,9 +16,9 @@ limitations under the License. */

 #include

+#include "paddle/common/ddim.h"
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/core/ddim.h"

 namespace phi {
 class DenseTensor;
diff --git a/paddle/fluid/operators/dequantize_log_op.h b/paddle/fluid/operators/dequantize_log_op.h
index e13b25694f33e5..f17ba146461ae3 100644
--- a/paddle/fluid/operators/dequantize_log_op.h
+++ b/paddle/fluid/operators/dequantize_log_op.h
@@ -16,8 +16,8 @@ limitations under the License. */

 #include

+#include "paddle/common/ddim.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/core/ddim.h"

 namespace phi {
 class DenseTensor;
diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt
index fe32cc32d02d4b..d38a72556f7596 100644
--- a/paddle/fluid/operators/detection/CMakeLists.txt
+++ b/paddle/fluid/operators/detection/CMakeLists.txt
@@ -48,8 +48,9 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
 detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc)
 detection_library(generate_proposal_labels_op SRCS
                   generate_proposal_labels_op.cc)
-detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS phi)
-detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS phi)
+detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS phi common)
+detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS phi
+                  common)
 detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu)
 detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc
                   box_decoder_and_assign_op.cu)
diff --git a/paddle/fluid/operators/detection/anchor_generator_op.cc b/paddle/fluid/operators/detection/anchor_generator_op.cc
index 70c7430c0e23f8..8c3705ba3e760a 100644
--- a/paddle/fluid/operators/detection/anchor_generator_op.cc
+++ b/paddle/fluid/operators/detection/anchor_generator_op.cc
@@ -56,8 +56,8 @@ class AnchorGeneratorOp : public framework::OperatorWithKernel {
     dim_vec[1] = input_dims[3];
     dim_vec[2] = static_cast(num_anchors);
     dim_vec[3] = 4;
-    ctx->SetOutputDim("Anchors", phi::make_ddim(dim_vec));
-    ctx->SetOutputDim("Variances", phi::make_ddim(dim_vec));
+    ctx->SetOutputDim("Anchors", common::make_ddim(dim_vec));
+    ctx->SetOutputDim("Variances", common::make_ddim(dim_vec));
   }

  protected:
diff --git a/paddle/fluid/operators/detection/anchor_generator_op.h b/paddle/fluid/operators/detection/anchor_generator_op.h
index 9e667d9f99fc1c..ea36d1b47c849a 100644
--- a/paddle/fluid/operators/detection/anchor_generator_op.h
+++ b/paddle/fluid/operators/detection/anchor_generator_op.h
@@ -109,7 +109,7 @@ class AnchorGeneratorOpKernel : public framework::OpKernel {

     phi::DenseTensor var_t;
     var_t.mutable_data(
-        phi::make_ddim({1, static_cast(variances.size())}),
+        common::make_ddim({1, static_cast(variances.size())}),
         ctx.GetPlace());
     auto var_et = phi::EigenTensor::From(var_t);
     for (size_t i = 0; i < variances.size(); ++i) {
diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
index 8c97523559de64..552a6da3b34257 100644
--- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
+++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
@@ -127,11 +127,13 @@ class BoxDecoderAndAssignOp : public framework::OperatorWithKernel {
                             box_score_dims[1],
                             prior_box_dims[1]));
     }
-    ctx->SetOutputDim("DecodeBox",
-                      phi::make_ddim({target_box_dims[0], target_box_dims[1]}));
+    ctx->SetOutputDim(
+        "DecodeBox",
+        common::make_ddim({target_box_dims[0], target_box_dims[1]}));
     ctx->ShareLoD("TargetBox", /*->*/ "DecodeBox");
-    ctx->SetOutputDim("OutputAssignBox",
-                      phi::make_ddim({prior_box_dims[0], prior_box_dims[1]}));
+    ctx->SetOutputDim(
+        "OutputAssignBox",
+        common::make_ddim({prior_box_dims[0], prior_box_dims[1]}));
     ctx->ShareLoD("PriorBox", /*->*/ "OutputAssignBox");
   }
 };
diff --git a/paddle/fluid/operators/detection/density_prior_box_op.cc b/paddle/fluid/operators/detection/density_prior_box_op.cc
index bd4230fed59314..881ef3ac18f76f 100644
--- a/paddle/fluid/operators/detection/density_prior_box_op.cc
+++ b/paddle/fluid/operators/detection/density_prior_box_op.cc
@@ -92,8 +92,8 @@ class DensityPriorBoxOp : public framework::OperatorWithKernel {
       dim_vec[1] = input_dims[3];
       dim_vec[2] = static_cast(num_priors);
       dim_vec[3] = 4;
-      ctx->SetOutputDim("Boxes", phi::make_ddim(dim_vec));
-      ctx->SetOutputDim("Variances", phi::make_ddim(dim_vec));
+      ctx->SetOutputDim("Boxes", common::make_ddim(dim_vec));
+      ctx->SetOutputDim("Variances", common::make_ddim(dim_vec));
     } else if (ctx->IsRuntime()) {
       int64_t dim0 =
           static_cast(input_dims[2] * input_dims[3] * num_priors);
diff --git a/paddle/fluid/operators/detection/density_prior_box_op.h b/paddle/fluid/operators/detection/density_prior_box_op.h
index 9474e39cb7daf7..995abf11200130 100644
--- a/paddle/fluid/operators/detection/density_prior_box_op.h
+++ b/paddle/fluid/operators/detection/density_prior_box_op.h
@@ -123,7 +123,7 @@ class DensityPriorBoxOpKernel : public framework::OpKernel {
     }
     phi::DenseTensor var_t;
     var_t.mutable_data(
-        phi::make_ddim({1, static_cast(variances.size())}),
+        common::make_ddim({1, static_cast(variances.size())}),
         ctx.GetPlace());
     auto var_et = phi::EigenTensor::From(var_t);
diff --git a/paddle/fluid/operators/detection/iou_similarity_op.cc b/paddle/fluid/operators/detection/iou_similarity_op.cc
index ca107077232457..0f2ac1c86d6289 100644
--- a/paddle/fluid/operators/detection/iou_similarity_op.cc
+++ b/paddle/fluid/operators/detection/iou_similarity_op.cc
@@ -52,7 +52,7 @@ class IOUSimilarityOp : public framework::OperatorWithKernel {
                           "The shape of Y is [M, 4], but got dimension = %d.",
                           y_dims[1]));
     ctx->ShareLoD("X", /*->*/ "Out");
-    ctx->SetOutputDim("Out", phi::make_ddim({x_dims[0], y_dims[0]}));
ctx->SetOutputDim("Out", common::make_ddim({x_dims[0], y_dims[0]})); } }; diff --git a/paddle/fluid/operators/detection/mine_hard_examples_op.cc b/paddle/fluid/operators/detection/mine_hard_examples_op.cc index 4c3e934fab4dc8..0ce9979ff2a3d4 100644 --- a/paddle/fluid/operators/detection/mine_hard_examples_op.cc +++ b/paddle/fluid/operators/detection/mine_hard_examples_op.cc @@ -152,7 +152,7 @@ class MineHardExamplesKernel : public framework::OpKernel { out_neg_indices_lod.emplace_back(batch_starts); int neg_offset = 0; auto neg_data = out_neg_indices->mutable_data( - phi::make_ddim({static_cast(batch_starts.back()), 1}), + common::make_ddim({static_cast(batch_starts.back()), 1}), ctx.GetPlace()); for (auto neg_indices : all_neg_indices) { diff --git a/paddle/fluid/operators/detection/multiclass_nms_op.cc b/paddle/fluid/operators/detection/multiclass_nms_op.cc index 9f3f426d1ad853..be08e4e9680148 100644 --- a/paddle/fluid/operators/detection/multiclass_nms_op.cc +++ b/paddle/fluid/operators/detection/multiclass_nms_op.cc @@ -355,7 +355,7 @@ class MultiClassNMSKernel : public framework::OpKernel { auto index = ctx.Output("Index"); bool has_roisnum = ctx.HasInput("RoisNum") ? true : false; auto rois_num = ctx.Input("RoisNum"); - auto score_dims = phi::vectorize(scores->dims()); + auto score_dims = common::vectorize(scores->dims()); auto score_size = score_dims.size(); auto& dev_ctx = ctx.template device_context(); diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cc b/paddle/fluid/operators/detection/polygon_box_transform_op.cc index 936480a9e23ddb..0059aedcdc86ca 100644 --- a/paddle/fluid/operators/detection/polygon_box_transform_op.cc +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cc @@ -26,7 +26,7 @@ class PolygonBoxTransformCPUKernel : public framework::OpKernel { true, platform::errors::InvalidArgument("It must use CUDAPlace.")); auto* in = ctx.Input("Input"); - auto in_dims = phi::vectorize(in->dims()); + auto in_dims = common::vectorize(in->dims()); const T* in_data = in->data(); auto* out = ctx.Output("Output"); T* out_data = out->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/detection/prior_box_op.h b/paddle/fluid/operators/detection/prior_box_op.h index b49841399c71f9..b6a6e283479df3 100644 --- a/paddle/fluid/operators/detection/prior_box_op.h +++ b/paddle/fluid/operators/detection/prior_box_op.h @@ -181,7 +181,7 @@ class PriorBoxOpKernel : public framework::OpKernel { phi::DenseTensor var_t; var_t.mutable_data( - phi::make_ddim({1, static_cast(variances.size())}), + common::make_ddim({1, static_cast(variances.size())}), ctx.GetPlace()); auto var_et = phi::EigenTensor::From(var_t); diff --git a/paddle/fluid/operators/detection_map_op.cc b/paddle/fluid/operators/detection_map_op.cc index a342347986d145..a0879337f5ae75 100644 --- a/paddle/fluid/operators/detection_map_op.cc +++ b/paddle/fluid/operators/detection_map_op.cc @@ -85,7 +85,7 @@ class DetectionMAPOp : public framework::OperatorWithKernel { "Input(PosCount) is not null.")); } - ctx->SetOutputDim("MAP", phi::make_ddim({1})); + ctx->SetOutputDim("MAP", common::make_ddim({1})); } protected: diff --git a/paddle/fluid/operators/detection_map_op.h b/paddle/fluid/operators/detection_map_op.h index 9579e527a20d4b..ccf08349687939 100644 --- a/paddle/fluid/operators/detection_map_op.h +++ b/paddle/fluid/operators/detection_map_op.h @@ -257,12 +257,12 @@ class DetectionMAPOpKernel : public framework::OpKernel { } int* pos_count_data = output_pos_count->mutable_data( - 
-        phi::make_ddim({class_num, 1}), ctx.GetPlace());
+        common::make_ddim({class_num, 1}), ctx.GetPlace());
     T* true_pos_data = output_true_pos->mutable_data(
-        phi::make_ddim({true_pos_count, 2}), ctx.GetPlace());
+        common::make_ddim({true_pos_count, 2}), ctx.GetPlace());
     T* false_pos_data = output_false_pos->mutable_data(
-        phi::make_ddim({false_pos_count, 2}), ctx.GetPlace());
+        common::make_ddim({false_pos_count, 2}), ctx.GetPlace());
     true_pos_count = 0;
     false_pos_count = 0;
     std::vector true_pos_starts = {0};
diff --git a/paddle/fluid/operators/dlnne/dlnne_engine_op.h b/paddle/fluid/operators/dlnne/dlnne_engine_op.h
index ccc1764b0adb8d..3a8441d76a9dd8 100644
--- a/paddle/fluid/operators/dlnne/dlnne_engine_op.h
+++ b/paddle/fluid/operators/dlnne/dlnne_engine_op.h
@@ -30,6 +30,7 @@
 #include
 #include

+#include "paddle/common/ddim.h"
 #include "paddle/fluid/framework/data_device_transform.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/op_registry.h"
@@ -37,7 +38,6 @@
 #include "paddle/fluid/inference/analysis/helper.h"
 #include "paddle/fluid/inference/utils/io_utils.h"
 #include "paddle/fluid/platform/float16.h"
-#include "paddle/phi/core/ddim.h"

 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
@@ -423,7 +423,7 @@ class DlnneEngineOp : public framework::OperatorBase {
       // convert input and copy to Dlnne engine's buffer
       auto &t = inference::analysis::GetFromScope(scope, x);
-      auto t_shape = phi::vectorize(t.dims());
+      auto t_shape = common::vectorize(t.dims());
       std::vector runtime_input_shape(t_shape.begin(), t_shape.end());
       const int bind_index = index;
@@ -484,7 +484,7 @@ class DlnneEngineOp : public framework::OperatorBase {
       }
       input_buffers[bind_index] = buffer;
-      auto t_shape = phi::vectorize(t.dims());
+      auto t_shape = common::vectorize(t.dims());
       std::vector runtime_input_shape(t_shape.begin(), t_shape.end());
       for (auto &size : t_shape) {
         data_bytes = data_bytes * size;
@@ -562,7 +562,7 @@ class DlnneEngineOp : public framework::OperatorBase {
       VLOG(4) << bind_index
              << ": out_shapes[bind_index] dim:" << out_shapes[bind_index].size();
-      fluid_t->Resize(phi::make_ddim(out_shapes[bind_index]));
+      fluid_t->Resize(common::make_ddim(out_shapes[bind_index]));
       dl::nne::DataType dl_type = out_types[bind_index];
       if (dlnne_log_flag_) {
@@ -678,7 +678,7 @@ class DlnneEngineOp : public framework::OperatorBase {
       // TODO(pei.jiang): refine this code, because when run dlnne create
       // engine, there is same code
-      auto t_shape = phi::vectorize(t.dims());
+      auto t_shape = common::vectorize(t.dims());
       std::vector input_shape(t_shape.begin(), t_shape.end());
       calib_data_shape_map.emplace(x, input_shape);
       std::string data_type = inference::ConvertType(t.type());
diff --git a/paddle/fluid/operators/elementwise/elementwise_functor.h b/paddle/fluid/operators/elementwise/elementwise_functor.h
index 844b0a195031f5..718a78c7811ea3 100644
--- a/paddle/fluid/operators/elementwise/elementwise_functor.h
+++ b/paddle/fluid/operators/elementwise/elementwise_functor.h
@@ -11,8 +11,8 @@ limitations under the License. */
 #pragma once

+#include "paddle/common/array.h"
 #include "paddle/fluid/platform/complex.h"
-#include "paddle/phi/core/utils/array.h"
 #include "paddle/phi/kernels/funcs/elementwise_functor.h"

 namespace paddle {
diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h
index d8fb9a658fa00d..d835caedbf3c87 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op.h
@@ -119,13 +119,13 @@ class ElementwiseOp : public framework::OperatorWithKernel {
       if (should_rotate) {
         // Pick bigger shape and rotate this one
         bool x_over_y = (x_dims.size() > y_dims.size());
-        auto vdims = x_over_y ? phi::vectorize(x_dims)
-                              : phi::vectorize(y_dims);
+        auto vdims = x_over_y ? common::vectorize(x_dims)
+                              : common::vectorize(y_dims);
         std::rotate(vdims.begin() + 1, vdims.begin() + 2, vdims.end());
         if (x_over_y) {
-          x_dims = phi::make_ddim(vdims);
+          x_dims = common::make_ddim(vdims);
         } else {
-          y_dims = phi::make_ddim(vdims);
+          y_dims = common::make_ddim(vdims);
         }
       }
 #endif
@@ -145,7 +145,7 @@ class ElementwiseOp : public framework::OperatorWithKernel {
                                  out_dims_array.end());
     }
 #endif
-    ctx->SetOutputDim("Out", phi::make_ddim(out_dims_array));
+    ctx->SetOutputDim("Out", common::make_ddim(out_dims_array));
     // to do
     ctx->ShareLoD("X", /*->*/ "Out");
   }
diff --git a/paddle/fluid/operators/elementwise/elementwise_op_function.h b/paddle/fluid/operators/elementwise/elementwise_op_function.h
index 4894dff4b971ca..face0f758f8484 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op_function.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op_function.h
@@ -473,7 +473,7 @@ void FusedElemwiseAndActComputeNoBroadcast(
     CompoundFunctor compound_functor,
     phi::DenseTensor *out,
     phi::DenseTensor *intermediate_out) {
-  size_t N = static_cast(phi::product(x_dim));
+  size_t N = static_cast(common::product(x_dim));
   platform::ForRange for_range(
       ctx.template device_context(), N);
@@ -654,7 +654,7 @@ void FusedElemwiseAndActGradComputeNoBroadcast(
     DX_OP dx_op,
     DY_OP dy_op,
     DIntermediate_OP dintermediate_op) {
-  size_t N = static_cast(phi::product(x_dim));
+  size_t N = static_cast(common::product(x_dim));
   platform::ForRange for_range(
       ctx.template device_context(), N);
   const T *x_data = nullptr;
diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc
index c6e750f4fe0ecd..4c2dd992657812 100644
--- a/paddle/fluid/operators/expand_op.cc
+++ b/paddle/fluid/operators/expand_op.cc
@@ -67,7 +67,7 @@ class ExpandOp : public framework::OperatorWithKernel {
       }
     }

-    ctx->SetOutputDim("Out", phi::make_ddim(out_shape));
+    ctx->SetOutputDim("Out", common::make_ddim(out_shape));
     if (out_shape[0] == x_dims[0]) {
       ctx->ShareLoD("X", "Out");
     }
diff --git a/paddle/fluid/operators/fake_dequantize_op.cc b/paddle/fluid/operators/fake_dequantize_op.cc
index a941fb8171de3e..fde9a0ca0b8a25 100644
--- a/paddle/fluid/operators/fake_dequantize_op.cc
+++ b/paddle/fluid/operators/fake_dequantize_op.cc
@@ -121,9 +121,9 @@ struct ChannelDequantizeFunctor {
     const T* scale_two = scales[1]->data();
     for (int i = 0; i < batch_size; i++) {
       phi::DenseTensor one_batch_in = in->Slice(i, i + 1).Resize(
-          phi::slice_ddim(in->dims(), 1, in->dims().size()));
+          common::slice_ddim(in->dims(), 1, in->dims().size()));
       phi::DenseTensor one_batch_out = out->Slice(i, i + 1).Resize(
-          phi::slice_ddim(out->dims(), 1, out->dims().size()));
+          common::slice_ddim(out->dims(), 1, out->dims().size()));
       for (int j = 0; j < channel; j++) {
         T s = scale_one[j];
         phi::DenseTensor one_channel_in = one_batch_in.Slice(j, j + 1);
diff --git a/paddle/fluid/operators/fake_dequantize_op.h b/paddle/fluid/operators/fake_dequantize_op.h
index 2718ea6050176b..57887721308d4f 100644
--- a/paddle/fluid/operators/fake_dequantize_op.h
+++ b/paddle/fluid/operators/fake_dequantize_op.h
@@ -16,9 +16,9 @@ limitations under the License. */

 #include

+#include "paddle/common/ddim.h"
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/core/ddim.h"

 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/fake_quantize_op.cu.h b/paddle/fluid/operators/fake_quantize_op.cu.h
index cefe558ab245ed..bdf8a80debb649 100644
--- a/paddle/fluid/operators/fake_quantize_op.cu.h
+++ b/paddle/fluid/operators/fake_quantize_op.cu.h
@@ -82,7 +82,8 @@ struct FindAbsMaxFunctor {
     grid = (grid > block) ? block : grid;
     phi::DenseTensor max;
-    T *max_data = max.mutable_data(phi::make_ddim({grid}), ctx.GetPlace());
+    T *max_data =
+        max.mutable_data(common::make_ddim({grid}), ctx.GetPlace());
     FindAbsMaxKernel
         <<>>(in, num, max_data);
     FindAbsMaxKernel
diff --git a/paddle/fluid/operators/fake_quantize_op.h b/paddle/fluid/operators/fake_quantize_op.h
index 7fb47ebd1a76e7..13f1e5a3a26124 100644
--- a/paddle/fluid/operators/fake_quantize_op.h
+++ b/paddle/fluid/operators/fake_quantize_op.h
@@ -330,7 +330,7 @@ class FakeMovingAverageAbsMaxKernelBase : public framework::OpKernel {
     auto *in_state = context.Input("InState");
     phi::DenseTensor tmp_scale;
-    tmp_scale.Resize(phi::make_dim(1));
+    tmp_scale.Resize(common::make_dim(1));
     T *cur_scale_data = dev_ctx.template Alloc(&tmp_scale);
     FindAbsMaxFunctor()(
@@ -420,7 +420,7 @@ class MovingAverageAbsMaxScaleKernel : public framework::OpKernel {
     auto *in_accum = context.Input("InAccum");
     auto *in_state = context.Input("InState");
     phi::DenseTensor tmp_scale;
-    tmp_scale.Resize(phi::make_dim(1));
+    tmp_scale.Resize(common::make_dim(1));
     T *cur_scale_data = dev_ctx.template Alloc(&tmp_scale);
     FindAbsMaxFunctor()(
diff --git a/paddle/fluid/operators/fill_constant_op.cc b/paddle/fluid/operators/fill_constant_op.cc
index a398698f40cabb..1263d156ce220b 100644
--- a/paddle/fluid/operators/fill_constant_op.cc
+++ b/paddle/fluid/operators/fill_constant_op.cc
@@ -38,7 +38,7 @@ class FillConstantOp : public framework::OperatorWithKernel {
                 "than 0. But received: shape[%u] = %d; shape = [%s].",
                 i,
                 shape[i],
-                phi::make_ddim(shape)));
+                common::make_ddim(shape)));
       }
     }
     if (shape.empty() && ctx->HasInput("ShapeTensor")) {
@@ -48,11 +48,11 @@ class FillConstantOp : public framework::OperatorWithKernel {
         num_ele *= static_cast(shape_dims[i]);
       }
       auto vec_dims = std::vector(num_ele, -1);
-      ctx->SetOutputDim("Out", phi::make_ddim(vec_dims));
+      ctx->SetOutputDim("Out", common::make_ddim(vec_dims));
       return;
     }
-    ctx->SetOutputDim("Out", phi::make_ddim(shape));
+    ctx->SetOutputDim("Out", common::make_ddim(shape));
   }

 protected:
diff --git a/paddle/fluid/operators/fill_op.cc b/paddle/fluid/operators/fill_op.cc
index aeefe07d348e93..f8c4087344f9c9 100644
--- a/paddle/fluid/operators/fill_op.cc
+++ b/paddle/fluid/operators/fill_op.cc
@@ -47,7 +47,7 @@ class FillOp : public framework::OperatorWithKernel {
   void InferShape(framework::InferShapeContext* context) const override {
     OP_INOUT_CHECK(context->HasOutput("Out"), "Output", "Out", "Fill");
     auto& shape = context->Attrs().Get>("shape");
-    context->SetOutputDim("Out", phi::make_ddim(shape));
+    context->SetOutputDim("Out", common::make_ddim(shape));
   }

 protected:
diff --git a/paddle/fluid/operators/flatten_op.cc b/paddle/fluid/operators/flatten_op.cc
index ddb67eef4a3fa6..6325036dc01bfe 100644
--- a/paddle/fluid/operators/flatten_op.cc
+++ b/paddle/fluid/operators/flatten_op.cc
@@ -52,7 +52,7 @@ class Flatten2Op : public framework::OperatorWithKernel {
             "The axis should be less than or equal to input tensor's rank"));

     const auto &out_dims = Flatten2Op::GetOutputShape(axis, in_dims);
-    ctx->SetOutputDim("Out", phi::make_ddim(out_dims));
+    ctx->SetOutputDim("Out", common::make_ddim(out_dims));
     if (in_dims[0] == out_dims[0]) {
       // Only pass LoD when the first dimension of output and Input(X)
       // are the same.
@@ -65,7 +65,7 @@ class Flatten2Op : public framework::OperatorWithKernel {
     for (int i = 0; i < in_dims.size(); ++i) {
       xshape_dims[i + 1] = in_dims[i];
     }
-    ctx->SetOutputDim("XShape", phi::make_ddim(xshape_dims));
+    ctx->SetOutputDim("XShape", common::make_ddim(xshape_dims));
     ctx->ShareLoD("X", "XShape");
   }
@@ -189,7 +189,7 @@ class Flatten2GradOp : public framework::OperatorWithKernel {
                    framework::GradVarName("Out"),
                    "Flatten2Grad");
     auto xshape_dims = context->GetInputDim("XShape");
-    auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size());
+    auto x_dims = common::slice_ddim(xshape_dims, 1, xshape_dims.size());
     context->SetOutputDim(framework::GradVarName("X"), x_dims);
     context->ShareLoD("XShape", framework::GradVarName("X"));
   }
diff --git a/paddle/fluid/operators/flatten_op.h b/paddle/fluid/operators/flatten_op.h
index 6942a0f7db2da4..1b71627a067782 100644
--- a/paddle/fluid/operators/flatten_op.h
+++ b/paddle/fluid/operators/flatten_op.h
@@ -38,7 +38,7 @@ class Flatten2Kernel : public framework::OpKernel {
     auto *out = context.Output("Out");
-    auto out_dims = phi::make_ddim(GetOutputShape(axes, x_dims));
+    auto out_dims = common::make_ddim(GetOutputShape(axes, x_dims));
     out->mutable_data(context.GetPlace(), in->type());
     framework::TensorCopy(
@@ -78,7 +78,7 @@ class Flatten2GradKernel : public framework::OpKernel {
     auto *d_out = ctx.Input(framework::GradVarName("Out"));
     auto xshape_dims = ctx.Input("XShape")->dims();
-    auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size());
+    auto x_dims = common::slice_ddim(xshape_dims, 1, xshape_dims.size());
     d_x->mutable_data(ctx.GetPlace(), d_out->type());
     framework::TensorCopy(
diff --git a/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h b/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h
index 762e86406917dd..8f3b5e4f09a065 100644
--- a/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h
+++ b/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h
@@ -43,7 +43,7 @@ struct BNStatsFinalizeArgs {
         "The size of param_shape is expected to 4. But received "
         "param_shape's size is %d, param_shape is [%s].",
         param_shape.size(),
-        phi::make_ddim(param_shape)));
+        common::make_ddim(param_shape)));

     in_desc.set(param_shape, format, param_dtype);
     out_desc.set(param_shape, format, dtype);
diff --git a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h
index c82ccc959d204c..9dbb8a8eaebc8f 100644
--- a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h
+++ b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h
@@ -59,7 +59,7 @@ struct NormConvolutionArgs {
         "The size of input_shape is expected to 4. But received "
         "input_shape's size is %d, input_shape is [%s].",
         input_shape.size(),
-        phi::make_ddim(input_shape)));
+        common::make_ddim(input_shape)));
     PADDLE_ENFORCE_EQ(
         filter_shape.size(),
         4U,
@@ -67,14 +67,14 @@
         "The size of filter_shape is expected to 4. But received "
         "filter_shape's size is %d, filter_shape is [%s].",
         filter_shape.size(),
-        phi::make_ddim(filter_shape)));
+        common::make_ddim(filter_shape)));
     PADDLE_ENFORCE_EQ(filter_shape[1] == filter_shape[2] &&
                           (filter_shape[1] == 1 || filter_shape[1] == 3),
                       true,
                       platform::errors::InvalidArgument(
                           "The filter_shape is expected to store as nhwc, and "
                           "h = w = 1 or 3. But received filter_shape is [%s].",
-                          phi::make_ddim(filter_shape)));
+                          common::make_ddim(filter_shape)));
     PADDLE_ENFORCE_EQ((filter_shape[0] % 32 == 0 && filter_shape[3] % 8 == 0),
                       true,
                       platform::errors::InvalidArgument(
@@ -91,7 +91,7 @@ struct NormConvolutionArgs {
         "The size of output_shape is expected to 4. But received "
         "filter_shape's size is %d, filter_shape is [%s].",
         output_shape.size(),
-        phi::make_ddim(output_shape)));
+        common::make_ddim(output_shape)));
     is_support = IsSupport(ctx, filter_shape, stride, dilation, group);
     PADDLE_ENFORCE_EQ(
         is_support,
diff --git a/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h b/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h
index 4ecc5795ff41a4..8b731e2c55408b 100644
--- a/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h
+++ b/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h
@@ -47,7 +47,7 @@ struct ScaleBiasAddReluArgs {
         "The size of data_shape is expected to 4. But received "
         "data_shape's size is %d, data_shape is [%s].",
         data_shape.size(),
-        phi::make_ddim(data_shape)));
+        common::make_ddim(data_shape)));
     PADDLE_ENFORCE_EQ(
         param_shape.size(),
         4U,
@@ -55,7 +55,7 @@
         "The size of param_shape is expected to 4. But received "
         "param_shape's size is %d, param_shape is [%s].",
         param_shape.size(),
-        phi::make_ddim(param_shape)));
+        common::make_ddim(param_shape)));
     PADDLE_ENFORCE_EQ(
         bitmask_shape.size(),
         3U,
@@ -63,7 +63,7 @@
         "The size of bitmask_shape is expected to 3. But received "
         "bitmask_shape's size is %d, bitmask_shape is [%s].",
         bitmask_shape.size(),
-        phi::make_ddim(bitmask_shape)));
+        common::make_ddim(bitmask_shape)));

     in_desc.set(data_shape, format, dtype);
     out_desc.set(data_shape, format, dtype);
diff --git a/paddle/fluid/operators/fused/fused_attention_utils.h b/paddle/fluid/operators/fused/fused_attention_utils.h
index 7d17041133bcd7..b198c4a5792912 100644
--- a/paddle/fluid/operators/fused/fused_attention_utils.h
+++ b/paddle/fluid/operators/fused/fused_attention_utils.h
@@ -23,8 +23,8 @@
 PHI_DECLARE_bool(dynamic_static_unified_comm);
 #endif

+#include "paddle/common/errors.h"
 #include "paddle/phi/core/distributed/comm_context_manager.h"
-#include "paddle/phi/core/errors.h"

 namespace phi {
 namespace fusion {
diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_activation_op.cc
index ca59a466a5c2b6..2ea40d840d2b38 100644
--- a/paddle/fluid/operators/fused/fused_bn_activation_op.cc
+++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cc
@@ -128,7 +128,7 @@ void FusedBatchNormActOp::InferShape(framework::InferShapeContext *ctx) const {
     bool check = true;
     if ((!ctx->IsRuntime()) &&
-        (phi::product(scale_dim) <= 0 || phi::product(bias_dim) <= 0)) {
+        (common::product(scale_dim) <= 0 || common::product(bias_dim) <= 0)) {
       check = false;
     }
diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc
index ed416d4ad13d13..ac198e9cf2c258 100644
--- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc
+++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc
@@ -106,7 +106,7 @@ void FusedBatchNormAddActOp::InferShape(
     bool check = true;
     if ((!ctx->IsRuntime()) &&
-        (phi::product(scale_dim) <= 0 || phi::product(bias_dim) <= 0)) {
+        (common::product(scale_dim) <= 0 || common::product(bias_dim) <= 0)) {
       check = false;
     }
diff --git a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
index 96c400ea625d46..e69825fdd90765 100644
--- a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
+++ b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
@@ -303,15 +303,15 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel {
   bool is_reverse = ctx.Attr("is_reverse");   \
   bool use_peepholes = ctx.Attr("use_peepholes");

-#define INIT_BASE_SIZES                                  \
-  auto ids_dims = ids->dims();             /* T x M*/    \
-  auto ids_numel = phi::product(ids_dims); /* T x 1*/    \
-  auto wh_dims = wh->dims();               /* D x 4D*/   \
-  const int D = wh_dims[0];                              \
-  const int D2 = D * 2;                                  \
-  const int D3 = D * 3;                                  \
-  int64_t row_number = embeddings->dims()[0];            \
-  int64_t row_width = embeddings->dims()[1];             \
+#define INIT_BASE_SIZES                                     \
+  auto ids_dims = ids->dims();                /* T x M*/    \
+  auto ids_numel = common::product(ids_dims); /* T x 1*/    \
+  auto wh_dims = wh->dims();                  /* D x 4D*/   \
+  const int D = wh_dims[0];                                 \
+  const int D2 = D * 2;                                     \
+  const int D3 = D * 3;                                     \
+  int64_t row_number = embeddings->dims()[0];               \
+  int64_t row_width = embeddings->dims()[1];                \
   const int D4 = wh_dims[1];

 #define INIT_BASE_INPUT_DATAS                         \
diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
index 0bd497b4c5ae29..a0ee64bd2eced7 100644
--- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
+++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
@@ -68,7 +68,7 @@ class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel {
     // in compile time, the shape from Ids -> output
     // should be [-1, 1] -> [-1, embedding_size]
-    ctx->SetOutputDim("Out", phi::make_ddim({-1, last_dim}));
+    ctx->SetOutputDim("Out", common::make_ddim({-1, last_dim}));
   }

 protected:
diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cc b/paddle/fluid/operators/fused/fused_feedforward_op.cc
index faf2561e5d3ec6..f6343f5bd1cbf7 100644
--- a/paddle/fluid/operators/fused/fused_feedforward_op.cc
+++ b/paddle/fluid/operators/fused/fused_feedforward_op.cc
@@ -31,7 +31,7 @@ static framework::DDim RowMatrixFromVector(const framework::DDim &x_dim) {
   if (x_dim.size() > 1) {
     return x_dim;
   }
-  return phi::make_ddim({1, x_dim[0]});
+  return common::make_ddim({1, x_dim[0]});
 }

 class FusedFeedForwardOp : public framework::OperatorWithKernel {
@@ -97,7 +97,7 @@ class FusedFeedForwardOp : public framework::OperatorWithKernel {
       context->SetOutputDim("Dropout2Mask", dim_x);
     }
     framework::DDim mean_dim =
-        phi::make_ddim({mat_dim_x.batch_size_ * mat_dim_x.height_});
+        common::make_ddim({mat_dim_x.batch_size_ * mat_dim_x.height_});
     bool pre_layer_norm = context->Attrs().Get("pre_layer_norm");
     if (pre_layer_norm) {
       OP_INOUT_CHECK(context->HasOutput("Ln1Mean"),
diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cu b/paddle/fluid/operators/fused/fused_feedforward_op.cu
index ee40633e4252b3..656f8ba6ad0acb 100644
--- a/paddle/fluid/operators/fused/fused_feedforward_op.cu
+++ b/paddle/fluid/operators/fused/fused_feedforward_op.cu
@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include "paddle/common/errors.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/fused/fused_attention_utils.h"
 #include "paddle/phi/api/include/tensor.h"
-#include "paddle/phi/core/errors.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 #include "paddle/phi/kernels/funcs/broadcast_function.h"
 #include "paddle/phi/kernels/funcs/elementwise_functor.h"
diff --git a/paddle/fluid/operators/fused/fused_gate_attention.h b/paddle/fluid/operators/fused/fused_gate_attention.h
index 89f17f24b74a1e..69fbca0f9be0fc 100644
--- a/paddle/fluid/operators/fused/fused_gate_attention.h
+++ b/paddle/fluid/operators/fused/fused_gate_attention.h
@@ -903,8 +903,8 @@ class FlashAttnWithGating {
     AllocWithDebugInfo(dev_ctx_, "softmax_lse", softmax_lse);

     if (VLOG_IS_ON(6)) {
-      VLOG(6) << "temp_mask_dim={" << phi::make_ddim(temp_mask_dim) << "}";
-      VLOG(6) << "temp_bias_dim={" << phi::make_ddim(temp_bias_dim) << "}";
+      VLOG(6) << "temp_mask_dim={" << common::make_ddim(temp_mask_dim) << "}";
+      VLOG(6) << "temp_bias_dim={" << common::make_ddim(temp_bias_dim) << "}";
       VLOG(6) << TensorDebugString(&cu_seq_q, "cu_seq_q");
       VLOG(6) << TensorDebugString(&cu_seq_k, "cu_seq_k");
       VLOG(6) << TensorDebugString(nonbatched_bias, "nonbatched_bias");
@@ -998,12 +998,12 @@ class FlashAttnWithGating {
     const T* v_ptr = k_ptr + q_size;

     phi::DenseTensor qkv_transpose_out_grad;
-    qkv_transpose_out_grad.Resize(phi::make_ddim({3,
-                                                  config->batch_size,
-                                                  config->seq_len_m,
-                                                  config->seq_len_r,
-                                                  config->num_heads,
-                                                  config->head_dim}));
+    qkv_transpose_out_grad.Resize(common::make_ddim({3,
+                                                     config->batch_size,
+                                                     config->seq_len_m,
+                                                     config->seq_len_r,
+                                                     config->num_heads,
+                                                     config->head_dim}));

     AllocWithDebugInfo(
         dev_ctx_, "qkv_transpose_out_grad", &qkv_transpose_out_grad);
diff --git a/paddle/fluid/operators/fused/fused_gate_attention_op.cu b/paddle/fluid/operators/fused/fused_gate_attention_op.cu
index 9caca507c08bbd..d066086bd6ae06 100644
--- a/paddle/fluid/operators/fused/fused_gate_attention_op.cu
+++ b/paddle/fluid/operators/fused/fused_gate_attention_op.cu
@@ -412,12 +412,12 @@ class FusedGateAttentionOpKernel : public framework::OpKernel {
       ComputeMergedQKVMatmulForward(ctx, config, query, qkv_out);

       if (config.CanUseFlashAttn()) {
-        qkv_transpose_out->Resize(phi::make_ddim({3,
-                                                  config.batch_size,
-                                                  config.seq_len_m,
-                                                  config.seq_len_r,
-                                                  config.num_heads,
-                                                  config.head_dim}));
+        qkv_transpose_out->Resize(common::make_ddim({3,
+                                                     config.batch_size,
+                                                     config.seq_len_m,
+                                                     config.seq_len_r,
+                                                     config.num_heads,
+                                                     config.head_dim}));
       }
       AllocWithDebugInfo(dev_ctx, "qkv_transpose_out", qkv_transpose_out);
     } else {
diff --git a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc
index fb0550b0c10e5e..2d6a1122b0c286 100644
--- a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc
+++ b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc
@@ -73,7 +73,7 @@ class FusedGemmEpilogueOp : public framework::OperatorWithKernel {
             y_dims));

     auto x_mat_dims =
-        phi::flatten_to_2d(x_dims, trans_x ? 1 : x_dims.size() - 1);
+        common::flatten_to_2d(x_dims, trans_x ? 1 : x_dims.size() - 1);

     int K_from_x = trans_x ? x_mat_dims[0] : x_mat_dims[1];
y_dims[1] : y_dims[0]; @@ -100,11 +100,11 @@ class FusedGemmEpilogueOp : public framework::OperatorWithKernel { } else { out_dims.push_back(y_dims[1]); } - ctx->SetOutputDim("Out", phi::make_ddim(out_dims)); + ctx->SetOutputDim("Out", common::make_ddim(out_dims)); auto activation = ctx->Attrs().Get("activation"); if (ctx->HasOutput("ReserveSpace")) { - ctx->SetOutputDim("ReserveSpace", phi::make_ddim(out_dims)); + ctx->SetOutputDim("ReserveSpace", common::make_ddim(out_dims)); if (activation == "none") { PADDLE_THROW(platform::errors::InvalidArgument( @@ -235,8 +235,8 @@ class FusedGemmEpilogueGradOp : public framework::OperatorWithKernel { dout_dims.size(), x_dims.size())); - auto dout_mat_dims = phi::flatten_to_2d(dout_dims, dout_dims.size() - 1); - auto x_mat_dims = phi::flatten_to_2d(x_dims, x_dims.size() - 1); + auto dout_mat_dims = common::flatten_to_2d(dout_dims, dout_dims.size() - 1); + auto x_mat_dims = common::flatten_to_2d(x_dims, x_dims.size() - 1); PADDLE_ENFORCE_EQ( dout_mat_dims[1], @@ -272,7 +272,7 @@ class FusedGemmEpilogueGradOp : public framework::OperatorWithKernel { if (ctx->HasOutput("DBias")) { int64_t dbias_dim = trans_y ? y_dims[0] : y_dims[1]; - ctx->SetOutputDim("DBias", phi::make_ddim({dbias_dim})); + ctx->SetOutputDim("DBias", common::make_ddim({dbias_dim})); } } diff --git a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu index 6d7319c1db2721..2ae9f65c4e5a27 100644 --- a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu +++ b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu @@ -38,8 +38,8 @@ phi::funcs::MatmulFusedType GetFwdFusedEpilogueType( fused_type = FusedType::kMatmulBiasRelu; } else { fused_type = FusedType::kMatmulBiasReluWithReservedData; - int64_t reserve_size = - SizeOf(phi::DataType::BOOL) * phi::product(reserve_space->dims()); + int64_t reserve_size = SizeOf(phi::DataType::BOOL) * + common::product(reserve_space->dims()); ctx.Alloc(reserve_space, phi::DataType::BOOL, reserve_size); } } else if (activation == "gelu") { @@ -47,7 +47,8 @@ phi::funcs::MatmulFusedType GetFwdFusedEpilogueType( fused_type = FusedType::kMatmulBiasGelu; } else { fused_type = FusedType::kMatmulBiasGeluWithReservedData; - int64_t reserve_size = sizeof(T) * phi::product(reserve_space->dims()); + int64_t reserve_size = + sizeof(T) * common::product(reserve_space->dims()); ctx.Alloc(reserve_space, reserve_size); } } else { @@ -85,7 +86,7 @@ class FusedGemmEpilogueKernel : public framework::OpKernel { dev_ctx.Alloc(out, out->numel() * sizeof(T)); // (M * K) * (K * N) auto x_mat_dims = - phi::flatten_to_2d(x->dims(), trans_x ? 1 : x->dims().size() - 1); + common::flatten_to_2d(x->dims(), trans_x ? 1 : x->dims().size() - 1); int64_t M = trans_x ? x_mat_dims[1] : x_mat_dims[0]; int64_t K = trans_y ? y->dims()[1] : y->dims()[0]; int64_t N = trans_y ? y->dims()[0] : y->dims()[1]; @@ -142,7 +143,7 @@ class FusedGemmEpilogueGradKernel : public framework::OpKernel { // (M * K) * (K * N) auto x_mat_dims = - phi::flatten_to_2d(x->dims(), trans_x ? 1 : x->dims().size() - 1); + common::flatten_to_2d(x->dims(), trans_x ? 1 : x->dims().size() - 1); int64_t M = trans_x ? x_mat_dims[1] : x_mat_dims[0]; int64_t K = trans_y ? y->dims()[1] : y->dims()[0]; int64_t N = trans_y ? 
y->dims()[0] : y->dims()[1]; diff --git a/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc b/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc index 58d81ebf8be06a..fb6afbf5d256d8 100644 --- a/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc +++ b/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc @@ -46,7 +46,7 @@ class FusedGemmEpilogueXPUKernel : public framework::OpKernel { << " , activation = " << activation; auto x_mat_dims = - phi::flatten_to_2d(x->dims(), trans_x ? 1 : x->dims().size() - 1); + common::flatten_to_2d(x->dims(), trans_x ? 1 : x->dims().size() - 1); // (M * K) * (K * N) for new api use // int64_t M = trans_x ? x_mat_dims[1] : x_mat_dims[0]; @@ -168,7 +168,7 @@ class FusedGemmEpilogueXPUGradKernel : public framework::OpKernel { } auto x_mat_dims = - phi::flatten_to_2d(x->dims(), trans_x ? 1 : x->dims().size() - 1); + common::flatten_to_2d(x->dims(), trans_x ? 1 : x->dims().size() - 1); phi::XpuFcInfo info_forward; phi::GetFCInfo(x_mat_dims, y->dims(), trans_x, trans_y, &info_forward); diff --git a/paddle/fluid/operators/fused/fused_matmul_op.cc b/paddle/fluid/operators/fused/fused_matmul_op.cc index 198fd61a150780..129f7e85386e70 100644 --- a/paddle/fluid/operators/fused/fused_matmul_op.cc +++ b/paddle/fluid/operators/fused/fused_matmul_op.cc @@ -37,7 +37,7 @@ static std::vector GetInputShape(phi::DDim dim, if (is_input_fused) { dim = dim.reshape(shape).transpose(axis); } - return phi::vectorize(dim); + return common::vectorize(dim); } class FusedMatmulOp : public framework::OperatorWithKernel { @@ -50,8 +50,8 @@ class FusedMatmulOp : public framework::OperatorWithKernel { bool trans_x = ctx->Attrs().Get("trans_x"); bool trans_y = ctx->Attrs().Get("trans_y"); - std::vector dims_x = phi::vectorize(ctx->GetInputDim("X")); - std::vector dims_y = phi::vectorize(ctx->GetInputDim("Y")); + std::vector dims_x = common::vectorize(ctx->GetInputDim("X")); + std::vector dims_y = common::vectorize(ctx->GetInputDim("Y")); auto ndims_x = dims_x.size(); auto ndims_y = dims_y.size(); PADDLE_ENFORCE_GT( @@ -112,7 +112,7 @@ class FusedMatmulOp : public framework::OperatorWithKernel { new_dims.push_back(N); // NOLINT } - ctx->SetOutputDim("Out", phi::make_ddim(new_dims)); + ctx->SetOutputDim("Out", common::make_ddim(new_dims)); ctx->ShareLoD("X", "Out"); }; diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc index d8ef46b040e8b5..3dbba2bf42ce44 100644 --- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc +++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc @@ -101,7 +101,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { } else { out_dim = {batch_size, dims[rank - 1] - cvm_offset}; } - outs_dims[i] = phi::make_ddim(out_dim); + outs_dims[i] = common::make_ddim(out_dim); } } else { for (size_t i = 0; i < num_inputs; ++i) { @@ -123,7 +123,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { } else { out_dim = {-1, dims[rank - 1] - cvm_offset}; } - outs_dims[i] = phi::make_ddim(out_dim); + outs_dims[i] = common::make_ddim(out_dim); } } ctx->SetOutputsDim("Out", outs_dims); diff --git a/paddle/fluid/operators/fused/fusion_conv_inception_op.cu b/paddle/fluid/operators/fused/fusion_conv_inception_op.cu index 0674fc419938c3..63f065e0fef496 100644 --- a/paddle/fluid/operators/fused/fusion_conv_inception_op.cu +++ b/paddle/fluid/operators/fused/fusion_conv_inception_op.cu @@ -65,7 +65,7 @@ class CUDNNConvInceptionFusionOpKernel : public 
framework::OpKernel { dev_ctx.Alloc(temp_outs[0], temp_outs[0]->numel() * sizeof(T)); DataLayout layout = DataLayout::kNCHW; - std::vector in_dim = phi::vectorize(input->dims()); + std::vector in_dim = common::vectorize(input->dims()); // ------------------- cudnn descriptors --------------------- PoolingMode pooling_mode; @@ -87,9 +87,9 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel { pool_desc.descriptor(pooling_mode, k3x3, k1x1, k1x1); cudnnTensorDescriptor_t cudnn_input_desc = - input_desc.descriptor(layout, phi::vectorize(input->dims())); - cudnnTensorDescriptor_t pool_out_desc = - out_pool_desc.descriptor(layout, phi::vectorize(input->dims())); + input_desc.descriptor(layout, common::vectorize(input->dims())); + cudnnTensorDescriptor_t pool_out_desc = out_pool_desc.descriptor( + layout, common::vectorize(input->dims())); cudnnDataType_t cudnn_dtype = CudnnDataType::type; cudnnTensorDescriptor_t* out_desc = new cudnnTensorDescriptor_t[4]; @@ -130,7 +130,7 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel { : CUDNN_DATA_FLOAT; for (int i = 0; i < 4; ++i) { - filter_dims.push_back(phi::vectorize(filters[i]->dims())); + filter_dims.push_back(common::vectorize(filters[i]->dims())); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSetFilterNdDescriptor( filter_desc[i], cudnn_dtype, format, 4, filter_dims[i].data())); bias_dims.push_back({1, filter_dims[i][0], 1, 1}); @@ -260,7 +260,7 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel { in_datas.push_back(static_cast(input_data)); in_datas.push_back( static_cast(output_data + (oc0 + oc1) * h * w)); - temp_outs[1]->Resize(phi::make_ddim(out_dims[2])); + temp_outs[1]->Resize(common::make_ddim(out_dims[2])); T* temp2_data = dev_ctx.Alloc(temp_outs[1], temp_outs[1]->numel() * sizeof(T)); in_datas.push_back(static_cast(temp2_data + oc2 * h * w)); diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc index 4972db58043226..ada14e280a0f3c 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc @@ -353,15 +353,15 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel { cell = cell; auto x_dims = input->dims(); auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1) - ? phi::flatten_to_2d(x_dims, 1) + ? common::flatten_to_2d(x_dims, 1) : x_dims; // Get attributes const bool is_reverse = ctx.Attr("is_reverse"); const bool use_peepholes = ctx.Attr("use_peepholes"); // Get tensor dimensions - const auto x_mat_dims_vec = phi::vectorize(x_mat_dims); - const auto weight_h_dims = phi::vectorize(weight_h->dims()); + const auto x_mat_dims_vec = common::vectorize(x_mat_dims); + const auto weight_h_dims = common::vectorize(weight_h->dims()); const auto& input_lod = input->lod()[0]; // Calculate RNN dimensions diff --git a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc index 1c8e0a1b56a977..4dd6a9a48a16d6 100644 --- a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc @@ -26,7 +26,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using phi::vectorize; +using common::vectorize; using phi::funcs::OneDNNGetDataType; using phi::funcs::OneDNNMemDesc; using Direction = dnnl::rnn_direction; diff --git a/paddle/fluid/operators/fused/multi_gru_op.cc b/paddle/fluid/operators/fused/multi_gru_op.cc index 1ef675cb1d8f83..54d0860ef1ccfb 100644 --- a/paddle/fluid/operators/fused/multi_gru_op.cc +++ b/paddle/fluid/operators/fused/multi_gru_op.cc @@ -32,7 +32,7 @@ void MultiGRUOp::InferShape(framework::InferShapeContext* ctx) const { OP_INOUT_CHECK(ctx->HasOutput("Hidden"), "Output", "Hidden", "multi_gru"); auto x_dims = ctx->GetInputDim("X"); auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1) - ? phi::flatten_to_2d(x_dims, 1) + ? common::flatten_to_2d(x_dims, 1) : x_dims; PADDLE_ENFORCE_EQ( x_mat_dims.size(), diff --git a/paddle/fluid/operators/fused/resnet_basic_block_op.cc b/paddle/fluid/operators/fused/resnet_basic_block_op.cc index d17e6c9872a029..58125a9b7f6740 100644 --- a/paddle/fluid/operators/fused/resnet_basic_block_op.cc +++ b/paddle/fluid/operators/fused/resnet_basic_block_op.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace operators { @@ -182,8 +182,8 @@ class ResNetBasicBlockOp : public framework::OperatorWithKernel { int out2_w = (out1_w + padding2 * 2 - filter2_size) / stride2 + 1; std::vector out2_shape = {batch, output2_channel, out2_h, out2_w}; - auto y_dims = phi::make_ddim(out2_shape); - auto conv1_dims = phi::make_ddim(out1_shape); + auto y_dims = common::make_ddim(out2_shape); + auto conv1_dims = common::make_ddim(out1_shape); ctx->SetOutputDim("Y", y_dims); ctx->SetOutputDim("Conv1", conv1_dims); ctx->SetOutputDim("SavedMean1", bn1_param_dims); @@ -206,7 +206,7 @@ class ResNetBasicBlockOp : public framework::OperatorWithKernel { bool find_max = ctx->Attrs().Get("find_conv_input_max"); if (find_max) { - auto max_dims = phi::make_ddim({6}); + auto max_dims = common::make_ddim({6}); ctx->SetOutputDim("MaxInput1", max_dims); ctx->SetOutputDim("MaxFilter1", max_dims); ctx->SetOutputDim("MaxInput2", max_dims); diff --git a/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc b/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc index 3855ea38544609..bd918924cdf09f 100644 --- a/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc +++ b/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc @@ -51,16 +51,16 @@ class ResnetBasicBlockAttr { auto conv1_out = ctx.Output("Conv1"); auto filter2 = ctx.Input("Filter2"); auto conv2_out = ctx.Output("Conv2"); - conv1_input_shape = phi::vectorize(input1->dims()); - conv1_output_shape = phi::vectorize(conv1_out->dims()); - conv1_filter_shape = phi::vectorize(filter1->dims()); + conv1_input_shape = common::vectorize(input1->dims()); + conv1_output_shape = common::vectorize(conv1_out->dims()); + conv1_filter_shape = common::vectorize(filter1->dims()); conv1_filter_numel = filter1->numel(); conv1_input_numel = input1->numel(); conv1_output_numel = conv1_out->numel(); - conv2_input_shape = phi::vectorize(conv1_out->dims()); - conv2_output_shape = phi::vectorize(conv2_out->dims()); - conv2_filter_shape = phi::vectorize(filter2->dims()); + conv2_input_shape = common::vectorize(conv1_out->dims()); + conv2_output_shape = 
common::vectorize(conv2_out->dims()); + conv2_filter_shape = common::vectorize(filter2->dims()); conv2_filter_numel = filter2->numel(); conv2_input_numel = conv1_out->numel(); conv2_output_numel = conv2_out->numel(); @@ -68,9 +68,9 @@ class ResnetBasicBlockAttr { if (has_shortcut) { auto filter3 = ctx.Input("Filter3"); auto conv3_out = ctx.Output("Conv3"); - conv3_input_shape = phi::vectorize(input1->dims()); - conv3_output_shape = phi::vectorize(conv3_out->dims()); - conv3_filter_shape = phi::vectorize(filter3->dims()); + conv3_input_shape = common::vectorize(input1->dims()); + conv3_output_shape = common::vectorize(conv3_out->dims()); + conv3_filter_shape = common::vectorize(filter3->dims()); conv3_filter_numel = filter3->numel(); conv3_input_numel = input1->numel(); conv3_output_numel = conv3_out->numel(); @@ -139,16 +139,16 @@ class ResnetBasicBlockGradAttr { auto conv1_out = ctx.Input("Conv1"); auto filter2 = ctx.Input("Filter2"); auto conv2_out = ctx.Input("Conv2"); - conv1_input_shape = phi::vectorize(input1->dims()); - conv1_output_shape = phi::vectorize(conv1_out->dims()); - conv1_filter_shape = phi::vectorize(filter1->dims()); + conv1_input_shape = common::vectorize(input1->dims()); + conv1_output_shape = common::vectorize(conv1_out->dims()); + conv1_filter_shape = common::vectorize(filter1->dims()); conv1_filter_numel = filter1->numel(); conv1_input_numel = input1->numel(); conv1_output_numel = conv1_out->numel(); - conv2_input_shape = phi::vectorize(conv1_out->dims()); - conv2_output_shape = phi::vectorize(conv2_out->dims()); - conv2_filter_shape = phi::vectorize(filter2->dims()); + conv2_input_shape = common::vectorize(conv1_out->dims()); + conv2_output_shape = common::vectorize(conv2_out->dims()); + conv2_filter_shape = common::vectorize(filter2->dims()); conv2_filter_numel = filter2->numel(); conv2_input_numel = conv1_out->numel(); conv2_output_numel = conv2_out->numel(); @@ -156,9 +156,9 @@ class ResnetBasicBlockGradAttr { if (has_shortcut) { auto filter3 = ctx.Input("Filter3"); auto conv3_out = ctx.Input("Conv3"); - conv3_input_shape = phi::vectorize(input1->dims()); - conv3_output_shape = phi::vectorize(conv3_out->dims()); - conv3_filter_shape = phi::vectorize(filter3->dims()); + conv3_input_shape = common::vectorize(input1->dims()); + conv3_output_shape = common::vectorize(conv3_out->dims()); + conv3_filter_shape = common::vectorize(filter3->dims()); conv3_filter_numel = filter3->numel(); conv3_input_numel = input1->numel(); conv3_output_numel = conv3_out->numel(); diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cc b/paddle/fluid/operators/fused/resnet_unit_op.cc index 948e8fdc9491e9..f1f2628119c155 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op.cc +++ b/paddle/fluid/operators/fused/resnet_unit_op.cc @@ -29,7 +29,7 @@ static framework::DDim GetBitmaskDims(std::vector out_shape) { int32_t c_int32_elems = ((c + 63) & ~63) / 32; int32_t nhw_int32_elems = ((nhw + 31) & ~31); std::vector bitmask_shape = {nhw_int32_elems, c_int32_elems, 1}; - return phi::make_ddim(bitmask_shape); + return common::make_ddim(bitmask_shape); } class ResNetUnitOp : public framework::OperatorWithKernel { @@ -124,11 +124,11 @@ class ResNetUnitOp : public framework::OperatorWithKernel { const auto x_dims = ctx->GetInputDim("X"); const auto w_dims = ctx->GetInputDim("FilterX"); std::vector bn_param_shape = - phi::vectorize(ctx->GetInputDim("ScaleX")); + common::vectorize(ctx->GetInputDim("ScaleX")); if (1 == bn_param_shape.size()) { bn_param_shape = {1, 1, 1, bn_param_shape[0]}; } - 
framework::DDim bn_param_dims = phi::make_ddim(bn_param_shape); + framework::DDim bn_param_dims = common::make_ddim(bn_param_shape); PADDLE_ENFORCE_EQ( x_dims.size(), 4, @@ -181,7 +181,7 @@ class ResNetUnitOp : public framework::OperatorWithKernel { out_shape.push_back(output_channel); } - auto y_dims = phi::make_ddim(out_shape); + auto y_dims = common::make_ddim(out_shape); auto bitmask_dims = GetBitmaskDims(out_shape); // Set dims of outputs ctx->SetOutputDim("Y", y_dims); diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cu b/paddle/fluid/operators/fused/resnet_unit_op.cu index 7caa0b1caa1afa..5b126008bf6548 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op.cu +++ b/paddle/fluid/operators/fused/resnet_unit_op.cu @@ -69,20 +69,20 @@ class ResNetUnitKernel : public framework::OpKernel { bool is_train = !is_test && !use_global_stats; std::string act_type = ctx.Attr("act_type"); - auto input_x_shape = phi::vectorize(input_x->dims()); - auto filter_x_shape = phi::vectorize(filter_x->dims()); + auto input_x_shape = common::vectorize(input_x->dims()); + auto filter_x_shape = common::vectorize(filter_x->dims()); // std::swap used to convert shape of filter from conv2d when kernel size is // 1. if (filter_x_shape[1] != filter_x_shape[2] && 1 == filter_x_shape[2]) { std::swap(filter_x_shape[1], filter_x_shape[3]); } auto param_dims = scale_x->dims(); - auto param_shape = phi::vectorize(scale_x->dims()); + auto param_shape = common::vectorize(scale_x->dims()); if (1 == param_shape.size()) { param_shape = {1, 1, 1, param_shape[0]}; } - auto output_shape = phi::vectorize(output->dims()); - auto bitmask_shape = phi::vectorize(bitmask->dims()); + auto output_shape = common::vectorize(output->dims()); + auto bitmask_shape = common::vectorize(bitmask->dims()); int output_channel = filter_x_shape[0]; int64_t ele_count = std::accumulate(output_shape.begin(), output_shape.end(), @@ -157,8 +157,8 @@ class ResNetUnitKernel : public framework::OpKernel { phi::DenseTensor *running_var_z = ctx.Output("RunningVarZ"); - auto input_z_shape = phi::vectorize(input_z->dims()); - auto filter_z_shape = phi::vectorize(filter_z->dims()); + auto input_z_shape = common::vectorize(input_z->dims()); + auto filter_z_shape = common::vectorize(filter_z->dims()); // 3.1 Conv for second input phi::DenseTensor sum_z; @@ -273,11 +273,11 @@ class ResNetUnitGradKernel : public framework::OpKernel { bool use_global_stats = ctx.Attr("use_global_stats"); std::string act_type = ctx.Attr("act_type"); - auto x_shape = phi::vectorize(x->dims()); - auto filter_x_shape = phi::vectorize(filter_x->dims()); - auto param_shape = phi::vectorize(scale_x->dims()); - auto output_shape = phi::vectorize(output->dims()); - auto bitmask_shape = phi::vectorize(bitmask->dims()); + auto x_shape = common::vectorize(x->dims()); + auto filter_x_shape = common::vectorize(filter_x->dims()); + auto param_shape = common::vectorize(scale_x->dims()); + auto output_shape = common::vectorize(output->dims()); + auto bitmask_shape = common::vectorize(bitmask->dims()); auto place = ctx.GetPlace(); auto &dev_ctx = ctx.template device_context(); @@ -360,8 +360,8 @@ class ResNetUnitGradKernel : public framework::OpKernel { eps); // 1.3 Backward of Conv for z, get z_grad and filter_z_grad - auto z_shape = phi::vectorize(z->dims()); - auto filter_z_shape = phi::vectorize(filter_z->dims()); + auto z_shape = common::vectorize(z->dims()); + auto filter_z_shape = common::vectorize(filter_z->dims()); CudnnNormConvolutionGrad conv_z_op(dev_ctx, z_shape, 
filter_z_shape, diff --git a/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc b/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc index 1e4ed290f43a98..c00e58f8463ab5 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc +++ b/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc @@ -74,9 +74,9 @@ class ResNetUnitXPUKernel : public framework::OpKernel { reinterpret_cast(conv_out_x->mutable_data(place))}; std::vector> x_shape_list = { - phi::vectorize(input_x->dims())}; + common::vectorize(input_x->dims())}; - auto filter_x_shape = phi::vectorize(filter_x->dims()); + auto filter_x_shape = common::vectorize(filter_x->dims()); std::vector ksize = {filter_x_shape[2], filter_x_shape[3]}; if (!is_nchw) { ksize[0] = filter_x_shape[1]; @@ -122,9 +122,9 @@ class ResNetUnitXPUKernel : public framework::OpKernel { conv_y_list.push_back( reinterpret_cast(conv_out_z->mutable_data(place))); - x_shape_list.push_back(phi::vectorize(input_z->dims())); + x_shape_list.push_back(common::vectorize(input_z->dims())); - auto filter_z_shape = phi::vectorize(filter_z->dims()); + auto filter_z_shape = common::vectorize(filter_z->dims()); std::vector ksize_z = {filter_z_shape[2], filter_z_shape[3]}; if (!is_nchw) { ksize_z[0] = filter_z_shape[1]; @@ -143,7 +143,7 @@ class ResNetUnitXPUKernel : public framework::OpKernel { } else { if (fuse_add) { const phi::DenseTensor *input_z = ctx.Input("Z"); - auto input_z_shape = phi::vectorize(input_z->dims()); + auto input_z_shape = common::vectorize(input_z->dims()); x_list.push_back(reinterpret_cast(input_z->data())); x_shape_list.push_back(input_z_shape); x_maxlist.push_back(nullptr); @@ -239,9 +239,9 @@ class ResNetUnitGradXPUKernel : public framework::OpKernel { reinterpret_cast(filter_x_grad->mutable_data(place))}; std::vector> x_shape_list = { - phi::vectorize(x->dims())}; + common::vectorize(x->dims())}; - auto filter_x_shape = phi::vectorize(filter_x->dims()); + auto filter_x_shape = common::vectorize(filter_x->dims()); std::vector x_ksize = {filter_x_shape[2], filter_x_shape[3]}; if (!is_nchw) { x_ksize[0] = filter_x_shape[1]; @@ -298,9 +298,9 @@ class ResNetUnitGradXPUKernel : public framework::OpKernel { reinterpret_cast(z_grad->mutable_data(place))); dw_list.push_back( reinterpret_cast(filter_z_grad->mutable_data(place))); - x_shape_list.push_back(phi::vectorize(z->dims())); + x_shape_list.push_back(common::vectorize(z->dims())); - auto filter_z_shape = phi::vectorize(filter_z->dims()); + auto filter_z_shape = common::vectorize(filter_z->dims()); std::vector ksize_z = {filter_z_shape[2], filter_z_shape[3]}; if (!is_nchw) { ksize_z[0] = filter_z_shape[1]; diff --git a/paddle/fluid/operators/generator/CMakeLists.txt b/paddle/fluid/operators/generator/CMakeLists.txt index dc88ea0b3a5336..a47a0a295be8f4 100644 --- a/paddle/fluid/operators/generator/CMakeLists.txt +++ b/paddle/fluid/operators/generator/CMakeLists.txt @@ -356,7 +356,7 @@ file(APPEND ${op_utils_header} # Automatically generate the registration code of all arg map functions # and compile the corresponding target to avoid frequent code conflicts # when writing to same file -register_op_utils(op_compat_infos DEPS phi) +register_op_utils(op_compat_infos DEPS phi common) copy_if_different(${op_utils_header} ${op_utils_header_final}) diff --git a/paddle/fluid/operators/generator/get_expected_kernel_func.cc b/paddle/fluid/operators/generator/get_expected_kernel_func.cc index d29311f4621b39..e6715189772ba1 100644 --- a/paddle/fluid/operators/generator/get_expected_kernel_func.cc +++ 
b/paddle/fluid/operators/generator/get_expected_kernel_func.cc @@ -65,7 +65,7 @@ static bool ReduceOpHasOptimizedOneDNNKernel( bool CanMKLDNNSupportPool(const framework::ExecutionContext& ctx) { if (ctx.Attr("adaptive") == false) return true; // oneDNN is supporting only unchangable in size pool window - auto src_tz = phi::vectorize(ctx.Input("X")->dims()); + auto src_tz = common::vectorize(ctx.Input("X")->dims()); if (!ctx.HasAttr("ksize")) { return false; } @@ -228,7 +228,7 @@ phi::KernelKey GetSoftmaxExpectedKernelType( const framework::OperatorWithKernel* op_ptr) { // choose cudnn kernel if the runtime supported. std::string data_format = ctx.Attr("data_format"); - phi::DataLayout layout_ = phi::StringToDataLayout(data_format); + phi::DataLayout layout_ = common::StringToDataLayout(data_format); auto input_data_type = op_ptr->IndicateVarDataType(ctx, "X"); if (input_data_type == framework::proto::VarType::FP16) { PADDLE_ENFORCE_EQ( @@ -248,7 +248,7 @@ phi::KernelKey GetSoftmaxGradExpectedKernelType( const framework::OperatorWithKernel* op_ptr) { // choose cudnn kernel if the runtime supported. std::string data_format = ctx.Attr("data_format"); - phi::DataLayout layout_ = phi::StringToDataLayout(data_format); + phi::DataLayout layout_ = common::StringToDataLayout(data_format); auto input_data_type = op_ptr->IndicateVarDataType(ctx, framework::GradVarName("Out")); if (input_data_type == framework::proto::VarType::FP16) { diff --git a/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc b/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc index 9230e114bd3bb2..c88d36602bd79c 100644 --- a/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc +++ b/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc @@ -64,9 +64,9 @@ class CUDNNGridSampleOpKernel : public framework::OpKernel { ScopedTensorDescriptor input_desc; ScopedTensorDescriptor output_desc; cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( - DataLayout::kNCHW, phi::vectorize(input->dims())); + DataLayout::kNCHW, common::vectorize(input->dims())); cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor( - DataLayout::kNCHW, phi::vectorize(output->dims())); + DataLayout::kNCHW, common::vectorize(output->dims())); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSpatialTfSamplerForward( handle, @@ -123,13 +123,13 @@ class CUDNNGridSampleGradOpKernel : public framework::OpKernel { ScopedTensorDescriptor input_grad_desc; ScopedTensorDescriptor output_grad_desc; cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( - DataLayout::kNCHW, phi::vectorize(input->dims())); + DataLayout::kNCHW, common::vectorize(input->dims())); cudnnTensorDescriptor_t cudnn_input_grad_desc = - input_grad_desc.descriptor(DataLayout::kNCHW, - phi::vectorize(input_grad->dims())); + input_grad_desc.descriptor( + DataLayout::kNCHW, common::vectorize(input_grad->dims())); cudnnTensorDescriptor_t cudnn_output_grad_desc = output_grad_desc.descriptor( - DataLayout::kNCHW, phi::vectorize(output_grad->dims())); + DataLayout::kNCHW, common::vectorize(output_grad->dims())); PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnSpatialTfSamplerBackward( handle, diff --git a/paddle/fluid/operators/hash_op.cc b/paddle/fluid/operators/hash_op.cc index e5fc57c6567b4f..03887561934b7b 100644 --- a/paddle/fluid/operators/hash_op.cc +++ b/paddle/fluid/operators/hash_op.cc @@ -52,7 +52,7 @@ class HashOp : public framework::OperatorWithKernel { int num_hash = ctx->Attrs().Get("num_hash"); HashOutputSize(dims, out_dims, num_hash); - 
ctx->SetOutputDim("Out", phi::make_ddim(out_dims)); + ctx->SetOutputDim("Out", common::make_ddim(out_dims)); ctx->ShareLoD("X", /*->*/ "Out"); } }; diff --git a/paddle/fluid/operators/hash_op.h b/paddle/fluid/operators/hash_op.h index 700f7c1d70138a..268bcc273272d3 100644 --- a/paddle/fluid/operators/hash_op.h +++ b/paddle/fluid/operators/hash_op.h @@ -51,7 +51,7 @@ class HashKernel : public framework::OpKernel { std::vector out_dims; HashOutputSize(in_dims, out_dims, num_hash); - out_t->Resize(phi::make_ddim(out_dims)); + out_t->Resize(common::make_ddim(out_dims)); auto* output = out_t->mutable_data(context.GetPlace()); auto seq_length = in_dims[0]; diff --git a/paddle/fluid/operators/index_select_op.h b/paddle/fluid/operators/index_select_op.h index b13d83a57ee974..c06885633f3482 100644 --- a/paddle/fluid/operators/index_select_op.h +++ b/paddle/fluid/operators/index_select_op.h @@ -79,8 +79,8 @@ void IndexSelectInner(const framework::ExecutionContext& context, VLOG(3) << "Index_Select_Debug; outer_nums: " << outer_nums << "; slice_size: " << slice_size << "; index_size: " << index_size; - input->Resize(phi::make_ddim({outer_nums, input_dim[dim], slice_size})); - output->Resize(phi::make_ddim({outer_nums, index_size, slice_size})); + input->Resize(common::make_ddim({outer_nums, input_dim[dim], slice_size})); + output->Resize(common::make_ddim({outer_nums, index_size, slice_size})); auto input_tensor = phi::EigenTensor::From(*input); auto output_tensor = phi::EigenTensor::From(*output); diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc index 1af8b247de4479..a64bd3c8ac7f6e 100644 --- a/paddle/fluid/operators/interpolate_op.cc +++ b/paddle/fluid/operators/interpolate_op.cc @@ -36,7 +36,7 @@ static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { "Input(X) dimension is 3, but got method = %s .", interp_method)); const DataLayout data_layout = - phi::StringToDataLayout(ctx->Attrs().Get("data_layout")); + common::StringToDataLayout(ctx->Attrs().Get("data_layout")); if (ctx->HasInputs("SizeTensor")) { // top prority size @@ -125,7 +125,7 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { "Input(X) dimension is 4, but got method is %s.", interp_method)); const DataLayout data_layout = - phi::StringToDataLayout(ctx->Attrs().Get("data_layout")); + common::StringToDataLayout(ctx->Attrs().Get("data_layout")); if (ctx->HasInputs("SizeTensor")) { // top prority size @@ -220,7 +220,7 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { "dimension is 5, but got method = %s .", interp_method)); const DataLayout data_layout = - phi::StringToDataLayout(ctx->Attrs().Get("data_layout")); + common::StringToDataLayout(ctx->Attrs().Get("data_layout")); if (ctx->HasInputs("SizeTensor")) { // top prority size @@ -353,7 +353,7 @@ class InterpolateOp : public framework::OperatorWithKernel { auto attrs = Attrs(); auto ar = paddle::framework::AttrReader(attrs); const std::string data_format = ar.Get("data_layout"); - auto dl = phi::StringToDataLayout(data_format); + auto dl = common::StringToDataLayout(data_format); // Some models may have intentionally set "AnyLayout" for pool // op. 
Treat this as NCHW (default data_format value) if (dl != phi::DataLayout::kAnyLayout) { diff --git a/paddle/fluid/operators/interpolate_op.cu b/paddle/fluid/operators/interpolate_op.cu index a0e1410f52d3d3..bfbb15b076448a 100644 --- a/paddle/fluid/operators/interpolate_op.cu +++ b/paddle/fluid/operators/interpolate_op.cu @@ -916,7 +916,7 @@ static void Interpolate1DCUDAFwd(const framework::ExecutionContext& ctx, auto* input_data = input.data(); const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -1008,7 +1008,7 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, auto* input_data = input.data(); const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -1160,7 +1160,7 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, auto* input_data = input.data(); const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -1291,7 +1291,7 @@ static void Interpolate1DCUDABwd(const framework::ExecutionContext& ctx, const phi::DenseTensor output_grad) { auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -1382,7 +1382,7 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, const phi::DenseTensor output_grad) { auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -1528,7 +1528,7 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, const phi::DenseTensor& output_grad) { auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); diff --git a/paddle/fluid/operators/interpolate_op.h b/paddle/fluid/operators/interpolate_op.h index 6272017aa0da07..31767d68b9d3c9 100644 --- a/paddle/fluid/operators/interpolate_op.h +++ b/paddle/fluid/operators/interpolate_op.h @@ -36,7 +36,7 @@ inline std::vector get_new_shape( for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) { auto tensor = list_new_shape_tensor[i]; PADDLE_ENFORCE_EQ(tensor->dims(), - phi::make_ddim({1}), + common::make_ddim({1}), 
platform::errors::InvalidArgument( "The shape of dimension tensor should be [1]," "but received d%.", @@ -856,7 +856,7 @@ static void Interpolate1DCPUFwd(const framework::ExecutionContext& ctx, const phi::DenseTensor& input, phi::DenseTensor* output) { const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -930,7 +930,7 @@ static void Interpolate2DCPUFwd(const framework::ExecutionContext& ctx, const phi::DenseTensor& input, phi::DenseTensor* output) { const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -1047,7 +1047,7 @@ static void Interpolate3DCPUFwd(const framework::ExecutionContext& ctx, const phi::DenseTensor& input, phi::DenseTensor* output) { const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -1160,7 +1160,7 @@ static void Interpolate1DCPUBwd(const framework::ExecutionContext& ctx, const phi::DenseTensor& output_grad) { auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -1234,7 +1234,7 @@ static void Interpolate2DCPUBwd(const framework::ExecutionContext& ctx, const phi::DenseTensor& output_grad) { auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -1345,7 +1345,7 @@ static void Interpolate3DCPUBwd(const framework::ExecutionContext& ctx, const phi::DenseTensor output_grad) { auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; ExtractNCDWH(input->dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); diff --git a/paddle/fluid/operators/is_empty_op.h b/paddle/fluid/operators/is_empty_op.h index 6f020cba1944d6..3c9dfbf58fae52 100644 --- a/paddle/fluid/operators/is_empty_op.h +++ b/paddle/fluid/operators/is_empty_op.h @@ -32,7 +32,7 @@ class IsEmptyOpKernel : public framework::OpKernel { // always be allocated for CPUPlace. We reigister CUDA kernel for this op to // avoid the unnecessary data transform. 
output_tensor->mutable_data(platform::CPUPlace())[0] = - phi::product(input_tensor->dims()) == 0; + common::product(input_tensor->dims()) == 0; } }; diff --git a/paddle/fluid/operators/l1_norm_op.cc b/paddle/fluid/operators/l1_norm_op.cc index 92f190c0025ed9..8f0b705c8de79f 100644 --- a/paddle/fluid/operators/l1_norm_op.cc +++ b/paddle/fluid/operators/l1_norm_op.cc @@ -27,7 +27,7 @@ class L1NormOp : public framework::OperatorWithKernel { OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "L1NormOp"); OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "L1NormOp"); - ctx->SetOutputDim("Out", phi::make_ddim({})); + ctx->SetOutputDim("Out", common::make_ddim({})); } }; diff --git a/paddle/fluid/operators/layout_utils.h b/paddle/fluid/operators/layout_utils.h index 2faf47538ffa54..da49245812605e 100644 --- a/paddle/fluid/operators/layout_utils.h +++ b/paddle/fluid/operators/layout_utils.h @@ -35,31 +35,31 @@ inline void ResizeToChannelFirst(const framework::ExecutionContext& context, // input transformed_input->Resize(input->dims()); - auto in_dims_vec = phi::vectorize(input->dims()); + auto in_dims_vec = common::vectorize(input->dims()); in_dims_vec[1] = input->dims()[4]; in_dims_vec[2] = input->dims()[1]; in_dims_vec[3] = input->dims()[2]; in_dims_vec[4] = input->dims()[3]; - transformed_input->Resize(phi::make_ddim(in_dims_vec)); + transformed_input->Resize(common::make_ddim(in_dims_vec)); transformed_input->mutable_data(context.GetPlace()); } else if (dim == 2) { // input transformed_input->Resize(input->dims()); - auto in_dims_vec = phi::vectorize(input->dims()); + auto in_dims_vec = common::vectorize(input->dims()); in_dims_vec[1] = input->dims()[3]; in_dims_vec[2] = input->dims()[1]; in_dims_vec[3] = input->dims()[2]; - transformed_input->Resize(phi::make_ddim(in_dims_vec)); + transformed_input->Resize(common::make_ddim(in_dims_vec)); transformed_input->mutable_data(context.GetPlace()); } else if (dim == 1) { transformed_input->Resize(input->dims()); - auto in_dims_vec = phi::vectorize(input->dims()); + auto in_dims_vec = common::vectorize(input->dims()); in_dims_vec[1] = input->dims()[2]; in_dims_vec[2] = input->dims()[1]; - transformed_input->Resize(phi::make_ddim(in_dims_vec)); + transformed_input->Resize(common::make_ddim(in_dims_vec)); transformed_input->mutable_data(context.GetPlace()); } } @@ -73,31 +73,31 @@ inline void ResizeToChannelLast(const framework::ExecutionContext& context, // input transformed_input->Resize(input->dims()); - auto in_dims_vec = phi::vectorize(input->dims()); + auto in_dims_vec = common::vectorize(input->dims()); in_dims_vec[1] = input->dims()[2]; in_dims_vec[2] = input->dims()[3]; in_dims_vec[3] = input->dims()[4]; in_dims_vec[4] = input->dims()[1]; - transformed_input->Resize(phi::make_ddim(in_dims_vec)); + transformed_input->Resize(common::make_ddim(in_dims_vec)); transformed_input->mutable_data(context.GetPlace()); } else if (dim == 2) { // input transformed_input->Resize(input->dims()); - auto in_dims_vec = phi::vectorize(input->dims()); + auto in_dims_vec = common::vectorize(input->dims()); in_dims_vec[1] = input->dims()[2]; in_dims_vec[2] = input->dims()[3]; in_dims_vec[3] = input->dims()[1]; - transformed_input->Resize(phi::make_ddim(in_dims_vec)); + transformed_input->Resize(common::make_ddim(in_dims_vec)); transformed_input->mutable_data(context.GetPlace()); } else if (dim == 1) { transformed_input->Resize(input->dims()); - auto in_dims_vec = phi::vectorize(input->dims()); + auto in_dims_vec = common::vectorize(input->dims()); in_dims_vec[1] = 
input->dims()[2]; in_dims_vec[2] = input->dims()[1]; - transformed_input->Resize(phi::make_ddim(in_dims_vec)); + transformed_input->Resize(common::make_ddim(in_dims_vec)); transformed_input->mutable_data(context.GetPlace()); } } diff --git a/paddle/fluid/operators/linear_chain_crf_op.h b/paddle/fluid/operators/linear_chain_crf_op.h index 49387240625c18..9a27db56a9c31e 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.h +++ b/paddle/fluid/operators/linear_chain_crf_op.h @@ -129,7 +129,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { // Now, all the inputs and outputs should be on the CPU memory. phi::DenseTensor emission_row_max; emission_row_max.mutable_data( - phi::make_ddim({static_cast(batch_size), 1}), + common::make_ddim({static_cast(batch_size), 1}), platform::CPUPlace()); auto& place = *ctx.template device_context().eigen_device(); diff --git a/paddle/fluid/operators/lite/ut_helper.h b/paddle/fluid/operators/lite/ut_helper.h index b085f332781b0b..3d574b1f844c87 100644 --- a/paddle/fluid/operators/lite/ut_helper.h +++ b/paddle/fluid/operators/lite/ut_helper.h @@ -99,7 +99,7 @@ void CreateTensor(framework::Scope* scope, const std::vector& shape) { auto* var = scope->Var(name); auto* tensor = var->GetMutable(); - auto dims = phi::make_ddim(shape); + auto dims = common::make_ddim(shape); tensor->Resize(dims); platform::Place place = platform::CPUPlace(); RandomizeTensor(tensor, place); diff --git a/paddle/fluid/operators/lod_reset_op.h b/paddle/fluid/operators/lod_reset_op.h index a4af52472ca9cd..a468577ab9aa1f 100644 --- a/paddle/fluid/operators/lod_reset_op.h +++ b/paddle/fluid/operators/lod_reset_op.h @@ -96,7 +96,7 @@ class LoDResetKernel : public framework::OpKernel { "The last value of 'Target LoD''s last level LoD should be equal " "to the first dimension of Input(X). But received the 'Target LoD' " "is %s, Input(X)'s shape is %s.", - phi::make_ddim(level0), + common::make_ddim(level0), in->dims())); for (size_t i = 0; i < level0.size() - 1; ++i) { PADDLE_ENFORCE_GE(level0[i + 1], @@ -104,7 +104,7 @@ class LoDResetKernel : public framework::OpKernel { platform::errors::InvalidArgument( "'Target LoD' should be an ascending " "vector. 
But received the Target LoD is %s.", - phi::make_ddim(level0))); + common::make_ddim(level0))); } // cast level0 to size_t diff --git a/paddle/fluid/operators/lookup_table_dequant_op.cc b/paddle/fluid/operators/lookup_table_dequant_op.cc index f9258f9e0185cd..93826aab0d5739 100644 --- a/paddle/fluid/operators/lookup_table_dequant_op.cc +++ b/paddle/fluid/operators/lookup_table_dequant_op.cc @@ -66,7 +66,7 @@ class LookupTableDequantOp : public framework::OperatorWithKernel { ids_dims)); auto output_dims = - phi::vectorize(phi::slice_ddim(ids_dims, 0, ids_rank - 1)); + common::vectorize(common::slice_ddim(ids_dims, 0, ids_rank - 1)); PADDLE_ENFORCE_GE(table_dims[1], 2, platform::errors::InvalidArgument( @@ -76,7 +76,7 @@ class LookupTableDequantOp : public framework::OperatorWithKernel { table_dims)); output_dims.push_back((table_dims[1] - 2) * 4); - ctx->SetOutputDim("Out", phi::make_ddim(output_dims)); + ctx->SetOutputDim("Out", common::make_ddim(output_dims)); if (ctx->GetOutputsVarType("Out")[0] == framework::proto::VarType::LOD_TENSOR) { diff --git a/paddle/fluid/operators/lookup_table_op.cc b/paddle/fluid/operators/lookup_table_op.cc index 6bb9f9ee19e42c..a8185691c45aae 100644 --- a/paddle/fluid/operators/lookup_table_op.cc +++ b/paddle/fluid/operators/lookup_table_op.cc @@ -56,9 +56,9 @@ class LookupTableOp : public framework::OperatorWithKernel { ids_dims)); auto output_dims = - phi::vectorize(phi::slice_ddim(ids_dims, 0, ids_rank - 1)); + common::vectorize(common::slice_ddim(ids_dims, 0, ids_rank - 1)); output_dims.push_back(table_dims[1]); - ctx->SetOutputDim("Out", phi::make_ddim(output_dims)); + ctx->SetOutputDim("Out", common::make_ddim(output_dims)); if (ctx->GetOutputsVarType("Out")[0] == framework::proto::VarType::LOD_TENSOR) { diff --git a/paddle/fluid/operators/lookup_table_op.cu b/paddle/fluid/operators/lookup_table_op.cu index 32946d65785a97..ba8af995429a39 100644 --- a/paddle/fluid/operators/lookup_table_op.cu +++ b/paddle/fluid/operators/lookup_table_op.cu @@ -192,7 +192,7 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { auto *d_output_data = d_output->data(); auto d_output_dims = d_output->dims(); auto d_output_dims_2d = - phi::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); + common::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output_dims_2d, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/lookup_table_op.h b/paddle/fluid/operators/lookup_table_op.h index b467428eeafd3e..21f0bf6a957aea 100644 --- a/paddle/fluid/operators/lookup_table_op.h +++ b/paddle/fluid/operators/lookup_table_op.h @@ -207,7 +207,7 @@ class LookupTableGradKernel : public framework::OpKernel { auto d_output_dims = d_output->dims(); auto d_output_dims_2d = - phi::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); + common::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output_dims_2d, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/lookup_table_v2_op.cu b/paddle/fluid/operators/lookup_table_v2_op.cu index 11c35293ebe345..edd8b20da160c5 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.cu +++ b/paddle/fluid/operators/lookup_table_v2_op.cu @@ -187,7 +187,7 @@ struct LookupTableV2GradCUDAFunctor { auto *d_output_data = d_output->template data(); auto d_output_dims = d_output->dims(); auto d_output_dims_2d = - phi::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); + common::flatten_to_2d(d_output_dims, 
d_output_dims.size() - 1); PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output_dims_2d, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/lookup_table_v2_op.h b/paddle/fluid/operators/lookup_table_v2_op.h index 52c93f26b7e8a8..82dbac8b21dfc2 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.h +++ b/paddle/fluid/operators/lookup_table_v2_op.h @@ -206,7 +206,7 @@ struct LookupTableV2GradCPUFunctor { auto d_output_dims = d_output->dims(); auto d_output_dims_2d = - phi::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); + common::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output_dims_2d, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/lrn_op.cc b/paddle/fluid/operators/lrn_op.cc index a1e328fce5942c..bf4c72a2133b69 100644 --- a/paddle/fluid/operators/lrn_op.cc +++ b/paddle/fluid/operators/lrn_op.cc @@ -54,9 +54,9 @@ struct LRNFunctor { auto in_dims = input.dims(); std::vector shape( {in_dims[0], in_dims[3], in_dims[1], in_dims[2]}); - in_transpose.mutable_data(phi::make_ddim(shape), place); - mid_transpose.mutable_data(phi::make_ddim(shape), place); - out_transpose.mutable_data(phi::make_ddim(shape), place); + in_transpose.mutable_data(common::make_ddim(shape), place); + mid_transpose.mutable_data(common::make_ddim(shape), place); + out_transpose.mutable_data(common::make_ddim(shape), place); std::vector axis = {0, 3, 1, 2}; transpose(dev_ctx, input, &in_transpose, axis); } else { @@ -238,7 +238,7 @@ class LRNOp : public framework::OperatorWithKernel { auto attrs = Attrs(); auto ar = paddle::framework::AttrReader(attrs); const std::string data_format = ar.Get("data_format"); - auto dl = phi::StringToDataLayout(data_format); + auto dl = common::StringToDataLayout(data_format); // Some models may have intentionally set "AnyLayout" for lrn // op. Treat this as NCHW (default data_format value) if (dl != phi::DataLayout::kAnyLayout) { @@ -361,7 +361,7 @@ class LRNOpGrad : public framework::OperatorWithKernel { auto attrs = Attrs(); auto ar = paddle::framework::AttrReader(attrs); const std::string data_format = ar.Get("data_format"); - auto dl = phi::StringToDataLayout(data_format); + auto dl = common::StringToDataLayout(data_format); // Some models may have intentionally set "AnyLayout" for lrn // op. Treat this as NCHW (default data_format value) if (dl != phi::DataLayout::kAnyLayout) { diff --git a/paddle/fluid/operators/lrn_op.h b/paddle/fluid/operators/lrn_op.h index 15ebb4df74f47d..4d1cc268d48b6f 100644 --- a/paddle/fluid/operators/lrn_op.h +++ b/paddle/fluid/operators/lrn_op.h @@ -56,7 +56,7 @@ class LRNKernel : public framework::OpKernel { const std::string data_layout_str = ctx.Attr("data_format"); const phi::DataLayout data_layout = - phi::StringToDataLayout(data_layout_str); + common::StringToDataLayout(data_layout_str); // NCHW int N = x_dims[0]; int C = (data_layout != DataLayout::kNHWC ? 
x_dims[1] : x_dims[3]); @@ -147,7 +147,7 @@ class LRNGradKernel : public framework::OpKernel { const phi::DenseTensor& mid = *ctx.Input("MidOut"); const std::string data_layout_str = ctx.Attr("data_format"); const phi::DataLayout data_layout = - phi::StringToDataLayout(data_layout_str); + common::StringToDataLayout(data_layout_str); auto x_g = ctx.Output(framework::GradVarName("X")); x_g->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/match_matrix_tensor_op.cc b/paddle/fluid/operators/match_matrix_tensor_op.cc index 271a027c456236..7055f3ca95efe0 100644 --- a/paddle/fluid/operators/match_matrix_tensor_op.cc +++ b/paddle/fluid/operators/match_matrix_tensor_op.cc @@ -182,8 +182,8 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { out_dims_vec.push_back(1); std::vector tmp_dims_vec{tmp_dim_0}; tmp_dims_vec.push_back(1); - ctx->SetOutputDim("Out", phi::make_ddim(out_dims_vec)); - ctx->SetOutputDim("Tmp", phi::make_ddim(tmp_dims_vec)); + ctx->SetOutputDim("Out", common::make_ddim(out_dims_vec)); + ctx->SetOutputDim("Tmp", common::make_ddim(tmp_dims_vec)); } void MatchMatrixTensorOpGrad::InferShape( diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index af14333b9d1ea0..0e0423bd64ff45 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -6,20 +6,20 @@ if(WITH_XPU) endif() # please add new math_library in alphabetical order -math_library(concat_and_split DEPS phi) -math_library(context_project DEPS phi) +math_library(concat_and_split DEPS phi common) +math_library(context_project DEPS phi common) math_library(cos_sim_functor) math_library(depthwise_conv) math_library(sample_prob) -math_library(sampler DEPS phi) +math_library(sampler DEPS phi common) if(WITH_XPU) - math_library(beam_search DEPS phi beam_search_xpu) + math_library(beam_search DEPS phi common beam_search_xpu) else() - math_library(beam_search DEPS phi) + math_library(beam_search DEPS phi common) endif() math_library(unpooling) math_library(prelu) math_library(bert_encoder_functor) -math_library(tree2col DEPS phi) +math_library(tree2col DEPS phi common) diff --git a/paddle/fluid/operators/math/beam_search.cc b/paddle/fluid/operators/math/beam_search.cc index be8734076da3b4..aeff6c394c429a 100644 --- a/paddle/fluid/operators/math/beam_search.cc +++ b/paddle/fluid/operators/math/beam_search.cc @@ -67,7 +67,7 @@ class BeamSearchFunctor { 0, [](size_t a, std::vector &b) { return a + b.size(); }); // the output tensor shape should be [num_instances, 1] - auto dims = phi::make_ddim( + auto dims = common::make_ddim( std::vector({static_cast(num_instances), 1})); auto *selected_ids_data = selected_ids->mutable_data(dims, platform::CPUPlace()); diff --git a/paddle/fluid/operators/math/beam_search.cu b/paddle/fluid/operators/math/beam_search.cu index bd8e905389e812..098f40ab526b10 100644 --- a/paddle/fluid/operators/math/beam_search.cu +++ b/paddle/fluid/operators/math/beam_search.cu @@ -432,7 +432,7 @@ class BeamSearchFunctor { // Reserve a big enough memory. 
auto selected_dims = - phi::make_ddim({static_cast(num_seqs * beam_size), 1}); + common::make_ddim({static_cast(num_seqs * beam_size), 1}); int64_t* selected_ids_data = selected_ids->mutable_data(selected_dims, context.GetPlace()); float* selected_scores_data = @@ -521,7 +521,7 @@ class BeamSearchFunctor { selected_scores->set_lod(selected_lod); if (selected_lod[1].back() < num_seqs * beam_size) { auto final_selected_dims = - phi::make_ddim({static_cast(selected_lod[1].back()), 1}); + common::make_ddim({static_cast(selected_lod[1].back()), 1}); selected_ids->Resize(final_selected_dims); selected_scores->Resize(final_selected_dims); if (parent_idx) { diff --git a/paddle/fluid/operators/math/beam_search_xpu.cc b/paddle/fluid/operators/math/beam_search_xpu.cc index 5451b5a3138896..4ac0e3d886017a 100644 --- a/paddle/fluid/operators/math/beam_search_xpu.cc +++ b/paddle/fluid/operators/math/beam_search_xpu.cc @@ -92,7 +92,7 @@ class BeamSearchFunctor { 0, [](size_t a, std::vector &b) { return a + b.size(); }); // the output tensor shape should be [num_instances, 1] - auto dims = phi::make_ddim( + auto dims = common::make_ddim( std::vector({static_cast(num_instances), 1})); auto *selected_ids_data = selected_ids->mutable_data(dims, platform::CPUPlace()); diff --git a/paddle/fluid/operators/math/context_project.h b/paddle/fluid/operators/math/context_project.h index 0b6dc510f477fa..20211160b7e5ed 100644 --- a/paddle/fluid/operators/math/context_project.h +++ b/paddle/fluid/operators/math/context_project.h @@ -130,13 +130,13 @@ class ContextProjectFunctor { context_length, sequence_width}); // output_height, output_width, // input_channels, filter_height, filter_width - out_t.Resize(phi::make_ddim(output_shape)); + out_t.Resize(common::make_ddim(output_shape)); std::vector input_shape( {1, input_row_end - input_row_begin, sequence_width}); // input_channels, input_height, input_width - in_t.Resize(phi::make_ddim(input_shape)); + in_t.Resize(common::make_ddim(input_shape)); im2col_ocf(context, in_t, dilation, stride, padding, &out_t); out_t.Resize({sequence_height, context_length * sequence_width}); } @@ -265,13 +265,13 @@ class ContextProjectGradFunctor { context_length, sequence_width}); // output_height, output_width, // input_channels, filter_height, filter_width - out_t.Resize(phi::make_ddim(output_shape)); + out_t.Resize(common::make_ddim(output_shape)); std::vector input_shape( {1, input_row_end - input_row_begin, sequence_width}); // input_channels, input_height, input_width - in_t.Resize(phi::make_ddim(input_shape)); + in_t.Resize(common::make_ddim(input_shape)); col2im_ocf(context, out_t, dilation, stride, padding, &in_t); out_t.Resize({sequence_height, context_length * sequence_width}); diff --git a/paddle/fluid/operators/math/eigen_values_vectors.h b/paddle/fluid/operators/math/eigen_values_vectors.h index f4198acfd830c7..8d6b0b99f9d528 100644 --- a/paddle/fluid/operators/math/eigen_values_vectors.h +++ b/paddle/fluid/operators/math/eigen_values_vectors.h @@ -132,13 +132,13 @@ struct MatrixEighFunctor { framework::TransToProtoVarType(input.dtype()))) { lrwork = std::max(1, static_cast(rwork_opt)); rwork_data = rwork_tensor.mutable_data( - phi::make_ddim({lrwork}), ctx.GetPlace()); + common::make_ddim({lrwork}), ctx.GetPlace()); } phi::DenseTensor iwork_tensor, work_tensor; - auto *iwork_data = iwork_tensor.mutable_data(phi::make_ddim({liwork}), - ctx.GetPlace()); + auto *iwork_data = iwork_tensor.mutable_data( + common::make_ddim({liwork}), ctx.GetPlace()); auto *work_data = - 
work_tensor.mutable_data(phi::make_ddim({lwork}), ctx.GetPlace()); + work_tensor.mutable_data(common::make_ddim({lwork}), ctx.GetPlace()); for (auto i = 0; i < batch_size; i++) { auto *value_data = out_value + i * values_stride; diff --git a/paddle/fluid/operators/math/sample_prob.cu b/paddle/fluid/operators/math/sample_prob.cu index 0c6b49729546cd..bf028c4ada3695 100644 --- a/paddle/fluid/operators/math/sample_prob.cu +++ b/paddle/fluid/operators/math/sample_prob.cu @@ -19,12 +19,12 @@ limitations under the License. */ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/math/sample_prob.h" #include "paddle/fluid/operators/math/sampler.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { diff --git a/paddle/fluid/operators/math/sample_prob.h b/paddle/fluid/operators/math/sample_prob.h index 7c60be68415520..524ba826a57047 100644 --- a/paddle/fluid/operators/math/sample_prob.h +++ b/paddle/fluid/operators/math/sample_prob.h @@ -17,11 +17,11 @@ limitations under the License. */ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/math/sampler.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index df66ab400f40bf..895a427bae6e20 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -40,7 +40,7 @@ static framework::DDim RowMatrixFromVector(const framework::DDim &x_dim) { if (x_dim.size() > 1) { return x_dim; } - return phi::make_ddim({1, x_dim[0]}); + return common::make_ddim({1, x_dim[0]}); } /** @@ -51,7 +51,7 @@ static framework::DDim ColumnMatrixFromVector(const framework::DDim &y_dim) { if (y_dim.size() > 1) { return y_dim; } - return phi::make_ddim({y_dim[0], 1}); + return common::make_ddim({y_dim[0], 1}); } #if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 11060 @@ -676,11 +676,11 @@ class MatMulOp : public framework::OperatorWithKernel { std::vector dim_out; if (mat_dim_x.batch_size_ != 0) { - dim_out = phi::vectorize(dim_x); + dim_out = common::vectorize(dim_x); dim_out[dim_out.size() - 2] = mat_dim_x.height_; dim_out[dim_out.size() - 1] = dim_out_y; } else if (mat_dim_y.batch_size_ != 0) { - dim_out = phi::vectorize(dim_y); + dim_out = common::vectorize(dim_y); dim_out[dim_out.size() - 2] = mat_dim_x.height_; dim_out[dim_out.size() - 1] = dim_out_y; } else { @@ -696,7 +696,7 @@ class MatMulOp : public framework::OperatorWithKernel { dim_out.resize(dim_out.size() - 1); } - phi::DDim ddim_out = phi::make_ddim(dim_out); + phi::DDim ddim_out = common::make_ddim(dim_out); context->SetOutputDim("Out", ddim_out); context->ShareLoD("X", "Out"); diff --git a/paddle/fluid/operators/merge_lod_tensor_op.cc b/paddle/fluid/operators/merge_lod_tensor_op.cc index 3f0fd7bfef2dcc..3ed27460e16b6c 100644 --- a/paddle/fluid/operators/merge_lod_tensor_op.cc +++ b/paddle/fluid/operators/merge_lod_tensor_op.cc @@ -86,16 +86,16 @@ class MergeLoDTensorOp : public framework::OperatorBase { framework::DDim in_dims; if (in_true.IsInitialized()) { rank = in_true.dims().size(); - in_dims = phi::slice_ddim(in_true.dims(), 1, rank); + in_dims = common::slice_ddim(in_true.dims(), 1, rank); 
} else { rank = in_false.dims().size(); - in_dims = phi::slice_ddim(in_false.dims(), 1, rank); + in_dims = common::slice_ddim(in_false.dims(), 1, rank); } - auto in_dim_vec = phi::vectorize(in_dims); + auto in_dim_vec = common::vectorize(in_dims); in_dim_vec.insert(in_dim_vec.begin(), batch_size); - framework::DDim out_dims = phi::make_ddim(in_dim_vec); + framework::DDim out_dims = common::make_ddim(in_dim_vec); out->Resize(out_dims); out->mutable_data(place, data_type); diff --git a/paddle/fluid/operators/metrics/precision_recall_op.cc b/paddle/fluid/operators/metrics/precision_recall_op.cc index 413cd8546011be..63385cb59171fa 100644 --- a/paddle/fluid/operators/metrics/precision_recall_op.cc +++ b/paddle/fluid/operators/metrics/precision_recall_op.cc @@ -106,7 +106,7 @@ class PrecisionRecallOp : public framework::OperatorWithKernel { if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ( weights_dims, - phi::make_ddim({max_probs_dims[0], 1}), + common::make_ddim({max_probs_dims[0], 1}), platform::errors::InvalidArgument( "The shape of PrecisionRecallOp Input(Weights) should be " "[batch_size, 1]. But the shape we received is [%d, %d]", @@ -120,7 +120,7 @@ class PrecisionRecallOp : public framework::OperatorWithKernel { if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ( states_dims, - phi::make_ddim({cls_num, 4}), + common::make_ddim({cls_num, 4}), platform::errors::InvalidArgument( "The shape of PrecisionRecallOp Input(StatesInfo) should be " "[class_number, 4]. But the shape we received is [%d, %d]", diff --git a/paddle/fluid/operators/minus_op.cc b/paddle/fluid/operators/minus_op.cc index 8c33a5da1baff9..64bc176d971492 100644 --- a/paddle/fluid/operators/minus_op.cc +++ b/paddle/fluid/operators/minus_op.cc @@ -48,7 +48,7 @@ class MinusOp : public framework::OperatorWithKernel { auto y_dims = ctx->GetInputDim("Y"); if (ctx->IsRuntime() || - (phi::product(x_dims) > 0 && phi::product(y_dims) > 0)) { + (common::product(x_dims) > 0 && common::product(y_dims) > 0)) { PADDLE_ENFORCE_EQ( x_dims, y_dims, diff --git a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc index c0a68fe126c27b..34e9679b29bb64 100644 --- a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc @@ -33,7 +33,7 @@ class InterpolateOneDNNHandler phi::DenseTensor* out) : phi::funcs::OneDNNHandlerNoCachingT( engine, cpu_place) { - const auto dst_tz = phi::vectorize(out->dims()); + const auto dst_tz = common::vectorize(out->dims()); const auto dst_md = memory::desc( dst_tz, phi::funcs::OneDNNGetDataType(), OneDNNMemoryFormat::any); this->AcquireForwardPrimitiveDescriptor( @@ -49,7 +49,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel { const auto& in_dims = x->dims(); const framework::DDim in_dhw_dims = - phi::slice_ddim(in_dims, 2, in_dims.size()); + common::slice_ddim(in_dims, 2, in_dims.size()); std::vector out_dims; out_dims.reserve(5); @@ -102,7 +102,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel { if (scale.size() == 3 && scale[0] > 0.0f && scale[1] > 0.0f && scale[2] > 0.0f) { int j = 0; - std::vector in_dhw_vec = phi::vectorize(in_dhw_dims); + std::vector in_dhw_vec = common::vectorize(in_dhw_dims); std::transform( in_dhw_vec.begin(), in_dhw_vec.end(), @@ -138,7 +138,7 @@ class InterpolateOneDNNKernel : public framework::OpKernel { : dnnl::algorithm::resampling_linear; const auto out_dims_vec = ComputeOutputShape(ctx); - framework::DDim dim_out = phi::make_ddim(out_dims_vec); + 
framework::DDim dim_out = common::make_ddim(out_dims_vec); out->Resize(dim_out); InterpolateOneDNNHandler handler( diff --git a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc index 497a7186b537de..d2b715a5f56e6a 100644 --- a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc @@ -87,7 +87,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); - auto src_tz = phi::vectorize(x->dims()); + auto src_tz = common::vectorize(x->dims()); PADDLE_ENFORCE_EQ(begin_norm_axis, (src_tz.size() - 1), platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc index a1dad17392a22e..80af1b00b743cc 100644 --- a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc @@ -18,11 +18,11 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/blas/blas.h" namespace { +using common::vectorize; using dnnl::memory; using paddle::framework::ExecutionContext; using paddle::framework::GradVarName; using phi::OneDNNContext; -using phi::vectorize; using phi::funcs::OneDNNGetDataType; // Reshape a rank-3 tensor from P x M x N to (P * M) x N. @@ -467,7 +467,7 @@ class MatMulMKLDNNKernel : public paddle::framework::OpKernel { (*y_bd_dims)[i])); (out_dims)[i] = std::max((*x_bd_dims)[i], (*y_bd_dims)[i]); } - out->Resize(phi::make_ddim((out_dims))); + out->Resize(common::make_ddim((out_dims))); } } }; diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc index 48e8e12ccab142..63b373be5ad156 100644 --- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc @@ -49,7 +49,7 @@ class QuantOpKernel : public framework::OpKernel { auto& dev_ctx = ctx.template device_context(); - auto x_tz = phi::vectorize(x->dims()); + auto x_tz = common::vectorize(x->dims()); const bool is_negative_input = ctx.Attr("is_negative_input"); const bool bfloat16 = ctx.Attr("bfloat16"); diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc index e41baa39b47c58..99e25f98ce3598 100644 --- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc @@ -62,7 +62,7 @@ class ReQuantOpKernel : public framework::OpKernel { auto& dev_ctx = ctx.template device_context(); - auto src_tz = phi::vectorize(input->dims()); + auto src_tz = common::vectorize(input->dims()); auto src_paddle_dt = input->dtype(); auto dst_paddle_dt = with_shift ? DataType::UINT8 : src_paddle_dt; diff --git a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc index d1bbfe42293724..5e5e2f8c19abe1 100644 --- a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc @@ -38,7 +38,7 @@ static std::vector extract_shape( for (const auto& tensor : list_new_shape_tensor) { PADDLE_ENFORCE_EQ( tensor->dims(), - phi::make_ddim({1}), + common::make_ddim({1}), platform::errors::InvalidArgument( "If the element type of 'shape' in ReshapeOp is phi::DenseTensor, " "the element's shape must be [1]. 
But received the element's shape " @@ -68,7 +68,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { framework::DDim x_dims, out_dims; InferInOutShape(ctx, x_dims, out_dims); - auto x_vec_dims = phi::vectorize(x_dims); + auto x_vec_dims = common::vectorize(x_dims); auto x_type = phi::funcs ::ToOneDNNDataType(x->dtype()); phi::funcs::ReorderOneDNNHandler reorder_handler( @@ -89,7 +89,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { astream.wait(); out->Resize(out_dims); - auto reshape_dims = out_dims.size() != 0 ? phi::vectorize(out_dims) + auto reshape_dims = out_dims.size() != 0 ? common::vectorize(out_dims) : std::vector{1}; out->set_mem_desc(reorder_dst_memory_p->get_desc().reshape(reshape_dims)); } @@ -160,15 +160,15 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { auto x = ctx.Input("X"); x_dims = x->dims(); auto axes = ctx.Attr("axis"); - out_dims = phi::make_ddim( + out_dims = common::make_ddim( Flatten2Kernel::GetOutputShape(axes, x_dims)); } protected: static framework::DDim ValidateShape(const std::vector& shape, const framework::DDim& in_dims) { - const int64_t in_size = phi::product(in_dims); - auto in_dims_vec = phi::vectorize(in_dims); + const int64_t in_size = common::product(in_dims); + auto in_dims_vec = common::vectorize(in_dims); bool all_positive = std::all_of(in_dims_vec.cbegin(), in_dims_vec.cend(), [](int64_t i) { return i > 0; }); @@ -188,7 +188,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { platform::errors::InvalidArgument( "Only one dimension value of 'shape' in ReshapeOp can " "be -1. But received shape = [%s], shape[%d] is also -1.", - phi::make_ddim(shape), + common::make_ddim(shape), i)); unk_dim_idx = i; } else if (shape[i] == copy_dim_val) { @@ -200,7 +200,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { "the input tensor X's dimensions. " "But received shape = [%s], shape[%d] = 0, X's shape = [%s], " "X's dimensions = %d.", - phi::make_ddim(shape), + common::make_ddim(shape), i, in_dims, in_dims.size())); @@ -212,7 +212,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { "Each dimension value of 'shape' in ReshapeOp must not " "be negative except one unknown dimension. " "But received shape = [%s], shape[%d] = %d.", - phi::make_ddim(shape), + common::make_ddim(shape), i, shape[i])); } @@ -240,7 +240,7 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { "'shape' is [%s], known capacity of 'shape' is %d.", in_dims, in_size, - phi::make_ddim(shape), + common::make_ddim(shape), capacity)); } else { output_shape[unk_dim_idx] = -1; @@ -258,11 +258,11 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { "[%s], the capacity of 'shape' is %d.", in_dims, in_size, - phi::make_ddim(shape), + common::make_ddim(shape), capacity)); } } - return phi::make_ddim(output_shape); + return common::make_ddim(output_shape); } }; @@ -284,8 +284,9 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel { framework::DDim dx_dims; InferOutputShapeInGrad(ctx, dx_dims); - auto dout_vec_dims = dout->dims().size() != 0 ? phi::vectorize(dout->dims()) - : std::vector{1}; + auto dout_vec_dims = dout->dims().size() != 0 + ? common::vectorize(dout->dims()) + : std::vector{1}; auto dout_type = phi::funcs::ToOneDNNDataType(dout->dtype()); phi::funcs::ReorderOneDNNHandler reorder_handler( @@ -305,8 +306,8 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel { astream.wait(); dx->Resize(dx_dims); - const auto reshape_dims = - dx_dims.size() != 0 ? 
phi::vectorize(dx_dims) : std::vector{1}; + const auto reshape_dims = dx_dims.size() != 0 ? common::vectorize(dx_dims) + : std::vector{1}; reorder_dst_memory_p->get_desc().reshape(reshape_dims); } @@ -345,7 +346,7 @@ class ReshapeGradMKLDNNKernel : public ReshapeMKLDNNKernel { const framework::ExecutionContext& ctx, framework::DDim& dx_dims) const { // NOLINT auto xshape_dims = ctx.Input("XShape")->dims(); - dx_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size()); + dx_dims = common::slice_ddim(xshape_dims, 1, xshape_dims.size()); } void InferShapeFlattenGradOp(const framework::ExecutionContext& ctx, diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc index ee6712df4734bd..f3ab0e8459cc48 100644 --- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc @@ -44,7 +44,7 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel { return; } - auto x_vec_dims = phi::vectorize(x->dims()); + auto x_vec_dims = common::vectorize(x->dims()); auto x_type = phi::funcs::ToOneDNNDataType(x->dtype()); phi::funcs::ReorderOneDNNHandler reorder_handler( @@ -105,7 +105,7 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel { return; } - auto dout_vec_dims = phi::vectorize(dout->dims()); + auto dout_vec_dims = common::vectorize(dout->dims()); auto dout_type = phi::funcs::ToOneDNNDataType(dout->dtype()); phi::funcs::ReorderOneDNNHandler reorder_handler( diff --git a/paddle/fluid/operators/modified_huber_loss_op.cc b/paddle/fluid/operators/modified_huber_loss_op.cc index b44c795b6e5368..c6d553865277ed 100644 --- a/paddle/fluid/operators/modified_huber_loss_op.cc +++ b/paddle/fluid/operators/modified_huber_loss_op.cc @@ -38,7 +38,7 @@ class ModifiedHuberLossOp : public framework::OperatorWithKernel { x_dims.size())); if (ctx->IsRuntime() || - (phi::product(x_dims) > 0 && phi::product(y_dims) > 0)) { + (common::product(x_dims) > 0 && common::product(y_dims) > 0)) { PADDLE_ENFORCE_EQ( x_dims, y_dims, diff --git a/paddle/fluid/operators/modified_huber_loss_op.cu b/paddle/fluid/operators/modified_huber_loss_op.cu index f811b0ad9d6d64..d063e8d1cb4d57 100644 --- a/paddle/fluid/operators/modified_huber_loss_op.cu +++ b/paddle/fluid/operators/modified_huber_loss_op.cu @@ -49,7 +49,7 @@ class ModifiedHuberLossGradGPUKernel : public framework::OpKernel { auto* out0 = context.Output(framework::GradVarName("X")); if (out0) { - auto counts = phi::product(in1->dims()); + auto counts = common::product(in1->dims()); auto y_ptr = thrust::device_pointer_cast(in0->data()); auto inter_val_ptr = thrust::device_pointer_cast(in1->data()); auto out_grad_ptr = thrust::device_pointer_cast(in2->data()); diff --git a/paddle/fluid/operators/modified_huber_loss_op.h b/paddle/fluid/operators/modified_huber_loss_op.h index 571482ce475886..4330abde2a828a 100644 --- a/paddle/fluid/operators/modified_huber_loss_op.h +++ b/paddle/fluid/operators/modified_huber_loss_op.h @@ -92,7 +92,7 @@ class ModifiedHuberLossGradCPUKernel : public framework::OpKernel { const T* y_ptr = in0->data(); const T* inter_val_ptr = in1->data(); const T* out_grad_ptr = in2->data(); - size_t counts = static_cast(phi::product(in1->dims())); + size_t counts = static_cast(common::product(in1->dims())); T* x_grad_ptr = out0->mutable_data(context.GetPlace()); for (size_t i = 0; i < counts; ++i) { if (inter_val_ptr[i] < -1) { diff --git a/paddle/fluid/operators/nccl/nccl_op.cu.cc 
b/paddle/fluid/operators/nccl/nccl_op.cu.cc index 7dae16afafdf11..abb24cc8cae10d 100644 --- a/paddle/fluid/operators/nccl/nccl_op.cu.cc +++ b/paddle/fluid/operators/nccl/nccl_op.cu.cc @@ -110,7 +110,7 @@ class NCCLReduceKernel : public framework::OpKernel { if (root == gpu_id) { recvbuffer = out->mutable_data(ctx.GetPlace()); } else { - out->Resize(phi::make_ddim({0})); + out->Resize(common::make_ddim({0})); } VLOG(3) << "gpu : " << gpu_id << " invoke reduce. send " << x->numel() << " recv " << out->numel(); @@ -155,7 +155,7 @@ class NCCLBcastKernel : public framework::OpKernel { } else { auto* out = ctx.Output("Out"); VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. recv buffer " - << phi::product(out->dims()); + << common::product(out->dims()); PADDLE_ENFORCE_GPU_SUCCESS( platform::dynload::ncclBcast(out->mutable_data(ctx.GetPlace()), out->numel(), diff --git a/paddle/fluid/operators/nce_op.cc b/paddle/fluid/operators/nce_op.cc index 477d5aea7e8839..f4320cd0b6796e 100644 --- a/paddle/fluid/operators/nce_op.cc +++ b/paddle/fluid/operators/nce_op.cc @@ -100,7 +100,7 @@ class NCEOp : public framework::OperatorWithKernel { std::vector out_dims; out_dims.push_back(x_dims[0]); out_dims.push_back(1); - ctx->SetOutputDim("Cost", phi::make_ddim(out_dims)); + ctx->SetOutputDim("Cost", common::make_ddim(out_dims)); if (!is_test) { // set dims of output(SampleOut) @@ -108,8 +108,8 @@ class NCEOp : public framework::OperatorWithKernel { sample_out_dims.push_back(x_dims[0]); sample_out_dims.push_back( (num_true_classes == -1) ? -1 : (num_neg_samples + num_true_classes)); - ctx->SetOutputDim("SampleLogits", phi::make_ddim(sample_out_dims)); - ctx->SetOutputDim("SampleLabels", phi::make_ddim(sample_out_dims)); + ctx->SetOutputDim("SampleLogits", common::make_ddim(sample_out_dims)); + ctx->SetOutputDim("SampleLabels", common::make_ddim(sample_out_dims)); } } diff --git a/paddle/fluid/operators/nce_op.h b/paddle/fluid/operators/nce_op.h index f8983c0db9a1fa..a21c7c816e191d 100644 --- a/paddle/fluid/operators/nce_op.h +++ b/paddle/fluid/operators/nce_op.h @@ -165,10 +165,10 @@ class NCEKernel : public framework::OpKernel { (num_true_classes == -1) ? -1 : (num_neg_samples + num_true_classes)); sample_labels = &sample_labels_tmp; - sample_labels->Resize(phi::make_ddim(sample_out_dims)); + sample_labels->Resize(common::make_ddim(sample_out_dims)); sample_out = &sample_out_tmp; - sample_out->Resize(phi::make_ddim(sample_out_dims)); + sample_out->Resize(common::make_ddim(sample_out_dims)); } else { sample_labels = context.Output("SampleLabels"); sample_out = context.Output("SampleLogits"); diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc index c1f9650c62376a..6c64c6a1f72ffb 100644 --- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc @@ -55,14 +55,14 @@ class DecayedAdagradOp : public framework::OperatorWithKernel { ctx->HasOutput("MomentOut"), "Output", "MomentOut", "DecayedAdagradOp"); auto lr_dims = ctx->GetInputDim("LearningRate"); - PADDLE_ENFORCE_NE(phi::product(lr_dims), + PADDLE_ENFORCE_NE(common::product(lr_dims), 0, platform::errors::InvalidArgument( "Maybe the Input variable LearningRate has not " "been initialized. 
You may need to confirm " "if you put exe.run(startup_program) " "after optimizer.minimize function.")); - PADDLE_ENFORCE_EQ(phi::product(lr_dims), + PADDLE_ENFORCE_EQ(common::product(lr_dims), 1, platform::errors::InvalidArgument( "LearningRate should have one element")); diff --git a/paddle/fluid/operators/optimizers/dpsgd_op.cc b/paddle/fluid/operators/optimizers/dpsgd_op.cc index 0da5ae57b0932f..d8762b8bd719a7 100644 --- a/paddle/fluid/operators/optimizers/dpsgd_op.cc +++ b/paddle/fluid/operators/optimizers/dpsgd_op.cc @@ -54,12 +54,12 @@ class DpsgdOp : public framework::OperatorWithKernel { "Output(ParamOut) of DpsgdOp should not be null.")); auto lr_dims = ctx->GetInputDim("LearningRate"); - PADDLE_ENFORCE_EQ(phi::product(lr_dims), + PADDLE_ENFORCE_EQ(common::product(lr_dims), 1, platform::errors::InvalidArgument( "Learning rate should have 1 dimension. But Received " "LearningRate's dims [%s].", - phi::product(lr_dims))); + common::product(lr_dims))); auto param_dims = ctx->GetInputDim("Param"); PADDLE_ENFORCE_EQ( param_dims, diff --git a/paddle/fluid/operators/optimizers/ftrl_op.cc b/paddle/fluid/operators/optimizers/ftrl_op.cc index 7c757d3fddc25b..e6eadadc17b6cd 100644 --- a/paddle/fluid/operators/optimizers/ftrl_op.cc +++ b/paddle/fluid/operators/optimizers/ftrl_op.cc @@ -52,18 +52,18 @@ class FTRLOp : public framework::OperatorWithKernel { ctx->GetInputDim("Grad"))); auto lr_dim = ctx->GetInputDim("LearningRate"); - PADDLE_ENFORCE_NE(phi::product(lr_dim), + PADDLE_ENFORCE_NE(common::product(lr_dim), 0, platform::errors::InvalidArgument( "Maybe the Input variable LearningRate has not " "been initialized. You may need to confirm " "if you put exe.run(startup_program) " "after optimizer.minimize function.")); - PADDLE_ENFORCE_EQ(phi::product(lr_dim), + PADDLE_ENFORCE_EQ(common::product(lr_dim), 1, platform::errors::InvalidArgument( "Learning Rate should be a scalar, but got %d", - phi::product(lr_dim))); + common::product(lr_dim))); ctx->SetOutputDim("ParamOut", param_dim); ctx->SetOutputDim("SquaredAccumOut", param_dim); diff --git a/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc b/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc index 625db9f375ab02..0c5a9721e279ba 100644 --- a/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc +++ b/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc @@ -24,7 +24,7 @@ class Pow2DecayWithLinearWarmupOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext *ctx) const override { - auto dim = phi::make_ddim({1}); + auto dim = common::make_ddim({1}); ctx->SetOutputDim("LearningRateOut", dim); ctx->SetOutputDim("StepOut", dim); } diff --git a/paddle/fluid/operators/optimizers/proximal_gd_op.cc b/paddle/fluid/operators/optimizers/proximal_gd_op.cc index 08cc29ce9eb8db..5190d65ee0af26 100644 --- a/paddle/fluid/operators/optimizers/proximal_gd_op.cc +++ b/paddle/fluid/operators/optimizers/proximal_gd_op.cc @@ -44,7 +44,7 @@ class ProximalGDOp : public framework::OperatorWithKernel { auto lr_dim = ctx->GetInputDim("LearningRate"); PADDLE_ENFORCE_EQ( - phi::product(lr_dim), + common::product(lr_dim), 1, platform::errors::InvalidArgument( "Learning Rate should be a scalar. 
But received dimmensions:[%s]", diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.h b/paddle/fluid/operators/optimizers/sparse_momentum_op.h index d29b4b8fb2e5a4..4c47fd2b621784 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.h +++ b/paddle/fluid/operators/optimizers/sparse_momentum_op.h @@ -151,7 +151,7 @@ class SparseMomentumOp : public framework::OperatorWithKernel { "VelocityOut", "SparseMomentum"); - auto lr_dims = phi::product(ctx->GetInputDim("LearningRate")); + auto lr_dims = common::product(ctx->GetInputDim("LearningRate")); PADDLE_ENFORCE_EQ(lr_dims != 0 && lr_dims == 1, true, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc index 29d2807b239709..6529bbc29fcfe1 100644 --- a/paddle/fluid/operators/pad2d_op.cc +++ b/paddle/fluid/operators/pad2d_op.cc @@ -467,7 +467,7 @@ class Pad2dOp : public framework::OperatorWithKernel { } } - ctx->SetOutputDim("Out", phi::make_ddim(out_dims)); + ctx->SetOutputDim("Out", common::make_ddim(out_dims)); ctx->ShareLoD("X", /*->*/ "Out"); } @@ -499,7 +499,7 @@ class Pad2dOp : public framework::OperatorWithKernel { auto ar = paddle::framework::AttrReader(attrs); const std::string data_format = ar.Get("data_format"); return phi::KernelKey(tensor.place(), - phi::StringToDataLayout(data_format), + common::StringToDataLayout(data_format), expected_kernel_type.dtype()); } #endif diff --git a/paddle/fluid/operators/partial_sum_op.cc b/paddle/fluid/operators/partial_sum_op.cc index 0bba0381d20933..3cbb0e1c6e2083 100644 --- a/paddle/fluid/operators/partial_sum_op.cc +++ b/paddle/fluid/operators/partial_sum_op.cc @@ -86,7 +86,7 @@ class PartialSumOp : public framework::OperatorWithKernel { std::vector out_dims(2); out_dims[0] = batch_size; out_dims[1] = (length == -1) ? 
input_len - start_index : length; - ctx->SetOutputDim("Out", phi::make_ddim(out_dims)); + ctx->SetOutputDim("Out", common::make_ddim(out_dims)); ctx->ShareLoD("X", /*->*/ "Out"); } diff --git a/paddle/fluid/operators/positive_negative_pair_op.cc b/paddle/fluid/operators/positive_negative_pair_op.cc index 72236c012c357c..96d8bbaa6f772f 100644 --- a/paddle/fluid/operators/positive_negative_pair_op.cc +++ b/paddle/fluid/operators/positive_negative_pair_op.cc @@ -40,7 +40,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { "NeutralPair", "positive_negative_pair"); - auto scalar_dim = phi::make_ddim({1}); + auto scalar_dim = common::make_ddim({1}); if (ctx->HasInput("AccumulatePositivePair") || ctx->HasInput("AccumulateNegativePair") || ctx->HasInput("AccumulateNeutralPair")) { diff --git a/paddle/fluid/operators/pscore/CMakeLists.txt b/paddle/fluid/operators/pscore/CMakeLists.txt index dea89806bc202c..25314c72b8033f 100755 --- a/paddle/fluid/operators/pscore/CMakeLists.txt +++ b/paddle/fluid/operators/pscore/CMakeLists.txt @@ -21,6 +21,7 @@ if(WITH_ARM_BRPC) sendrecv_rpc arm_brpc phi + common glog snappy device_context) @@ -39,6 +40,7 @@ else() sendrecv_rpc ${EXTERNAL_BRPC_DEPS} phi + common zlib device_context) endif() diff --git a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc index 45aef43caeeb48..aafc8ab7faad1f 100644 --- a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc +++ b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc @@ -64,12 +64,12 @@ class DistributedLookupTableOp : public framework::OperatorWithKernel { for (auto &ids_dim : ids_dims) { if (lookup_table_version == "lookup_table") { - outputs_dims.push_back(phi::make_ddim({ids_dim[0], table_dims[1]})); + outputs_dims.push_back(common::make_ddim({ids_dim[0], table_dims[1]})); } else if (lookup_table_version == "lookup_table_v2") { outputs_dims.push_back( - phi::make_ddim({static_cast(ids_dim[0]), - static_cast(ids_dim[1]), - static_cast(table_dims[1])})); + common::make_ddim({static_cast(ids_dim[0]), + static_cast(ids_dim[1]), + static_cast(table_dims[1])})); } } diff --git a/paddle/fluid/operators/pscore/distributed_lookup_table_op.h b/paddle/fluid/operators/pscore/distributed_lookup_table_op.h index 9b99089c141192..414500c2faac3a 100644 --- a/paddle/fluid/operators/pscore/distributed_lookup_table_op.h +++ b/paddle/fluid/operators/pscore/distributed_lookup_table_op.h @@ -119,10 +119,10 @@ class DistributedLookupTableKernel : public framework::OpKernel { auto *id_tensor = id_vars[i]->GetMutable(); auto *out_tensor = out_vars[i]->GetMutable(); - auto id_dims = phi::vectorize(id_tensor->dims()); - out_tensor->Resize(phi::make_ddim({static_cast(id_dims[0]), - static_cast(id_dims[1]), - static_cast(emb_dim)})); + auto id_dims = common::vectorize(id_tensor->dims()); + out_tensor->Resize(common::make_ddim({static_cast(id_dims[0]), + static_cast(id_dims[1]), + static_cast(emb_dim)})); } } } diff --git a/paddle/fluid/operators/pscore/fake_init_op.cc b/paddle/fluid/operators/pscore/fake_init_op.cc index cefd0ee5855f2b..cd919cb7ca0bf0 100644 --- a/paddle/fluid/operators/pscore/fake_init_op.cc +++ b/paddle/fluid/operators/pscore/fake_init_op.cc @@ -21,7 +21,7 @@ class FakeInitInferShape : public framework::InferShapeBase { void operator()(framework::InferShapeContext *ctx) const override { OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "FakeInit"); auto &shape = ctx->Attrs().Get>("shape"); - 
ctx->SetOutputDim("Out", phi::make_ddim(shape)); + ctx->SetOutputDim("Out", common::make_ddim(shape)); } }; @@ -38,10 +38,10 @@ class FakeInitOp : public framework::OperatorBase { if (out_var.IsType()) { tensor = out_var.GetMutable(); - tensor->Resize(phi::make_ddim(Attr>("shape"))); + tensor->Resize(common::make_ddim(Attr>("shape"))); } else if (out_var.IsType()) { tensor = out_var.GetMutable()->mutable_value(); - tensor->Resize(phi::make_ddim(Attr>("shape"))); + tensor->Resize(common::make_ddim(Attr>("shape"))); } else { PADDLE_THROW(platform::errors::InvalidArgument( "fake init op's output only" diff --git a/paddle/fluid/operators/pull_box_extended_sparse_op.cc b/paddle/fluid/operators/pull_box_extended_sparse_op.cc index f0799f75862bc4..75918b9ad62a48 100644 --- a/paddle/fluid/operators/pull_box_extended_sparse_op.cc +++ b/paddle/fluid/operators/pull_box_extended_sparse_op.cc @@ -54,14 +54,15 @@ class PullBoxExtendedSparseOp : public framework::OperatorWithKernel { "Shape error in %lu id, the last dimension of the " "'Ids' tensor must be 1.", i)); - auto out_dim = phi::vectorize(phi::slice_ddim(ids_dims, 0, ids_rank - 1)); + auto out_dim = + common::vectorize(common::slice_ddim(ids_dims, 0, ids_rank - 1)); out_dim.push_back(emb_size); - outs_dims[i] = phi::make_ddim(out_dim); + outs_dims[i] = common::make_ddim(out_dim); auto out_extended_dim = - phi::vectorize(phi::slice_ddim(ids_dims, 0, ids_rank - 1)); + common::vectorize(common::slice_ddim(ids_dims, 0, ids_rank - 1)); out_extended_dim.push_back(emb_extended_size); - outs_extended_dims[i] = phi::make_ddim(out_extended_dim); + outs_extended_dims[i] = common::make_ddim(out_extended_dim); } ctx->SetOutputsDim("Out", outs_dims); ctx->SetOutputsDim("OutExtend", outs_extended_dims); diff --git a/paddle/fluid/operators/pull_box_sparse_op.cc b/paddle/fluid/operators/pull_box_sparse_op.cc index a8f91c85485c7c..d37cc35a599450 100644 --- a/paddle/fluid/operators/pull_box_sparse_op.cc +++ b/paddle/fluid/operators/pull_box_sparse_op.cc @@ -45,9 +45,10 @@ class PullBoxSparseOp : public framework::OperatorWithKernel { "Shape error in %lu id, the last dimension of the " "'Ids' tensor must be 1.", i)); - auto out_dim = phi::vectorize(phi::slice_ddim(ids_dims, 0, ids_rank - 1)); + auto out_dim = + common::vectorize(common::slice_ddim(ids_dims, 0, ids_rank - 1)); out_dim.push_back(hidden_size); - outs_dims[i] = phi::make_ddim(out_dim); + outs_dims[i] = common::make_ddim(out_dim); } ctx->SetOutputsDim("Out", outs_dims); for (size_t i = 0; i < n_ids; ++i) { diff --git a/paddle/fluid/operators/pull_gpups_sparse_op.cc b/paddle/fluid/operators/pull_gpups_sparse_op.cc index afaa9af3fda20a..6055632f5681a1 100644 --- a/paddle/fluid/operators/pull_gpups_sparse_op.cc +++ b/paddle/fluid/operators/pull_gpups_sparse_op.cc @@ -53,9 +53,10 @@ class PullGpuPSSparseOp : public framework::OperatorWithKernel { "Shape error in %lu id, the last dimension of the " "'Ids' tensor must be 1.", i)); - auto out_dim = phi::vectorize(phi::slice_ddim(ids_dims, 0, ids_rank - 1)); + auto out_dim = + common::vectorize(common::slice_ddim(ids_dims, 0, ids_rank - 1)); out_dim.push_back(embedding_size); - outs_dims[i] = phi::make_ddim(out_dim); + outs_dims[i] = common::make_ddim(out_dim); } ctx->SetOutputsDim("Out", outs_dims); for (size_t i = 0; i < n_ids; ++i) { diff --git a/paddle/fluid/operators/pull_sparse_op.cc b/paddle/fluid/operators/pull_sparse_op.cc index 4850bf33ae89cd..55a6af8466b863 100644 --- a/paddle/fluid/operators/pull_sparse_op.cc +++ 
b/paddle/fluid/operators/pull_sparse_op.cc @@ -47,9 +47,10 @@ class PullSparseOp : public framework::OperatorWithKernel { "Shape error in %lu id, the last dimension of " " the 'Ids' tensor must be 1.", i)); - auto out_dim = phi::vectorize(phi::slice_ddim(ids_dims, 0, ids_rank - 1)); + auto out_dim = + common::vectorize(common::slice_ddim(ids_dims, 0, ids_rank - 1)); out_dim.push_back(hidden_size); - outs_dims[i] = phi::make_ddim(out_dim); + outs_dims[i] = common::make_ddim(out_dim); } ctx->SetOutputsDim("Out", outs_dims); for (size_t i = 0; i < n_ids; ++i) { diff --git a/paddle/fluid/operators/pull_sparse_v2_op.cc b/paddle/fluid/operators/pull_sparse_v2_op.cc index 993950c360c12c..d134607d3c4bb2 100644 --- a/paddle/fluid/operators/pull_sparse_v2_op.cc +++ b/paddle/fluid/operators/pull_sparse_v2_op.cc @@ -40,9 +40,9 @@ class PullSparseV2Op : public framework::OperatorWithKernel { outs_dims.resize(n_ids); for (size_t i = 0; i < n_ids; ++i) { const auto ids_dims = all_ids_dim[i]; - auto out_dim = phi::vectorize(ids_dims); + auto out_dim = common::vectorize(ids_dims); out_dim.push_back(hidden_size); - outs_dims[i] = phi::make_ddim(out_dim); + outs_dims[i] = common::make_ddim(out_dim); } ctx->SetOutputsDim("Out", outs_dims); for (size_t i = 0; i < n_ids; ++i) { diff --git a/paddle/fluid/operators/pyramid_hash_op.cc b/paddle/fluid/operators/pyramid_hash_op.cc index 787797544ef849..45373070d95f96 100644 --- a/paddle/fluid/operators/pyramid_hash_op.cc +++ b/paddle/fluid/operators/pyramid_hash_op.cc @@ -218,7 +218,7 @@ class PyramidHashOP : public framework::OperatorWithKernel { // something to do in runtime. } else { // compile time - ctx->SetOutputDim("Out", phi::make_ddim({-1, num_emb})); + ctx->SetOutputDim("Out", common::make_ddim({-1, num_emb})); ctx->SetOutputDim("X_Temp_Out", x_dims); ctx->ShareLoD("X", /*->*/ "Out"); } @@ -295,7 +295,7 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { const auto& offset = bottom->lod()[0]; const auto* bottom_data_ori = bottom->data(); auto* buff = ctx.Output("X_Temp_Out"); - buff->Resize(phi::make_ddim({bottom->dims()[0], bottom->dims()[1]})); + buff->Resize(common::make_ddim({bottom->dims()[0], bottom->dims()[1]})); float* bottom_data = buff->mutable_data(ctx.GetPlace()); for (int i = 0; i < bottom->dims()[0]; i++) { bottom_data[i] = bottom_data_ori[i]; // NOLINT @@ -332,7 +332,7 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { } } - drop_pos->Resize(phi::make_ddim( + drop_pos->Resize(common::make_ddim( {bottom->dims()[0] * bottom->dims()[1] * _pyramid_layer, 1})); std::vector drop_pos_offset; drop_pos_offset.resize(offset.size()); @@ -380,7 +380,7 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { framework::LoD top_lod; top_lod.push_back(top_offset); top->set_lod(top_lod); - top->Resize(phi::make_ddim({top_l, _num_emb})); + top->Resize(common::make_ddim({top_l, _num_emb})); auto* top_data = top->mutable_data(ctx.GetPlace()); framework::LoD drop_pos_lod; diff --git a/paddle/fluid/operators/quantize_linear_op.h b/paddle/fluid/operators/quantize_linear_op.h index d6c3b3d2e50ae8..1f1bfc3dea73bd 100644 --- a/paddle/fluid/operators/quantize_linear_op.h +++ b/paddle/fluid/operators/quantize_linear_op.h @@ -14,13 +14,13 @@ limitations under the License. 
*/ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/fake_quantize_op.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/transform.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/hostdevice.h" #include "paddle/phi/kernels/cast_kernel.h" @@ -70,7 +70,7 @@ class QuantizeLinearKernel : public framework::OpKernel { auto* in_accum = context.Input("InAccum"); auto* in_state = context.Input("InState"); phi::DenseTensor tmp_scale; - tmp_scale.Resize(phi::make_dim(1)); + tmp_scale.Resize(common::make_dim(1)); T* cur_scale_data = dev_ctx.template Alloc(&tmp_scale); FindAbsMaxFunctor()( diff --git a/paddle/fluid/operators/randperm_op.h b/paddle/fluid/operators/randperm_op.h index 988b5d475ee31e..96981a4728402a 100644 --- a/paddle/fluid/operators/randperm_op.h +++ b/paddle/fluid/operators/randperm_op.h @@ -54,7 +54,7 @@ class RandpermKernel : public framework::OpKernel { } else { phi::DenseTensor tmp_tensor; - tmp_tensor.Resize(phi::make_ddim({n})); + tmp_tensor.Resize(common::make_ddim({n})); T* tmp_data = tmp_tensor.mutable_data(platform::CPUPlace()); random_permate(tmp_data, n, seed); framework::TensorCopy(tmp_tensor, ctx.GetPlace(), out_tensor); diff --git a/paddle/fluid/operators/range_op.h b/paddle/fluid/operators/range_op.h index e59d4f3cfcaddd..195ef276b957e7 100644 --- a/paddle/fluid/operators/range_op.h +++ b/paddle/fluid/operators/range_op.h @@ -58,7 +58,7 @@ class CPURangeKernel : public framework::OpKernel { auto* out = context.Output("Out"); int64_t size = 0; GetSize(start, end, step, &size); - out->Resize(phi::make_ddim({size})); + out->Resize(common::make_ddim({size})); T* out_data = out->mutable_data(context.GetPlace()); T value = start; for (int64_t i = 0; i < size; ++i) { diff --git a/paddle/fluid/operators/rank_attention.cu.h b/paddle/fluid/operators/rank_attention.cu.h index 6726a2defc4b83..7077bd7a7aa4cd 100644 --- a/paddle/fluid/operators/rank_attention.cu.h +++ b/paddle/fluid/operators/rank_attention.cu.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once -#include "paddle/phi/core/utils/dim.h" +#include "paddle/common/dim.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { diff --git a/paddle/fluid/operators/read_file_op.cc b/paddle/fluid/operators/read_file_op.cc index 0330cac93396c6..239589847673b0 100644 --- a/paddle/fluid/operators/read_file_op.cc +++ b/paddle/fluid/operators/read_file_op.cc @@ -38,7 +38,7 @@ class CPUReadFileKernel : public framework::OpKernel { auto* out = ctx.Output("Out"); std::vector out_shape = {file_size}; - out->Resize(phi::make_ddim(out_shape)); + out->Resize(common::make_ddim(out_shape)); uint8_t* data = out->mutable_data(ctx.GetPlace()); @@ -57,7 +57,7 @@ class ReadFileOp : public framework::OperatorWithKernel { "Output(Out) of ReadFileOp is null.")); auto out_dims = std::vector(1, -1); - ctx->SetOutputDim("Out", phi::make_ddim(out_dims)); + ctx->SetOutputDim("Out", common::make_ddim(out_dims)); } protected: diff --git a/paddle/fluid/operators/reader/create_py_reader_op.cc b/paddle/fluid/operators/reader/create_py_reader_op.cc index 6b935adf9a2f38..57cf3370563936 100644 --- a/paddle/fluid/operators/reader/create_py_reader_op.cc +++ b/paddle/fluid/operators/reader/create_py_reader_op.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/common/ddim.h" #include "paddle/fluid/operators/reader/py_reader.h" #include "paddle/fluid/operators/reader/reader_op_registry.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace operators { @@ -68,7 +68,7 @@ class CreatePyReaderOp : public framework::OperatorBase { int shape_end_index = shape_start_index + rank; auto shape = std::vector(shape_concat.begin() + shape_start_index, shape_concat.begin() + shape_end_index); - dims.push_back(phi::make_ddim(shape)); + dims.push_back(common::make_ddim(shape)); shape_start_index = shape_end_index; } diff --git a/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h b/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h index f3dbc5c49c2ddc..da265a6fce76da 100644 --- a/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h +++ b/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h @@ -18,10 +18,10 @@ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/operators/reader/blocking_queue.h" #include "paddle/fluid/platform/place.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reader/reader_op_registry.cc b/paddle/fluid/operators/reader/reader_op_registry.cc index ff9d4260230dd6..e62d728b6f0171 100644 --- a/paddle/fluid/operators/reader/reader_op_registry.cc +++ b/paddle/fluid/operators/reader/reader_op_registry.cc @@ -31,7 +31,7 @@ std::vector RestoreShapes(const std::vector& shape_concat, for (int len : ranks) { auto start_it = shape_concat.begin() + offset; auto end_it = start_it + len; - res.push_back(phi::make_ddim(std::vector(start_it, end_it))); + res.push_back(common::make_ddim(std::vector(start_it, end_it))); offset += len; } return res; diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc index 0362e618c2bc57..a5d4ce5e298289 100644 --- a/paddle/fluid/operators/recurrent_op.cc +++ b/paddle/fluid/operators/recurrent_op.cc @@ -203,9 +203,9 @@ void RecurrentBase::LinkTensor(const framework::Scope &src_scope, // (seq_len, shape) -> return [seq_len] + list(shape) framework::DDim 
RecurrentBase::PrependDims(size_t seq_len, const framework::DDim &src) { - auto dims = phi::vectorize(src); + auto dims = common::vectorize(src); dims.insert(dims.begin(), static_cast(seq_len)); - return phi::make_ddim(dims); + return common::make_ddim(dims); } RecurrentOp::RecurrentOp(const std::string &type, @@ -253,9 +253,9 @@ void RecurrentOp::RunImpl(const framework::Scope &scope, phi::DenseTensor *inside) { inside->ShareDataWith(outside.Slice( seq_offset, seq_offset + 1)); // NOLINT - auto dims = phi::vectorize(inside->dims()); + auto dims = common::vectorize(inside->dims()); dims.erase(dims.begin()); - inside->Resize(phi::make_ddim(dims)); + inside->Resize(common::make_ddim(dims)); }); if (has_state) { @@ -383,9 +383,9 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, [&](const phi::DenseTensor &outside, phi::DenseTensor *inside) { inside->ShareDataWith( outside.Slice(seq_offset, seq_offset + 1)); // NOLINT - auto dims = phi::vectorize(inside->dims()); + auto dims = common::vectorize(inside->dims()); dims.erase(dims.begin()); - inside->Resize(phi::make_ddim(dims)); + inside->Resize(common::make_ddim(dims)); }, true /*is_backward*/); auto og_set = List2Set(Inputs(kOutputGrads)); @@ -495,7 +495,7 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, framework::AttributeMap attrs; attrs["dtype"] = framework::TransToProtoVarType(inside_tensor.dtype()); - attrs["shape"] = phi::vectorize(inside_tensor.dims()); + attrs["shape"] = common::vectorize(inside_tensor.dims()); attrs["value"] = 0.0f; auto zero_op = diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h index 3d262627970515..e69492501c1173 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.h @@ -544,11 +544,11 @@ class ReduceBaseOp : public framework::OperatorWithKernel { if (reduce_all) { if (keep_dim) ctx->SetOutputDim("Out", - phi::make_ddim(std::vector(x_rank, 1))); + common::make_ddim(std::vector(x_rank, 1))); else ctx->SetOutputDim("Out", {1}); } else { - auto dims_vector = vectorize(x_dims); + auto dims_vector = common::vectorize(x_dims); if (keep_dim) { for (size_t i = 0; i < dims.size(); ++i) { dims_vector[dims[i]] = 1; @@ -565,7 +565,7 @@ class ReduceBaseOp : public framework::OperatorWithKernel { if (!keep_dim && dims_vector.size() == 0) { dims_vector.push_back(1); } - auto out_dims = phi::make_ddim(dims_vector); + auto out_dims = common::make_ddim(dims_vector); ctx->SetOutputDim("Out", out_dims); if (dims.size() > 0 && dims[0] != 0) { // Only pass LoD when not reducing on the first dim. 
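// --------------------------------------------------------------------------
// Editor's sketch (not part of this patch): RecurrentBase::PrependDims above
// keeps its (seq_len, shape) -> [seq_len] + list(shape) logic and only moves
// to the common:: helpers. The same pattern, standalone, hedged on the
// assumption that common::vectorize still defaults to std::vector<int64_t>:
#include <cstddef>
#include <cstdint>
#include <vector>
#include "paddle/common/ddim.h"

inline common::DDim PrependSeqLen(size_t seq_len, const common::DDim &src) {
  std::vector<int64_t> dims = common::vectorize(src);
  dims.insert(dims.begin(), static_cast<int64_t>(seq_len));
  return common::make_ddim(dims);
}
// --------------------------------------------------------------------------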
@@ -810,7 +810,7 @@ class ReduceCudaGradKernel : public framework::OpKernel { // get reduce_dim and reduce_num for reduce_mean_grad int dim_size = in_x->dims().size(); std::vector reduce_dims = GetReduceDim(dims, dim_size, reduce_all); - auto update_dims = vectorize(d_x->dims()); + auto update_dims = common::vectorize(d_x->dims()); int reduce_num = 1; for (auto i : reduce_dims) { reduce_num *= (in_x->dims())[i]; @@ -819,7 +819,7 @@ class ReduceCudaGradKernel : public framework::OpKernel { // make new tensor phi::DenseTensor new_d_out(d_out->type()); new_d_out.ShareDataWith(*d_out); - new_d_out.Resize(phi::make_ddim(update_dims)); + new_d_out.Resize(common::make_ddim(update_dims)); auto& dev_ctx = context.cuda_device_context(); if (out_dtype > 0) { d_x->mutable_data(dev_ctx.GetPlace(), pt_out_dtype); diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_function.h b/paddle/fluid/operators/reduce_ops/reduce_op_function.h index 3176e489f89b30..dd9f22d25c86c1 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op_function.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op_function.h @@ -58,13 +58,13 @@ void ReduceFunctor(const DeviceContext& context, DDim out_dims = output->dims(); if (keep_dim && x_rank > 1) { const int kDelFlag = -2; - auto dims_vector = phi::vectorize(out_dims); + auto dims_vector = common::vectorize(out_dims); for (size_t i = 0; i < dims_ref.size(); ++i) { dims_vector[dims_ref[i]] = kDelFlag; } dims_vector.erase(remove(dims_vector.begin(), dims_vector.end(), kDelFlag), dims_vector.end()); - out_dims = phi::make_ddim(dims_vector); + out_dims = common::make_ddim(dims_vector); } auto& place = *context.eigen_device(); Functor functor; @@ -90,7 +90,7 @@ void ReduceGradFunctor(const DeviceContext& context, auto x_grad = EigenTensor::From(*output); auto x_rank = static_cast(x.dimensions().size()); auto x_dims = input0.dims(); - auto reduced_dims_v = phi::vectorize(x_dims); + auto reduced_dims_v = common::vectorize(x_dims); std::vector dims_ref = dims; Eigen::array broadcast_dim; for (size_t i = 0; i < D; ++i) broadcast_dim[i] = 1; @@ -104,7 +104,7 @@ void ReduceGradFunctor(const DeviceContext& context, broadcast_dim[dims_ref[i]] = x_dims[dims_ref[i]]; broad_cats_times *= x_dims[dims_ref[i]]; } - auto reduced_dims = phi::make_ddim(reduced_dims_v); + auto reduced_dims = common::make_ddim(reduced_dims_v); auto x_reduce = EigenTensor::From(input1, reduced_dims); auto x_reduce_grad = EigenTensor::From(input2, reduced_dims); diff --git a/paddle/fluid/operators/repeat_interleave_op.cc b/paddle/fluid/operators/repeat_interleave_op.cc index 44d022f4d5fbce..15b4b80cb739b8 100644 --- a/paddle/fluid/operators/repeat_interleave_op.cc +++ b/paddle/fluid/operators/repeat_interleave_op.cc @@ -39,7 +39,7 @@ class RepeatInterleaveOp : public framework::OperatorWithKernel { auto input_dim = ctx->GetInputDim("X"); auto dim = ctx->Attrs().Get("dim"); - auto output_dim = phi::vectorize(input_dim); + auto output_dim = common::vectorize(input_dim); PADDLE_ENFORCE_EQ( dim < input_dim.size() && dim >= (0 - input_dim.size()), true, @@ -78,7 +78,7 @@ class RepeatInterleaveOp : public framework::OperatorWithKernel { output_dim[dim] = input_dim[dim] * repeats; } VLOG(3) << "infershap out " << output_dim[dim]; - ctx->SetOutputDim("Out", phi::make_ddim(output_dim)); + ctx->SetOutputDim("Out", common::make_ddim(output_dim)); auto type = ctx->GetInputsVarType("X")[0]; if (type == framework::proto::VarType::LOD_TENSOR) { ctx->ShareLoD("X", /*->*/ "Out"); diff --git a/paddle/fluid/operators/reshape_op.cc 
b/paddle/fluid/operators/reshape_op.cc index 3a57b6da5642ab..30d4fb0cf9ad4c 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -99,7 +99,7 @@ class ReshapeOp : public framework::OperatorWithKernel { infer_shape[i] = static_cast(in_dims[static_cast(i)]); } } - auto infer_out_dims = phi::make_ddim(infer_shape); + auto infer_out_dims = common::make_ddim(infer_shape); ctx->SetOutputDim("Out", infer_out_dims); return; } @@ -112,7 +112,7 @@ class ReshapeOp : public framework::OperatorWithKernel { num_ele *= static_cast(shape_dims[i]); } auto vec_dims = std::vector(num_ele, -1); - auto out_dims = phi::make_ddim(vec_dims); + auto out_dims = common::make_ddim(vec_dims); ctx->SetOutputDim("Out", out_dims); ctx->ShareLoD("X", /*->*/ "Out"); return; @@ -137,8 +137,8 @@ class ReshapeOp : public framework::OperatorWithKernel { static framework::DDim ValidateShape(const std::vector shape, const framework::DDim &in_dims) { - const int64_t in_size = phi::product(in_dims); - auto in_dims_vec = phi::vectorize(in_dims); + const int64_t in_size = common::product(in_dims); + auto in_dims_vec = common::vectorize(in_dims); bool all_positive = std::all_of(in_dims_vec.cbegin(), in_dims_vec.cend(), [](int64_t i) { return i > 0; }); @@ -158,7 +158,7 @@ class ReshapeOp : public framework::OperatorWithKernel { platform::errors::InvalidArgument( "Only one dimension value of 'shape' in ReshapeOp can " "be -1. But received shape = [%s], shape[%d] is also -1.", - phi::make_ddim(shape), + common::make_ddim(shape), i)); unk_dim_idx = static_cast(i); } else if (shape[i] == copy_dim_val) { @@ -170,7 +170,7 @@ class ReshapeOp : public framework::OperatorWithKernel { "the input tensor X's dimensions. " "But received shape = [%s], shape[%d] = 0, X's shape = [%s], " "X's dimensions = %d.", - phi::make_ddim(shape), + common::make_ddim(shape), i, in_dims, in_dims.size())); @@ -182,7 +182,7 @@ class ReshapeOp : public framework::OperatorWithKernel { "Each dimension value of 'shape' in ReshapeOp must not " "be negative except one unknown dimension. 
" "But received shape = [%s], shape[%d] = %d.", - phi::make_ddim(shape), + common::make_ddim(shape), i, shape[i])); } @@ -212,7 +212,7 @@ class ReshapeOp : public framework::OperatorWithKernel { "'shape' is [%s], known capacity of 'shape' is %d.", in_dims, in_size, - phi::make_ddim(shape), + common::make_ddim(shape), capacity)); } else { output_shape[unk_dim_idx] = -1; @@ -230,7 +230,7 @@ class ReshapeOp : public framework::OperatorWithKernel { "[%s], the capacity of 'shape' is %d.", in_dims, in_size, - phi::make_ddim(shape), + common::make_ddim(shape), capacity)); } } @@ -249,11 +249,11 @@ class ReshapeOp : public framework::OperatorWithKernel { "capacity of 'Out' is %d.", in_dims, in_size, - phi::make_ddim(shape), + common::make_ddim(shape), capacity)); } - return phi::make_ddim(output_shape); + return common::make_ddim(output_shape); } protected: @@ -529,7 +529,7 @@ class Reshape2Op : public ReshapeOp { for (int i = 0; i < x_dims.size(); ++i) { xshape_dims[i + 1] = x_dims[i]; } - ctx->SetOutputDim("XShape", phi::make_ddim(xshape_dims)); + ctx->SetOutputDim("XShape", common::make_ddim(xshape_dims)); ctx->ShareLoD("X", /*->*/ "XShape"); } ReshapeOp::InferShape(ctx); diff --git a/paddle/fluid/operators/sampling_id_op.cc b/paddle/fluid/operators/sampling_id_op.cc index 785d148f79df0b..5df5270976ca42 100644 --- a/paddle/fluid/operators/sampling_id_op.cc +++ b/paddle/fluid/operators/sampling_id_op.cc @@ -41,7 +41,7 @@ class SamplingIdOp : public framework::OperatorWithKernel { input_dims.size())); auto dim0 = input_dims[0]; - framework::DDim dims = phi::make_ddim({dim0}); + framework::DDim dims = common::make_ddim({dim0}); ctx->SetOutputDim("Out", dims); ctx->ShareLoD("X", "Out"); } diff --git a/paddle/fluid/operators/sampling_id_op.h b/paddle/fluid/operators/sampling_id_op.h index 38c0ea3834af7b..730d84c2a651e1 100644 --- a/paddle/fluid/operators/sampling_id_op.h +++ b/paddle/fluid/operators/sampling_id_op.h @@ -74,7 +74,7 @@ class SamplingIdKernel : public framework::OpKernel { out_dim.push_back(static_cast(batch_size)); phi::DenseTensor* output = context.Output("Out"); - output->Resize(phi::make_ddim(out_dim)); + output->Resize(common::make_ddim(out_dim)); output->mutable_data(context.GetPlace()); framework::TensorFromVector(ids, context.device_context(), output); } diff --git a/paddle/fluid/operators/sequence_ops/CMakeLists.txt b/paddle/fluid/operators/sequence_ops/CMakeLists.txt index 1bd10f19e03594..38ac50b0d6434a 100644 --- a/paddle/fluid/operators/sequence_ops/CMakeLists.txt +++ b/paddle/fluid/operators/sequence_ops/CMakeLists.txt @@ -6,5 +6,5 @@ endif() register_operators() if(WITH_UNITY_BUILD) - target_link_libraries(paddle_operators_sequence_ops_unity phi) + target_link_libraries(paddle_operators_sequence_ops_unity phi common) endif() diff --git a/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc b/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc index 3ef695b111993c..dd65162b3aad46 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc @@ -66,7 +66,7 @@ class SequenceConcatOp : public framework::OperatorWithKernel { std::vector out_dims; for (auto &x_dim : x_dims) { if (out_dims.empty()) { - out_dims = phi::vectorize(x_dim); + out_dims = common::vectorize(x_dim); } batch_size += x_dim[0]; PADDLE_ENFORCE_NE( @@ -75,25 +75,25 @@ class SequenceConcatOp : public framework::OperatorWithKernel { platform::errors::InvalidArgument( "The first dim of SequenceConcatOp inputs must not be 0.")); 
if (feature_size == 0) { - feature_size = phi::product(x_dim) / x_dim[0]; + feature_size = common::product(x_dim) / x_dim[0]; } else { PADDLE_ENFORCE_EQ( feature_size, - phi::product(x_dim) / x_dim[0], + common::product(x_dim) / x_dim[0], platform::errors::InvalidArgument( "Each input of SequenceConcatOp inputs must have same feature " "size, But " "the feature size we received is %d, the feature size of 1st " "input is %d", feature_size, - phi::product(x_dim) / x_dim[0])); + common::product(x_dim) / x_dim[0])); } } if (batch_size < 0) { batch_size = -1; // Normalize batch size for compile time. } out_dims[0] = batch_size; - context->SetOutputDim("Out", phi::make_ddim(out_dims)); + context->SetOutputDim("Out", common::make_ddim(out_dims)); if (!context->IsRuntime()) { // Runtime LoD infershape will be computed // in Kernel. context->ShareLoD("X", "Out"); diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h index 3eb7e51cfe0c6e..c66f4065a58f15 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h @@ -35,7 +35,7 @@ class SequenceEnumerateKernel : public framework::OpKernel { "Input(X) phi::DenseTensor of SequenceEnumerateOp does not contain " "LoD information.")); - auto in_dims = phi::vectorize(in->dims()); + auto in_dims = common::vectorize(in->dims()); auto lod0 = in->lod()[0]; PADDLE_ENFORCE_EQ( static_cast(in_dims[0]), diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu index 5c1058ebf16062..053c439814e957 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu @@ -70,7 +70,7 @@ struct SequenceExpandAsFunctor { const phi::Vector &ref_lod, /*expand referenced lod*/ phi::DenseTensor *out) { int height = x.dims()[0]; - int width = phi::product(x.dims()) / height; + int width = common::product(x.dims()) / height; const int kThreadsPerBlock = 1024; int thread_x = kThreadsPerBlock; @@ -100,7 +100,7 @@ struct SequenceExpandAsGradFunctor { const phi::Vector &ref_lod, /*expand based lod*/ phi::DenseTensor *dx) { int height = dx->dims()[0]; - int width = phi::product(dx->dims()) / height; + int width = common::product(dx->dims()) / height; const int kThreadsPerBlock = 1024; int thread_x = kThreadsPerBlock; diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h index be195d72b5665c..26f428b165256b 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h @@ -47,7 +47,7 @@ struct SequenceExpandAsFunctor { const phi::Vector &ref_lod, /*expand referenced lod*/ phi::DenseTensor *out) { int64_t height = x.dims()[0]; - int64_t width = phi::product(x.dims()) / height; + int64_t width = common::product(x.dims()) / height; const T *in_data = x.data(); T *out_data = out->mutable_data(context.GetPlace()); @@ -124,7 +124,7 @@ struct SequenceExpandAsGradFunctor { const phi::Vector &ref_lod, /*expand referenced lod*/ phi::DenseTensor *dx) { int64_t height = dx->dims()[0]; - int64_t width = phi::product(dx->dims()) / height; + int64_t width = common::product(dx->dims()) / height; const T *dout_data = dout.data(); T *dx_data = dx->mutable_data(context.GetPlace()); diff --git 
a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu index b70b750daba915..e46e17418a490b 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu @@ -199,7 +199,7 @@ struct SequenceExpandGradFunctor { const phi::Vector& x_lod, /*expand source lod*/ const phi::Vector& ref_lod, /*expand based lod*/ LoDTensor* dx) { - int x_item_length = phi::product(dx->dims()) / dx->dims()[0]; + int x_item_length = common::product(dx->dims()) / dx->dims()[0]; phi::Vector out_offset(x_lod.size()); GetOutputOffset(x_lod, ref_lod, &out_offset); diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h index 767942b4db8746..0f53249cfbc240 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h @@ -173,7 +173,7 @@ struct SequenceExpandGradFunctor { int x_seq_len = x_end - x_start; if (x_seq_len == 0) continue; auto dx_sub = dx->Slice(x_start, x_end); - dx_sub.Resize(phi::flatten_to_1d(dx_sub.dims())); + dx_sub.Resize(common::flatten_to_1d(dx_sub.dims())); int dout_end = dout_offset + repeat_num * x_seq_len; auto dout_sub = dout.Slice(dout_offset, dout_end); dout_sub.Resize({repeat_num, dx_sub.dims()[0]}); diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc b/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc index ac78b18602360d..a7f3869e307537 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc @@ -52,11 +52,11 @@ class SequencePadOp : public framework::OperatorWithKernel { "The rank of SequencePadOp Input(X) can't be less " "than 2. 
But the rank we received is %d", x_dims.size())); - auto time_step_dims = phi::slice_ddim(x_dims, 1, x_dims.size()); + auto time_step_dims = common::slice_ddim(x_dims, 1, x_dims.size()); auto pad_value_dims = ctx->GetInputDim("PadValue"); PADDLE_ENFORCE_EQ( - pad_value_dims == phi::make_ddim({1}) || - pad_value_dims == phi::make_ddim({}) || + pad_value_dims == common::make_ddim({1}) || + pad_value_dims == common::make_ddim({}) || pad_value_dims == time_step_dims, true, platform::errors::InvalidArgument( @@ -127,12 +127,12 @@ class SequencePadOp : public framework::OperatorWithKernel { std::vector out_dims_vec{out_dim_0, padded_length}; std::vector len_dims_vec{out_dim_0}; - auto time_step_dims_vec = phi::vectorize(time_step_dims); + auto time_step_dims_vec = common::vectorize(time_step_dims); out_dims_vec.insert(out_dims_vec.end(), time_step_dims_vec.begin(), time_step_dims_vec.end()); - ctx->SetOutputDim("Out", phi::make_ddim(out_dims_vec)); - ctx->SetOutputDim("Length", phi::make_ddim(len_dims_vec)); + ctx->SetOutputDim("Out", common::make_ddim(out_dims_vec)); + ctx->SetOutputDim("Length", common::make_ddim(len_dims_vec)); } protected: diff --git a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc index ff956ab2d1d4f7..6e34f76fbd37d0 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc @@ -16,7 +16,7 @@ #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sequence_ops/sequence_slice_op.h b/paddle/fluid/operators/sequence_ops/sequence_slice_op.h index 19c074ef5f75d7..50a3e976334753 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_slice_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_slice_op.h @@ -130,8 +130,8 @@ class SequenceSliceOpKernel : public framework::OpKernel { out->Resize(out_dims); out->set_lod(out_lod); - auto in_stride = phi::stride(in->dims()); - auto out_stride = phi::stride(out->dims()); + auto in_stride = common::stride(in->dims()); + auto out_stride = common::stride(out->dims()); size_t out_offset = 0; for (size_t i = 0; i < n; ++i) { @@ -193,9 +193,9 @@ class SequenceSliceGradOpKernel : public framework::OpKernel { Tensor out_grad_t = out_grad->Slice(static_cast(out_lod[0][i]), static_cast(out_lod[0][i + 1])); - auto out_grad_stride = phi::stride(out_grad_t.dims()); + auto out_grad_stride = common::stride(out_grad_t.dims()); - auto x_grad_stride = phi::stride(x_grad->dims()); + auto x_grad_stride = common::stride(x_grad->dims()); Tensor x_grad_t = x_grad->Slice( static_cast(lod[0][i] + offset_data[i]), diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc b/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc index 3b2583bbe42996..01f7bb3e928902 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc @@ -57,8 +57,8 @@ class SequenceSoftmaxCUDNNKernel : public framework::OpKernel { // Reshape from (end_pos - start_pos) x 1UL to 1UL x (end_pos - start_pos) framework::DDim dims_i = - // phi::make_ddim({1UL, end_pos - start_pos, 1UL, 1UL}); - phi::make_ddim({1UL, end_pos - start_pos}); + // common::make_ddim({1UL, end_pos - start_pos, 1UL, 1UL}); + common::make_ddim({1UL, end_pos - start_pos}); x_i.Resize(dims_i); out_i.Resize(dims_i); 
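// --------------------------------------------------------------------------
// Editor's sketch (not part of this patch): the sequence_softmax hunk below
// (like the pad2d hunk earlier) also moves the DataLayout string parser,
// phi::StringToDataLayout -> common::StringToDataLayout. A hedged example;
// the "paddle/common/layout.h" include path is an assumption of this sketch,
// not taken from the patch.
#include <string>
#include "paddle/common/layout.h"

inline common::DataLayout ParseLayout(const std::string &data_format) {
  // "NCHW", "NHWC", "AnyLayout", ... map to the corresponding enum value.
  return common::StringToDataLayout(data_format);
}
// --------------------------------------------------------------------------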
phi::funcs::SoftmaxCUDNNFunctor()( @@ -91,7 +91,7 @@ class SequenceSoftmaxGradCUDNNKernel : public framework::OpKernel { Tensor x_grad_i = x_grad->Slice(start_pos, end_pos); // Reshape from (end_pos - start_pos) x 1UL to 1UL x (end_pos - start_pos) - framework::DDim dims_i = phi::make_ddim({1UL, end_pos - start_pos}); + framework::DDim dims_i = common::make_ddim({1UL, end_pos - start_pos}); out_i.Resize(dims_i); out_grad_i.Resize(dims_i); x_grad_i.Resize(dims_i); diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc index 0ca5514900d460..12d4f72a91169e 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc @@ -41,7 +41,8 @@ class SequenceSoftmaxOp : public framework::OperatorWithKernel { auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); phi::DataLayout layout_ = DataLayout::kAnyLayout; if (ctx.HasAttr("data_format")) { - layout_ = phi::StringToDataLayout(ctx.Attr("data_format")); + layout_ = + common::StringToDataLayout(ctx.Attr("data_format")); } return phi::KernelKey( ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type)); @@ -126,7 +127,8 @@ class SequenceSoftmaxGradOp : public framework::OperatorWithKernel { auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Out"); phi::DataLayout layout_ = DataLayout::kAnyLayout; if (ctx.HasAttr("data_format")) { - layout_ = phi::StringToDataLayout(ctx.Attr("data_format")); + layout_ = + common::StringToDataLayout(ctx.Attr("data_format")); } return phi::KernelKey( ctx.GetPlace(), layout_, phi::TransToPhiDataType(input_data_type)); diff --git a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc index 33fa45f27972ed..6088b8181646ba 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc @@ -79,7 +79,7 @@ class SequenceUnpadOp : public framework::OperatorWithKernel { out_dims_vec.push_back(x_dims[i]); } } - ctx->SetOutputDim("Out", phi::make_ddim(out_dims_vec)); + ctx->SetOutputDim("Out", common::make_ddim(out_dims_vec)); if (!ctx->IsRuntime()) { ctx->SetLoDLevel("Out", 1); } diff --git a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h index aff7ce2392d6c2..cc38fd510ef1ea 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h @@ -64,7 +64,7 @@ class SequenceUnpadOpKernel : public framework::OpKernel { out_dims_vec.push_back(x_t->dims()[i]); } } - out_t->Resize(phi::make_ddim(out_dims_vec)); + out_t->Resize(common::make_ddim(out_dims_vec)); // after set the lod of output, allocate the memory out_t->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/shuffle_batch_op.cc b/paddle/fluid/operators/shuffle_batch_op.cc index 61b3f30b390100..0b5a7bf5540abd 100644 --- a/paddle/fluid/operators/shuffle_batch_op.cc +++ b/paddle/fluid/operators/shuffle_batch_op.cc @@ -63,7 +63,7 @@ class ShuffleBatchOp : public framework::OperatorWithKernel { ctx->ShareLoD("X", "Out"); ctx->ShareDim("Seed", "SeedOut"); ctx->ShareLoD("Seed", "SeedOut"); - ctx->SetOutputDim("ShuffleIdx", phi::make_ddim({-1})); + ctx->SetOutputDim("ShuffleIdx", common::make_ddim({-1})); } protected: diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc index 
e12c5b1e7febb2..16b895ce557a77 100644 --- a/paddle/fluid/operators/slice_op.cc +++ b/paddle/fluid/operators/slice_op.cc @@ -154,7 +154,7 @@ class SliceOp : public framework::OperatorWithKernel { #ifdef PADDLE_WITH_DNNL auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType(ctx, "Input"); - auto vec_dims = phi::vectorize(in_tensor.dims()); + auto vec_dims = common::vectorize(in_tensor.dims()); bool all_zero_dims = std::all_of( vec_dims.cbegin(), vec_dims.cend(), [](int64_t i) { return i == 0; }); if (!all_zero_dims && this->CanMKLDNNBeUsed(ctx, input_data_type)) { diff --git a/paddle/fluid/operators/sparse_attention_op.cu b/paddle/fluid/operators/sparse_attention_op.cu index 086de1fd706fd5..117de1c1a55dfb 100644 --- a/paddle/fluid/operators/sparse_attention_op.cu +++ b/paddle/fluid/operators/sparse_attention_op.cu @@ -649,7 +649,7 @@ std::vector GetSplitTensor(phi::DenseTensor* input) { for (int i = 1; i < new_dims.size(); i++) { new_dims[i] = dims[i + 1]; } - input->Resize(phi::make_ddim(new_dims)); + input->Resize(common::make_ddim(new_dims)); return input->Split(1, 0); } diff --git a/paddle/fluid/operators/split_op.h b/paddle/fluid/operators/split_op.h index e6545cc8d8d995..aaee366a4636a7 100644 --- a/paddle/fluid/operators/split_op.h +++ b/paddle/fluid/operators/split_op.h @@ -79,7 +79,7 @@ static inline std::vector UpdateOutsDims( "Only one dimension value of Attr(num_or_sections) " "in SplitOp can be -1. " "But received Attr(num_or_sections) = [%s].", - phi::make_ddim(sections))); + common::make_ddim(sections))); } if (unk_dim_idx != -1) { @@ -95,7 +95,7 @@ static inline std::vector UpdateOutsDims( "size " "along the split dimension. But received Attr(num_or_sections) " "= [%s], input(X)'s shape = [%s], Attr(dim) = %d.", - phi::make_ddim(sections), + common::make_ddim(sections), in_dims, axis)); if (each_section_is_known) { @@ -110,7 +110,7 @@ static inline std::vector UpdateOutsDims( "size " "along the split dimension. 
But received Attr(num_or_sections)" " = [%s], input(X)'s shape = [%s], Attr(dim) = %d.", - phi::make_ddim(sections), + common::make_ddim(sections), in_dims, axis)); } diff --git a/paddle/fluid/operators/spp_op.cc b/paddle/fluid/operators/spp_op.cc index 9fe04caf3e1003..98072746e8eee7 100644 --- a/paddle/fluid/operators/spp_op.cc +++ b/paddle/fluid/operators/spp_op.cc @@ -80,7 +80,7 @@ class SppOp : public framework::OperatorWithKernel { int outlen = ((std::pow(4, pyramid_height) - 1) / (4 - 1)) * in_x_dims[1]; // NOLINT std::vector output_shape({in_x_dims[0], outlen}); - ctx->SetOutputDim("Out", phi::make_ddim(output_shape)); + ctx->SetOutputDim("Out", common::make_ddim(output_shape)); } }; diff --git a/paddle/fluid/operators/spp_op.h b/paddle/fluid/operators/spp_op.h index bf810e8825570b..5d3f4a78020a02 100644 --- a/paddle/fluid/operators/spp_op.h +++ b/paddle/fluid/operators/spp_op.h @@ -34,7 +34,7 @@ class SppKernel : public framework::OpKernel { std::string pooling_type = context.template Attr("pooling_type"); out->mutable_data(context.GetPlace()); - auto out_stride = phi::stride(out->dims()); + auto out_stride = common::stride(out->dims()); int input_h = in_x->dims()[2]; int input_w = in_x->dims()[3]; size_t output_offset = 0; @@ -51,7 +51,7 @@ class SppKernel : public framework::OpKernel { phi::DenseTensor out_level; std::vector output_shape_vec( {in_x->dims()[0], in_x->dims()[1], bins, bins}); - framework::DDim output_shape(phi::make_ddim(output_shape_vec)); + framework::DDim output_shape(common::make_ddim(output_shape_vec)); out_level.mutable_data(output_shape, context.GetPlace()); // pooling if (pooling_type == "max") { @@ -92,10 +92,10 @@ class SppKernel : public framework::OpKernel { std::vector output_flatten_shape_vec( {in_x->dims()[0], output_flatten_w}); framework::DDim output_flatten_shape( - phi::make_ddim(output_flatten_shape_vec)); + common::make_ddim(output_flatten_shape_vec)); out_level.Resize(output_flatten_shape); // concat - auto out_level_stride = phi::stride(out_level.dims()); + auto out_level_stride = common::stride(out_level.dims()); phi::funcs::StridedMemcpy( context.template device_context(), out_level.data(), @@ -127,7 +127,7 @@ class SppGradKernel : public framework::OpKernel { zero; in_x_grad->mutable_data(context.GetPlace()); zero(device_ctx, in_x_grad, static_cast(0)); - auto out_stride = phi::stride(out->dims()); + auto out_stride = common::stride(out->dims()); int input_h = in_x->dims()[2]; int input_w = in_x->dims()[3]; size_t out_offset = 0; @@ -146,10 +146,11 @@ class SppGradKernel : public framework::OpKernel { int out_flatten_w = in_x->dims()[1] * bins * bins; std::vector out_flatten_shape_vec( {in_x->dims()[0], out_flatten_w}); - framework::DDim out_flatten_shape(phi::make_ddim(out_flatten_shape_vec)); + framework::DDim out_flatten_shape( + common::make_ddim(out_flatten_shape_vec)); out_level.mutable_data(out_flatten_shape, context.GetPlace()); outgrad_level.mutable_data(out_flatten_shape, context.GetPlace()); - auto flatten_stride = phi::stride(out_level.dims()); + auto flatten_stride = common::stride(out_level.dims()); // memcpy phi::funcs::StridedMemcpy( context.template device_context(), @@ -174,7 +175,7 @@ class SppGradKernel : public framework::OpKernel { (input_h - kernel_size_h + 2 * padding_h) / kernel_size_h + 1); out_shape_vec.push_back( (input_w - kernel_size_w + 2 * padding_w) / kernel_size_w + 1); - framework::DDim out_shape(phi::make_ddim(out_shape_vec)); + framework::DDim out_shape(common::make_ddim(out_shape_vec)); 
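The spp_op changes lean on the stride helper, which moves the same way. For a contiguous row-major shape, stride yields the per-dimension element step that StridedMemcpy uses to walk sub-tensors into the concatenated output. A hedged sketch of the contract, with a hand-rolled equivalent for concreteness (helper names are illustrative):

#include <cstdint>
#include <vector>

#include "paddle/common/ddim.h"

// common::stride({2, 3, 4}) is expected to yield {12, 4, 1}: the number of
// elements skipped when the corresponding index advances by one.
common::DDim RowMajorStrides(const common::DDim &dims) {
  return common::stride(dims);
}

// Hand-rolled equivalent of the same computation, for reference.
common::DDim RowMajorStridesManual(const common::DDim &dims) {
  std::vector<int64_t> s(dims.size(), 1);
  for (int i = dims.size() - 2; i >= 0; --i) {
    s[i] = s[i + 1] * dims[i + 1];
  }
  return common::make_ddim(s);
}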
out_level.ShareDataWith(out_level); out_level.Resize(out_shape); outgrad_level.ShareDataWith(outgrad_level); diff --git a/paddle/fluid/operators/squeeze_op.h b/paddle/fluid/operators/squeeze_op.h index 6f0da1d42e5467..10ff809d608886 100644 --- a/paddle/fluid/operators/squeeze_op.h +++ b/paddle/fluid/operators/squeeze_op.h @@ -85,7 +85,7 @@ framework::DDim GetOutputShape(const std::vector squeeze_dims, output_shape.push_back(in_dims[i]); } } - return phi::make_ddim(output_shape); + return common::make_ddim(output_shape); } template @@ -119,7 +119,7 @@ class Squeeze2GradKernel : public framework::OpKernel { // auto in_dims = d_x->dims(); auto xshape_dims = ctx.Input("XShape")->dims(); - auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size()); + auto x_dims = common::slice_ddim(xshape_dims, 1, xshape_dims.size()); d_x->mutable_data(ctx.GetPlace(), d_out->type()); framework::TensorCopySync(*d_out, ctx.GetPlace(), d_x); diff --git a/paddle/fluid/operators/stft_op.cc b/paddle/fluid/operators/stft_op.cc index 14c1af81c16103..34f6ee854dd7bc 100644 --- a/paddle/fluid/operators/stft_op.cc +++ b/paddle/fluid/operators/stft_op.cc @@ -75,7 +75,7 @@ class StftOp : public framework::OperatorWithKernel { } output_shape.push_back(n_frames); - ctx->SetOutputDim("Out", phi::make_ddim(output_shape)); + ctx->SetOutputDim("Out", common::make_ddim(output_shape)); } protected: diff --git a/paddle/fluid/operators/string/faster_tokenizer_op.h b/paddle/fluid/operators/string/faster_tokenizer_op.h index b054531e4e05ef..1f848cb393fae2 100644 --- a/paddle/fluid/operators/string/faster_tokenizer_op.h +++ b/paddle/fluid/operators/string/faster_tokenizer_op.h @@ -176,11 +176,12 @@ class FasterTokenizerKernel : public framework::OpKernel { } input_ids->Resize( - phi::make_ddim({static_cast(batch_size), - static_cast(batch_max_seq_len)})); + common::make_ddim({static_cast(batch_size), + static_cast(batch_max_seq_len)})); auto* input_ids_data = input_ids->mutable_data(ctx.GetPlace()); - seg_ids->Resize(phi::make_ddim({static_cast(batch_size), - static_cast(batch_max_seq_len)})); + seg_ids->Resize( + common::make_ddim({static_cast(batch_size), + static_cast(batch_max_seq_len)})); auto* seg_ids_data = seg_ids->mutable_data(ctx.GetPlace()); auto pad_token_id = tokenizer.GetPadTokenID(); diff --git a/paddle/fluid/operators/svd_helper.h b/paddle/fluid/operators/svd_helper.h index ccf5cd09a08426..caa31565d4cf3d 100644 --- a/paddle/fluid/operators/svd_helper.h +++ b/paddle/fluid/operators/svd_helper.h @@ -20,6 +20,7 @@ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/diag_op.h" @@ -27,7 +28,6 @@ #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/for_range.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/complex_functors.h" #include "paddle/phi/kernels/funcs/lapack/lapack_function.h" @@ -85,8 +85,8 @@ static std::vector GetBroadcastShape(InTensors ins) { auto x_dim = ins[0]->dims(); auto y_dim = ins[1]->dims(); std::vector broadcast_shape = - (x_dim.size() > y_dim.size() ? phi::vectorize(x_dim) - : phi::vectorize(y_dim)); + (x_dim.size() > y_dim.size() ? 
common::vectorize(x_dim) + : common::vectorize(y_dim)); int rank_min = std::min(x_dim.size(), y_dim.size()); int rank_x = x_dim.size(); int rank_y = y_dim.size(); @@ -301,10 +301,10 @@ struct DeviceIndependenceTensorOperations { phi::DenseTensor ret; auto a_dim = mat_a.dims(); auto b_dim = mat_b.dims(); - std::vector x_vec = phi::vectorize(a_dim); + std::vector x_vec = common::vectorize(a_dim); x_vec[x_vec.size() - 2] = a_dim[a_dim.size() - (trans_a ? 1 : 2)]; x_vec[x_vec.size() - 1] = b_dim[b_dim.size() - (trans_b ? 2 : 1)]; - ret.Resize(phi::make_ddim(x_vec)); + ret.Resize(common::make_ddim(x_vec)); ret.mutable_data(context.GetPlace()); auto blas = GetBlas(); auto mat_a_discrib = phi::funcs::CreateMatrixDescriptor(a_dim, 0, trans_a); @@ -318,7 +318,7 @@ struct DeviceIndependenceTensorOperations { // transpose the last two dimision phi::DenseTensor ret; auto x_dim = x.dims(); - auto x_vec = phi::vectorize(x_dim); + auto x_vec = common::vectorize(x_dim); int rank = x_vec.size(); std::swap(x_vec[rank - 1], x_vec[rank - 2]); std::vector out_shape = x_vec; @@ -328,7 +328,7 @@ struct DeviceIndependenceTensorOperations { } std::swap(axis[rank - 1], axis[rank - 2]); auto& dev_ctx = context.template device_context(); - ret.Resize(phi::make_ddim(x_vec)); + ret.Resize(common::make_ddim(x_vec)); ret.mutable_data(context.GetPlace()); switch (rank) { DITO_TRANSPOSE_RANK_CASE(2); @@ -397,7 +397,7 @@ struct DeviceIndependenceTensorOperations { for (int i = 0; i < num_dims - 1; ++i) { out_shape.push_back(x.dims()[i]); } - out.Resize(phi::make_ddim(out_shape)); + out.Resize(common::make_ddim(out_shape)); int order = x.dims()[num_dims - 1]; int stride_out = order * order; int stride_in = order + 1; @@ -414,7 +414,7 @@ struct DeviceIndependenceTensorOperations { const phi::DenseTensor& y) { phi::DenseTensor ret; std::vector out_shape = GetBroadcastShape({&x, &y}); - ret.Resize(phi::make_ddim(out_shape)); + ret.Resize(common::make_ddim(out_shape)); ElementwiseComputeEx, DeviceContext, T>( context, &x, &y, -1, RealMulComplexFunctor(), &ret); return ret; @@ -432,7 +432,7 @@ struct DeviceIndependenceTensorOperations { out_vector.device(place) = x_vector / y_vector; } else { std::vector out_shape = GetBroadcastShape({&x, &y}); - ret.Resize(phi::make_ddim(out_shape)); + ret.Resize(common::make_ddim(out_shape)); ElementwiseComputeEx, DeviceContext, T>( context, &x, &y, -1, DivFunctor(), &ret); } @@ -442,7 +442,7 @@ struct DeviceIndependenceTensorOperations { // element wise add, support numpy broadcast. 
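All of the svd_helper edits are one idiom: common::vectorize copies the shape into a mutable std::vector, the vector is edited, and common::make_ddim converts it back for the Resize call. The Transpose helper above, for instance, reduces to this sketch (rank >= 2 is assumed, as the surrounding code enforces; the name is illustrative):

#include <utility>
#include <vector>

#include "paddle/common/ddim.h"

// Swap the trailing two dimensions, as the batched-matmul helpers do before
// launching the transpose kernel.
common::DDim SwapLastTwoDims(const common::DDim &dims) {
  std::vector<int64_t> shape = common::vectorize(dims);  // DDim -> vector
  const auto rank = shape.size();                        // caller ensures >= 2
  std::swap(shape[rank - 1], shape[rank - 2]);
  return common::make_ddim(shape);                       // vector -> DDim
}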
phi::DenseTensor ret; std::vector out_shape = GetBroadcastShape({&x, &y}); - ret.Resize(phi::make_ddim(out_shape)); + ret.Resize(common::make_ddim(out_shape)); ElementwiseComputeEx, DeviceContext, T>( context, &x, &y, -1, AddFunctor(), &ret); return ret; @@ -450,7 +450,7 @@ struct DeviceIndependenceTensorOperations { phi::DenseTensor Mul(const phi::DenseTensor& x, const phi::DenseTensor& y) { phi::DenseTensor ret; std::vector out_shape = GetBroadcastShape({&x, &y}); - ret.Resize(phi::make_ddim(out_shape)); + ret.Resize(common::make_ddim(out_shape)); ElementwiseComputeEx, DeviceContext, T>( context, &x, &y, -1, MulFunctor(), &ret); return ret; @@ -476,7 +476,7 @@ struct DeviceIndependenceTensorOperations { phi::DenseTensor Sub(const phi::DenseTensor& x, const phi::DenseTensor& y) { phi::DenseTensor ret; std::vector out_shape = GetBroadcastShape({&x, &y}); - ret.Resize(phi::make_ddim(out_shape)); + ret.Resize(common::make_ddim(out_shape)); if (platform::is_gpu_place(context.GetPlace())) { #if defined(__NVCC__) || defined(__HIPCC__) // For GPU, there is no need to define XxxInverseFunctor and call @@ -501,7 +501,7 @@ struct DeviceIndependenceTensorOperations { // don't copy data, only change the dims phi::DenseTensor out; out.ShareDataWith(x); - std::vector out_shape = phi::vectorize(x.dims()); + std::vector out_shape = common::vectorize(x.dims()); if (axis >= 0) { auto index = (out_shape.begin() + axis); out_shape.insert(index, 1); @@ -509,12 +509,12 @@ struct DeviceIndependenceTensorOperations { auto index = (out_shape.end() + axis + 1); out_shape.insert(index, 1); } - out.Resize(phi::make_ddim(out_shape)); + out.Resize(common::make_ddim(out_shape)); return out; } phi::DenseTensor Fill(std::vector shape, float fill_value) { phi::DenseTensor ret; - ret.Resize(phi::make_ddim(shape)); + ret.Resize(common::make_ddim(shape)); ret.mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); phi::funcs::SetConstant()(dev_ctx, &ret, T(fill_value)); @@ -535,7 +535,7 @@ struct DeviceIndependenceTensorOperations { std::vector ends) { phi::DenseTensor ret; std::vector new_axes = axes; - std::vector out_shape = phi::vectorize(x.dims()); + std::vector out_shape = common::vectorize(x.dims()); size_t rank = out_shape.size(); PADDLE_ENFORCE_EQ( axes.size(), @@ -566,7 +566,7 @@ struct DeviceIndependenceTensorOperations { offset[new_axes[i]] = starts[i]; extends[new_axes[i]] = ends[i] - starts[i]; } - ret.Resize(phi::make_ddim(out_shape)); + ret.Resize(common::make_ddim(out_shape)); ret.mutable_data(context.GetPlace()); switch (rank) { DITO_SLICE_RANK_CASE(1); @@ -596,7 +596,7 @@ struct DeviceIndependenceTensorOperations { x_rank, 2, platform::errors::InvalidArgument("Rank must be at least 2.")); - std::vector out_shape = phi::vectorize(x.dims()); + std::vector out_shape = common::vectorize(x.dims()); return CreateOpRunAndReturnTensor("tril_triu", inputs, attrs, out_shape); } @@ -613,8 +613,8 @@ struct DeviceIndependenceTensorOperations { auto x_dims = x.dims(); auto y_dims = y.dims(); auto y_dims_n = y_dims.size(); - std::vector x_dims_vec = phi::vectorize(x_dims); - std::vector y_dims_vec = phi::vectorize(y_dims); + std::vector x_dims_vec = common::vectorize(x_dims); + std::vector y_dims_vec = common::vectorize(y_dims); std::vector x_dims_vec_cut(x_dims_vec.begin(), x_dims_vec.end() - 2); std::vector y_dims_vec_cut(y_dims_vec.begin(), @@ -646,7 +646,7 @@ struct DeviceIndependenceTensorOperations { if (out_dims[axis_] < 0) { out_dims[axis_] = -1; } - std::vector out_shape = 
phi::vectorize(out_dims); + std::vector out_shape = common::vectorize(out_dims); return CreateOpRunAndReturnTensor("concat", inputs, attrs, out_shape); } @@ -755,8 +755,8 @@ struct DeviceIndependenceTensorOperations { auto out_var = local_scope.Var("tmp_Out"); // return the Out // create Out phi::DenseTensor and allocat memory out_var->GetMutable()->mutable_data( - phi::make_ddim(out_shape), context.GetPlace()); - // phi::make_ddim(out_shape) + common::make_ddim(out_shape), context.GetPlace()); + // common::make_ddim(out_shape) framework::VariableNameMap op_inputs; int counter = 0; for (auto item : inputs) { @@ -780,7 +780,7 @@ struct DeviceIndependenceTensorOperations { op->Run(local_scope, context.GetPlace()); phi::DenseTensor out; out.ShareDataWith(*(out_var->GetMutable())); - out.Resize(phi::make_ddim(out_shape)); + out.Resize(common::make_ddim(out_shape)); context.scope().DeleteScope(&local_scope); return out; } diff --git a/paddle/fluid/operators/sync_batch_norm_op.cu b/paddle/fluid/operators/sync_batch_norm_op.cu index 7f618db46976a9..af69594f992cde 100644 --- a/paddle/fluid/operators/sync_batch_norm_op.cu +++ b/paddle/fluid/operators/sync_batch_norm_op.cu @@ -51,7 +51,7 @@ void SyncBatchNormKernel(const Context& ctx, double epsilon = epsilon_f; const bool trainable_stats = trainable_statistics; - const DataLayout layout = phi::StringToDataLayout(data_layout_str); + const DataLayout layout = common::StringToDataLayout(data_layout_str); bool test_mode = is_test && (!trainable_statistics); const auto& x_dims = x.dims(); PADDLE_ENFORCE_GE(x_dims.size(), diff --git a/paddle/fluid/operators/sync_batch_norm_utils.h b/paddle/fluid/operators/sync_batch_norm_utils.h index 7c14f6dfac324d..c132a91bb5346c 100644 --- a/paddle/fluid/operators/sync_batch_norm_utils.h +++ b/paddle/fluid/operators/sync_batch_norm_utils.h @@ -30,9 +30,9 @@ namespace cub = hipcub; #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #include "paddle/fluid/distributed/collective/process_group_nccl.h" #endif +#include "paddle/common/layout.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #include "paddle/phi/backends/gpu/gpu_dnn.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/kernels/funcs/norm_utils.cu.h" #include "paddle/phi/kernels/funcs/norm_utils.h" @@ -412,7 +412,7 @@ void SyncBatchNormGradFunctor( DenseTensor *bias_grad) { double epsilon = static_cast(epsilon_f); - const DataLayout layout = phi::StringToDataLayout(data_layout_str); + const DataLayout layout = common::StringToDataLayout(data_layout_str); const auto *d_y = &y_grad; diff --git a/paddle/fluid/operators/tdm_child_op.cc b/paddle/fluid/operators/tdm_child_op.cc index 64c67d67b776ab..7b9932ffb4a62c 100644 --- a/paddle/fluid/operators/tdm_child_op.cc +++ b/paddle/fluid/operators/tdm_child_op.cc @@ -89,10 +89,10 @@ class TDMChildOp : public framework::OperatorWithKernel { info_dims.size(), info_dims)); - auto output_dims = phi::vectorize(input_dims); + auto output_dims = common::vectorize(input_dims); output_dims.push_back(child_nums); - ctx->SetOutputDim("Child", phi::make_ddim(output_dims)); - ctx->SetOutputDim("LeafMask", phi::make_ddim(output_dims)); + ctx->SetOutputDim("Child", common::make_ddim(output_dims)); + ctx->SetOutputDim("LeafMask", common::make_ddim(output_dims)); if (ctx->GetOutputsVarType("Child")[0] == framework::proto::VarType::LOD_TENSOR) { diff --git a/paddle/fluid/operators/tdm_sampler_op.cc b/paddle/fluid/operators/tdm_sampler_op.cc index 
f5e835ca2f7a79..d516af77183653 100644 --- a/paddle/fluid/operators/tdm_sampler_op.cc +++ b/paddle/fluid/operators/tdm_sampler_op.cc @@ -101,15 +101,16 @@ class TDMSamplerOp : public framework::OperatorWithKernel { } auto input_dims = ctx->GetInputDim("X"); - auto ddim = phi::make_ddim({-1, sample_res_length}); + auto ddim = common::make_ddim({-1, sample_res_length}); if (ctx->IsRuntime()) { - auto output_dims = phi::vectorize(input_dims); + auto output_dims = common::vectorize(input_dims); auto batch_size = output_dims[0]; - ctx->SetOutputDim("Out", phi::make_ddim({batch_size, sample_res_length})); + ctx->SetOutputDim("Out", + common::make_ddim({batch_size, sample_res_length})); ctx->SetOutputDim("Labels", - phi::make_ddim({batch_size, sample_res_length})); + common::make_ddim({batch_size, sample_res_length})); ctx->SetOutputDim("Mask", - phi::make_ddim({batch_size, sample_res_length})); + common::make_ddim({batch_size, sample_res_length})); } else { ctx->SetOutputDim("Out", ddim); ctx->SetOutputDim("Labels", ddim); diff --git a/paddle/fluid/operators/tdm_sampler_op.h b/paddle/fluid/operators/tdm_sampler_op.h index f3b55c4a5cc34b..984e6666e1f915 100644 --- a/paddle/fluid/operators/tdm_sampler_op.h +++ b/paddle/fluid/operators/tdm_sampler_op.h @@ -60,7 +60,7 @@ void TDMSamplerInner(const framework::ExecutionContext &context, } VLOG(3) << "TDM: sample res length: " << sample_res_length; - auto travel_dim = phi::vectorize(travel_lod_tensor.dims()); + auto travel_dim = common::vectorize(travel_lod_tensor.dims()); auto total_sample_nums = input_ids_num * sample_res_length; // get all data diff --git a/paddle/fluid/operators/temporal_shift_op.cu b/paddle/fluid/operators/temporal_shift_op.cu index 68f8153e88e600..51b75832d078ac 100644 --- a/paddle/fluid/operators/temporal_shift_op.cu +++ b/paddle/fluid/operators/temporal_shift_op.cu @@ -165,7 +165,7 @@ class TemporalShiftOpCUDAKernel : public framework::OpKernel { int t = ctx.Attr("seg_num"); float shift_ratio = ctx.Attr("shift_ratio"); const std::string data_format_str = ctx.Attr("data_format"); - const DataLayout data_layout = phi::StringToDataLayout(data_format_str); + const DataLayout data_layout = common::StringToDataLayout(data_format_str); const int nt = input->dims()[0]; const int c = (data_layout == DataLayout::kNCHW ? input->dims()[1] @@ -184,8 +184,8 @@ class TemporalShiftOpCUDAKernel : public framework::OpKernel { const int c2 = static_cast(c * 2 * shift_ratio); framework::DDim out_dims = - (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w}) - : phi::make_ddim({nt, h, w, c})); + (data_layout == DataLayout::kNCHW ? common::make_ddim({nt, c, h, w}) + : common::make_ddim({nt, h, w, c})); const T* input_data = input->data(); T* output_data = output->mutable_data(out_dims, ctx.GetPlace()); @@ -219,7 +219,7 @@ class TemporalShiftGradOpCUDAKernel : public framework::OpKernel { int t = ctx.Attr("seg_num"); float shift_ratio = ctx.Attr("shift_ratio"); const std::string data_format_str = ctx.Attr("data_format"); - const DataLayout data_layout = phi::StringToDataLayout(data_format_str); + const DataLayout data_layout = common::StringToDataLayout(data_format_str); const int nt = output_grad->dims()[0]; const int c = (data_layout == DataLayout::kNCHW ? output_grad->dims()[1] @@ -238,8 +238,8 @@ class TemporalShiftGradOpCUDAKernel : public framework::OpKernel { const int c2 = static_cast(c * 2 * shift_ratio); framework::DDim in_grad_dims = - (data_layout == DataLayout::kNCHW ? 
phi::make_ddim({nt, c, h, w}) - : phi::make_ddim({nt, h, w, c})); + (data_layout == DataLayout::kNCHW ? common::make_ddim({nt, c, h, w}) + : common::make_ddim({nt, h, w, c})); const T* output_grad_data = output_grad->data(); T* input_grad_data = input_grad->mutable_data(in_grad_dims, ctx.GetPlace()); diff --git a/paddle/fluid/operators/temporal_shift_op.h b/paddle/fluid/operators/temporal_shift_op.h index ec2533316e107f..47007a10038b4c 100644 --- a/paddle/fluid/operators/temporal_shift_op.h +++ b/paddle/fluid/operators/temporal_shift_op.h @@ -97,7 +97,7 @@ class TemporalShiftGradKernel : public framework::OpKernel { int t = ctx.Attr("seg_num"); float shift_ratio = ctx.Attr("shift_ratio"); const std::string data_format_str = ctx.Attr("data_format"); - const DataLayout data_layout = phi::StringToDataLayout(data_format_str); + const DataLayout data_layout = common::StringToDataLayout(data_format_str); const int nt = output_grad->dims()[0]; const int c = (data_layout == DataLayout::kNCHW ? output_grad->dims()[1] @@ -116,8 +116,8 @@ class TemporalShiftGradKernel : public framework::OpKernel { const int c2 = static_cast(c * 2 * shift_ratio); framework::DDim in_grad_dims = - (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w}) - : phi::make_ddim({nt, h, w, c})); + (data_layout == DataLayout::kNCHW ? common::make_ddim({nt, c, h, w}) + : common::make_ddim({nt, h, w, c})); const T* output_grad_data = output_grad->data(); T* input_grad_data = input_grad->mutable_data(in_grad_dims, ctx.GetPlace()); diff --git a/paddle/fluid/operators/tensor_array_to_tensor_op.cc b/paddle/fluid/operators/tensor_array_to_tensor_op.cc index 6188106f64bfab..69c7446d85d470 100644 --- a/paddle/fluid/operators/tensor_array_to_tensor_op.cc +++ b/paddle/fluid/operators/tensor_array_to_tensor_op.cc @@ -109,9 +109,9 @@ class LoDTensorArray2TensorOp : public framework::OperatorBase { } } } - auto vec = phi::vectorize(out_dims); + auto vec = common::vectorize(out_dims); vec.insert(vec.begin() + axis, inx.size()); // NOLINT - out.Resize(phi::make_ddim(vec)); + out.Resize(common::make_ddim(vec)); LodTensorArray2LodTensorVector(scope, base_name, Input("X"), &names); @@ -178,16 +178,16 @@ class LoDTensorArray2TensorOpInferShape : public framework::InferShapeBase { if (ctx->IsRuntime()) return; auto dims = ctx->GetInputDim("X"); // if the shape is empty - if (dims == phi::make_ddim({0UL})) return; + if (dims == common::make_ddim({0UL})) return; // otherwise, suppose the shape of array is the shape of tensor in the // array, which is consistent with what tensor_array_read_write dose auto axis = ctx->Attrs().Get("axis"); auto use_stack = ctx->Attrs().Get("use_stack"); if (use_stack) { - auto dim_vec = phi::vectorize(dims); + auto dim_vec = common::vectorize(dims); // use -1 for the stack dim size dim_vec.insert(dim_vec.begin() + axis, -1); - dims = phi::make_ddim(dim_vec); + dims = common::make_ddim(dim_vec); } else { // use -1 for the concat dim size dims[axis] = -1; diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 77b0ebe1ce4937..8c75a7bc00f1c8 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -23,6 +23,7 @@ #include #include +#include "paddle/common/errors.h" #include "paddle/fluid/framework/data_device_transform.h" #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/op_registry.h" @@ -39,7 +40,6 @@ #include "paddle/phi/common/data_type.h" 
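The temporal_shift kernels above (and the transpose ops further down) pair two relocated helpers: common::StringToDataLayout parses the data_format attribute string, and the resulting layout selects between two common::make_ddim shapes. A compact sketch, assuming the enum is reachable as common::DataLayout after layout.h's move to paddle/common (the hunks keep the unqualified DataLayout alias):

#include <cstdint>
#include <string>

#include "paddle/common/ddim.h"
#include "paddle/common/layout.h"

// Map the op's data_format attribute onto an NCHW or NHWC output shape, the
// way TemporalShiftOpCUDAKernel assembles out_dims above.
common::DDim ShiftedDims(const std::string &data_format,
                         int64_t nt, int64_t c, int64_t h, int64_t w) {
  const common::DataLayout layout = common::StringToDataLayout(data_format);
  return layout == common::DataLayout::kNCHW
             ? common::make_ddim({nt, c, h, w})
             : common::make_ddim({nt, h, w, c});
}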
#include "paddle/phi/common/place.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/kernels/cast_kernel.h" #include "paddle/phi/kernels/funcs/data_type_transform.h" #include "paddle/utils/string/string_helper.h" @@ -288,7 +288,7 @@ class TensorRTEngineOp : public framework::OperatorBase { scope, name_real); VLOG(4) << "trt engine runtime input name(" << name << "), dims(" << t.dims() << ")"; - auto t_shape = phi::vectorize(t.dims()); + auto t_shape = common::vectorize(t.dims()); runtime_input_shape.insert(std::make_pair(name, t_shape)); // We need collect value range for shape tensor for Paddle-TRT's use. // To be noticed, this method to identify all shape tensors is based on @@ -446,7 +446,7 @@ class TensorRTEngineOp : public framework::OperatorBase { if (param_names_.count(x)) continue; auto &t = inference::analysis::GetFromScope(scope, x); calib_buffers[x] = t.memory_size(); - auto t_shape = phi::vectorize(t.dims()); + auto t_shape = common::vectorize(t.dims()); runtime_batch = t_shape[0]; } calib_res->calib_ = std::make_unique( @@ -549,7 +549,7 @@ class TensorRTEngineOp : public framework::OperatorBase { framework::TensorCopy(t, dev_place, dev_ctx, &out); t.ShareDataWith(out); } - auto t_shape = phi::vectorize(t.dims()); + auto t_shape = common::vectorize(t.dims()); // This must be a zero dimension tensor. // At present, we convert it to a 1D tensor to feed them into Trt. @@ -736,7 +736,7 @@ class TensorRTEngineOp : public framework::OperatorBase { platform::errors::NotFound( "Output variable %s is not found in TensorRT subgraph.", y)); auto *fluid_t = fluid_v->GetMutable(); - fluid_t->Resize(phi::make_ddim(ddim)); + fluid_t->Resize(common::make_ddim(ddim)); PADDLE_ENFORCE_LT(bind_index, num_bindings, diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc index 23ccf702685577..04130e3f242397 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc @@ -37,7 +37,7 @@ void CreateCUDATensor(framework::Scope* scope, const std::vector& shape) { auto* var = scope->Var(name); auto* tensor = var->GetMutable(); - auto dims = phi::make_ddim(shape); + auto dims = common::make_ddim(shape); tensor->Resize(dims); platform::CUDAPlace place; phi::GPUContext ctx(place); diff --git a/paddle/fluid/operators/top_k_op.cu b/paddle/fluid/operators/top_k_op.cu index f1674bc5005a0b..ef6172b6965f22 100644 --- a/paddle/fluid/operators/top_k_op.cu +++ b/paddle/fluid/operators/top_k_op.cu @@ -87,7 +87,7 @@ class TopkOpCUDAKernel : public framework::OpKernel { framework::DDim inputdims = input->dims(); const int64_t input_height = - phi::product(phi::slice_ddim(inputdims, 0, inputdims.size() - 1)); + common::product(common::slice_ddim(inputdims, 0, inputdims.size() - 1)); const int64_t input_width = inputdims[inputdims.size() - 1]; const auto& dev_ctx = ctx.cuda_device_context(); if ((input_width <= 1024 || k >= 128 || k == input_width)) { @@ -157,7 +157,7 @@ class TopkOpGradCUDAKernel : public framework::OpKernel { framework::DDim xdims = x->dims(); const size_t row = - phi::product(phi::slice_ddim(xdims, 0, xdims.size() - 1)); + common::product(common::slice_ddim(xdims, 0, xdims.size() - 1)); const size_t col = xdims[xdims.size() - 1]; const auto& dev_ctx = context.cuda_device_context(); const int kMaxHeight = 2048; diff --git a/paddle/fluid/operators/top_k_op.h b/paddle/fluid/operators/top_k_op.h index 
27f246415a94ca..f8fa53e2ad5056 100644 --- a/paddle/fluid/operators/top_k_op.h +++ b/paddle/fluid/operators/top_k_op.h @@ -49,7 +49,7 @@ class TopkKernel : public framework::OpKernel { // reshape input to a flattern matrix(like flat_inner_dims) framework::DDim inputdims = input->dims(); const size_t row = - phi::product(phi::slice_ddim(inputdims, 0, inputdims.size() - 1)); + common::product(common::slice_ddim(inputdims, 0, inputdims.size() - 1)); const size_t col = inputdims[inputdims.size() - 1]; Eigen::DSizes flat2dims(row, col); // NOTE: eigen shape doesn't affect paddle tensor. @@ -106,7 +106,7 @@ class TopkGradKernel : public framework::OpKernel { framework::DDim xdims = x->dims(); const size_t row = - phi::product(phi::slice_ddim(xdims, 0, xdims.size() - 1)); + common::product(common::slice_ddim(xdims, 0, xdims.size() - 1)); const size_t col = xdims[xdims.size() - 1]; memset(x_grad_data, 0, row * col * sizeof(T)); diff --git a/paddle/fluid/operators/top_k_op_xpu.cc b/paddle/fluid/operators/top_k_op_xpu.cc index df1725265ebde7..55d3fa8624a8cd 100644 --- a/paddle/fluid/operators/top_k_op_xpu.cc +++ b/paddle/fluid/operators/top_k_op_xpu.cc @@ -63,7 +63,7 @@ class TopkXPUKernel : public framework::OpKernel { // reshape input to a flattern matrix(like flat_inner_dims) framework::DDim inputdims = input->dims(); const size_t row = - phi::product(phi::slice_ddim(inputdims, 0, inputdims.size() - 1)); + common::product(common::slice_ddim(inputdims, 0, inputdims.size() - 1)); const size_t col = inputdims[inputdims.size() - 1]; // int sorted_topk(Context* ctx, const T* x, T* y, int* index, int m, int n, diff --git a/paddle/fluid/operators/transfer_layout_op.h b/paddle/fluid/operators/transfer_layout_op.h index edb78e87e52334..52633640fa95bc 100644 --- a/paddle/fluid/operators/transfer_layout_op.h +++ b/paddle/fluid/operators/transfer_layout_op.h @@ -93,7 +93,7 @@ class TransferLayoutFunctor { } auto out_tz = out_tensor.dims().size() == 0 ? 
std::vector{1} - : phi::vectorize(out_tensor.dims()); + : common::vectorize(out_tensor.dims()); dnnl::memory::data_type in_type = phi::funcs::ToOneDNNDataType(in_tensor.dtype()); @@ -134,7 +134,7 @@ class TransferLayoutFunctor { const phi::DenseTensor &in, phi::DenseTensor *out) const { PADDLE_ENFORCE_EQ( - phi::arity(in.dims()), + common::arity(in.dims()), 4, platform::errors::InvalidArgument( "Input dimension arity only can be 4, the input dimension is %s.", @@ -149,7 +149,7 @@ class TransferLayoutFunctor { dst_dim[i] = src_dim[axis[i]]; } - out->Resize(phi::make_ddim(dst_dim)); + out->Resize(common::make_ddim(dst_dim)); out->mutable_data(in.place(), in.type()); framework::VisitDataType( diff --git a/paddle/fluid/operators/transpose_op.cc b/paddle/fluid/operators/transpose_op.cc index 652f88fec8f45a..417299d24db072 100644 --- a/paddle/fluid/operators/transpose_op.cc +++ b/paddle/fluid/operators/transpose_op.cc @@ -25,7 +25,7 @@ phi::KernelKey TransposeOp::GetExpectedKernelType( const framework::ExecutionContext &ctx) const { auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); auto &data_format = ctx.Attr("data_format"); - phi::DataLayout layout_ = phi::StringToDataLayout(data_format); + phi::DataLayout layout_ = common::StringToDataLayout(data_format); return phi::KernelKey( ctx.GetPlace(), layout_, phi::TransToPhiDataType(data_type)); } @@ -101,7 +101,7 @@ class TransposeOpGrad : public framework::OperatorWithKernel { auto data_type = OperatorWithKernel::IndicateVarDataType( ctx, framework::GradVarName("Out")); std::string data_format = ctx.Attr("data_format"); - phi::DataLayout layout_ = phi::StringToDataLayout(data_format); + phi::DataLayout layout_ = common::StringToDataLayout(data_format); return phi::KernelKey( ctx.GetPlace(), layout_, phi::TransToPhiDataType(data_type)); } @@ -121,7 +121,7 @@ void Transpose2Op::InferShape(framework::InferShapeContext *ctx) const { for (int i = 0; i < in_dims.size(); ++i) { x_shape_dim[i + 1] = in_dims[i]; } - ctx->SetOutputDim("XShape", phi::make_ddim(x_shape_dim)); + ctx->SetOutputDim("XShape", common::make_ddim(x_shape_dim)); ctx->ShareLoD("X", /*->*/ "XShape"); } @@ -129,7 +129,7 @@ phi::KernelKey Transpose2Op::GetExpectedKernelType( const framework::ExecutionContext &ctx) const { auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); auto &data_format = ctx.Attr("data_format"); - phi::DataLayout layout_ = phi::StringToDataLayout(data_format); + phi::DataLayout layout_ = common::StringToDataLayout(data_format); return phi::KernelKey( ctx.GetPlace(), layout_, phi::TransToPhiDataType(data_type)); } @@ -233,7 +233,7 @@ class Transpose2OpGrad : public framework::OperatorWithKernel { OperatorWithKernel::IndicateVarDataType(ctx, framework::GradVarName("Out")); std::string data_format = ctx.Attr("data_format"); - phi::DataLayout layout_ = phi::StringToDataLayout(data_format); + phi::DataLayout layout_ = common::StringToDataLayout(data_format); return phi::KernelKey( ctx.GetPlace(), layout_, phi::TransToPhiDataType(data_type)); } diff --git a/paddle/fluid/operators/unbind_op.h b/paddle/fluid/operators/unbind_op.h index 51347e45929886..7a5bf4d34c47c7 100644 --- a/paddle/fluid/operators/unbind_op.h +++ b/paddle/fluid/operators/unbind_op.h @@ -33,7 +33,7 @@ static inline framework::DDim UnbindOutsDims(const framework::DDim in_dims, for (int i = 0; i < in_dims.size(); i++) { if (i != axis) out_dims.push_back(in_dims[i]); } - return phi::make_ddim(out_dims); + return common::make_ddim(out_dims); } template diff --git 
a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc index 4ff2c885c6930f..6b84fd1612e656 100644 --- a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc +++ b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc @@ -79,11 +79,11 @@ class CPUUniformRandomKernel : public framework::OpKernel { tensor = selected_rows->mutable_value(); auto shape = ctx.Attr>("shape"); if (!new_shape.empty()) shape = new_shape; - tensor->Resize(phi::make_ddim(shape)); + tensor->Resize(common::make_ddim(shape)); selected_rows->mutable_rows()->reserve(shape[0]); } else if (out_var->IsType()) { tensor = out_var->GetMutable(); - if (!new_shape.empty()) tensor->Resize(phi::make_ddim(new_shape)); + if (!new_shape.empty()) tensor->Resize(common::make_ddim(new_shape)); } else { PADDLE_THROW(platform::errors::InvalidArgument( "Expected type of Output(out) in uniform_random_op must be Tensor, " diff --git a/paddle/fluid/operators/uniform_random_batch_size_like_op.cu b/paddle/fluid/operators/uniform_random_batch_size_like_op.cu index 1bbd6eba3c662e..0cf50142c04a0d 100644 --- a/paddle/fluid/operators/uniform_random_batch_size_like_op.cu +++ b/paddle/fluid/operators/uniform_random_batch_size_like_op.cu @@ -39,11 +39,11 @@ class GPUUniformRandomKernel : public framework::OpKernel { tensor = selected_rows->mutable_value(); auto shape = context.Attr>("shape"); if (!new_shape.empty()) shape = new_shape; - tensor->Resize(phi::make_ddim(shape)); + tensor->Resize(common::make_ddim(shape)); selected_rows->mutable_rows()->reserve(shape[0]); } else if (out_var->IsType()) { tensor = out_var->GetMutable(); - if (!new_shape.empty()) tensor->Resize(phi::make_ddim(new_shape)); + if (!new_shape.empty()) tensor->Resize(common::make_ddim(new_shape)); } else { PADDLE_THROW(platform::errors::InvalidArgument( "Expected type of Output(out) in uniform_random_op must be " diff --git a/paddle/fluid/operators/uniform_random_op.h b/paddle/fluid/operators/uniform_random_op.h index 16bce515f2a7fd..458794223dc743 100644 --- a/paddle/fluid/operators/uniform_random_op.h +++ b/paddle/fluid/operators/uniform_random_op.h @@ -75,7 +75,7 @@ inline std::vector GetNewDataFromShapeTensorList( auto tensor = list_new_shape_tensor[i]; PADDLE_ENFORCE_EQ( tensor->dims(), - phi::make_ddim({1}), + common::make_ddim({1}), platform::errors::InvalidArgument( "Shape of dim tensor in uniform_random_op should be [1]" "But received tensor's dim=%s.", diff --git a/paddle/fluid/operators/unique_op.h b/paddle/fluid/operators/unique_op.h index 4d9b39d2dd262e..4d7a9eb5e49378 100644 --- a/paddle/fluid/operators/unique_op.h +++ b/paddle/fluid/operators/unique_op.h @@ -74,7 +74,7 @@ struct UniqueOpFunctor { if (count_ != nullptr) { // Resize the count tensor dims to allocate the memory - count_->Resize(phi::make_ddim({static_cast(uniq.size())})); + count_->Resize(common::make_ddim({static_cast(uniq.size())})); IndexT* count_data = count_->mutable_data(platform::CPUPlace()); // init count_data to 0 memset(count_data, 0, uniq.size() * sizeof(IndexT)); @@ -106,7 +106,7 @@ struct UniqueOpFunctor { } } - out_->Resize(phi::make_ddim({static_cast(uniq.size())})); + out_->Resize(common::make_ddim({static_cast(uniq.size())})); auto out_data = out_->mutable_data(platform::CPUPlace()); std::memcpy(out_data, uniq.data(), uniq.size() * sizeof(InT)); } @@ -143,13 +143,13 @@ static void UniqueFlattendTensor(const framework::ExecutionContext& context, bool return_counts) { const InT* in_data = in.data(); std::set 
unique(in_data, in_data + in.numel()); - out->Resize(phi::make_ddim({static_cast(unique.size())})); + out->Resize(common::make_ddim({static_cast(unique.size())})); auto out_data = out->mutable_data(context.GetPlace()); std::copy(unique.begin(), unique.end(), out_data); if (return_index) { auto* indices = context.Output("Indices"); - indices->Resize(phi::make_ddim({out->numel()})); + indices->Resize(common::make_ddim({out->numel()})); auto indices_data = indices->mutable_data(context.GetPlace()); std::unordered_map indices_map; indices_map.reserve(out->numel()); @@ -164,7 +164,7 @@ static void UniqueFlattendTensor(const framework::ExecutionContext& context, if (return_inverse) { auto* inverse = context.Output("Index"); - inverse->Resize(phi::make_ddim({in.numel()})); + inverse->Resize(common::make_ddim({in.numel()})); auto inverse_data = inverse->mutable_data(context.GetPlace()); std::unordered_map inverse_map; inverse_map.reserve(out->numel()); @@ -178,7 +178,7 @@ static void UniqueFlattendTensor(const framework::ExecutionContext& context, if (return_counts) { auto* count = context.Output("Counts"); - count->Resize(phi::make_ddim({out->numel()})); + count->Resize(common::make_ddim({out->numel()})); auto count_data = count->mutable_data(context.GetPlace()); std::unordered_map counts_map; counts_map.reserve(out->numel()); @@ -242,18 +242,18 @@ static void UniqueDim(const framework::ExecutionContext& context, std::iota(permute.begin(), permute.end(), 0); permute[axis] = 0; permute[0] = axis; - std::vector in_trans_dims_vec(phi::vectorize(in.dims())); + std::vector in_trans_dims_vec(common::vectorize(in.dims())); in_trans_dims_vec[axis] = in.dims()[0]; in_trans_dims_vec[0] = in.dims()[axis]; phi::DenseTensor in_trans; - framework::DDim in_trans_dims = phi::make_ddim(in_trans_dims_vec); + framework::DDim in_trans_dims = common::make_ddim(in_trans_dims_vec); in_trans.Resize(in_trans_dims); in_trans.mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); phi::funcs::TransCompute( in.dims().size(), dev_ctx, in, &in_trans, permute); // reshape tensor: eg. [dim1, dim0, dim2] -> [dim1, dim0*dim2] - framework::DDim in_trans_flat_dims = phi::flatten_to_2d(in_trans_dims, 1); + framework::DDim in_trans_flat_dims = common::flatten_to_2d(in_trans_dims, 1); in_trans.Resize(in_trans_flat_dims); // sort indices @@ -308,10 +308,10 @@ static void UniqueDim(const framework::ExecutionContext& context, phi::DenseTensor out_trans; std::vector out_trans_dims_vec = in_trans_dims_vec; out_trans_dims_vec[0] = input_unbind.size(); - out_trans.Resize(phi::make_ddim(out_trans_dims_vec)); + out_trans.Resize(common::make_ddim(out_trans_dims_vec)); out_trans.mutable_data(context.GetPlace()); std::swap(out_trans_dims_vec[0], out_trans_dims_vec[axis]); - out->Resize(phi::make_ddim(out_trans_dims_vec)); + out->Resize(common::make_ddim(out_trans_dims_vec)); out->mutable_data(context.GetPlace()); concat_functor(dev_ctx, input_unbind, 0, &out_trans); phi::funcs::TransCompute( diff --git a/paddle/fluid/operators/utils.h b/paddle/fluid/operators/utils.h index cc6a7d7dcf9248..cecd2e2931af62 100644 --- a/paddle/fluid/operators/utils.h +++ b/paddle/fluid/operators/utils.h @@ -30,7 +30,7 @@ inline std::vector GetDataFromTensorList( for (size_t i = 0; i < list_tensor.size(); ++i) { auto tensor = list_tensor[i]; PADDLE_ENFORCE_EQ(tensor->dims(), - phi::make_ddim({1}), + common::make_ddim({1}), platform::errors::InvalidArgument( "The shape of Tensor in list must be [1]. 
" "But received its shape " @@ -71,19 +71,19 @@ inline framework::DDim GetShape(const framework::ExecutionContext& ctx) { if (ctx.HasInput("ShapeTensor")) { auto* shape_tensor = ctx.Input("ShapeTensor"); auto vec_shape = phi::GetVectorFromTensor(shape_tensor); - return phi::make_ddim(vec_shape); + return common::make_ddim(vec_shape); } // 2. shape is a list/tuple containing Tensor auto shape_tensor_list = ctx.MultiInput("ShapeTensorList"); if (shape_tensor_list.size() > 0) { auto vec_shape = GetDataFromTensorList(shape_tensor_list); - return phi::make_ddim(vec_shape); + return common::make_ddim(vec_shape); } // 3. shape is a list/tuple without containing Tensor auto vec_shape = ctx.Attr>("shape"); - return phi::make_ddim(vec_shape); + return common::make_ddim(vec_shape); } template diff --git a/paddle/fluid/operators/var_conv_2d_op.cc b/paddle/fluid/operators/var_conv_2d_op.cc index f41cc2bab2021b..86e3fc3420ed63 100644 --- a/paddle/fluid/operators/var_conv_2d_op.cc +++ b/paddle/fluid/operators/var_conv_2d_op.cc @@ -171,8 +171,8 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { out_dims_vec.push_back(1); std::vector col_dims_vec{-1}; col_dims_vec.push_back(1); - ctx->SetOutputDim("Out", phi::make_ddim(out_dims_vec)); - ctx->SetOutputDim("Col", phi::make_ddim(col_dims_vec)); + ctx->SetOutputDim("Out", common::make_ddim(out_dims_vec)); + ctx->SetOutputDim("Col", common::make_ddim(col_dims_vec)); } } @@ -226,7 +226,7 @@ class CPUVarConv2dOPKernel : public framework::OpKernel { std::vector col_dims_vec{top_size}; col_dims_vec.push_back(1); auto* top_data = - col->mutable_data(phi::make_ddim(col_dims_vec), ctx.GetPlace()); + col->mutable_data(common::make_ddim(col_dims_vec), ctx.GetPlace()); auto* bottom_data = input.data(); int kernel_win_size = kernel_h * kernel_w; @@ -321,7 +321,7 @@ class CPUVarConv2dOPKernel : public framework::OpKernel { std::vector top_dims_vec{top_size}; top_dims_vec.push_back(1); auto* top_data = - top->mutable_data(phi::make_ddim(top_dims_vec), ctx.GetPlace()); + top->mutable_data(common::make_ddim(top_dims_vec), ctx.GetPlace()); auto* w_data = w->data(); auto* col_data = col->data(); diff --git a/paddle/fluid/pir/dialect/CMakeLists.txt b/paddle/fluid/pir/dialect/CMakeLists.txt index 7cac45069a9e1d..e563831e96e61a 100644 --- a/paddle/fluid/pir/dialect/CMakeLists.txt +++ b/paddle/fluid/pir/dialect/CMakeLists.txt @@ -149,7 +149,7 @@ list( set(op_dialect_srcs ${op_dialect_srcs} ${op_source_file} ${api_source_file}) -set(op_dialect_deps phi pir type_info string_helper) +set(op_dialect_deps phi common pir type_info string_helper) cc_library( op_dialect diff --git a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc index d34e65e24b7eef..95e77ff6169c68 100644 --- a/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc +++ b/paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.cc @@ -13,12 +13,12 @@ // limitations under the License. 
#include "paddle/fluid/pir/dialect/kernel/ir/kernel_dialect.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/pir/dialect/kernel/ir/kernel_attribute.h" #include "paddle/fluid/pir/dialect/kernel/ir/kernel_op.h" #include "paddle/fluid/pir/dialect/kernel/ir/kernel_type.h" #include "paddle/fluid/platform/init_phi.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/pir/core/ir_printer.h" REGISTER_FILE_SYMBOLS(kernel_dialect); @@ -46,7 +46,7 @@ void KernelDialect::PrintType(pir::Type type, std::ostream &os) const { os << phi::AllocationTypeStr(tensor_type.place().GetType()) << "_"; os << "tensor<"; - for (auto d : phi::vectorize(tensor_type.dims())) { + for (auto d : common::vectorize(tensor_type.dims())) { os << d; os << "x"; } @@ -58,7 +58,7 @@ void KernelDialect::PrintType(pir::Type type, std::ostream &os) const { os << phi::AllocationTypeStr(tensor_type.place().GetType()) << "_"; os << "tensor<"; - for (auto d : phi::vectorize(tensor_type.dims())) { + for (auto d : common::vectorize(tensor_type.dims())) { os << d; os << "x"; } diff --git a/paddle/fluid/pir/dialect/op_generator/op_build_gen.py b/paddle/fluid/pir/dialect/op_generator/op_build_gen.py index 6c2165940c8e18..39c5d71616a220 100644 --- a/paddle/fluid/pir/dialect/op_generator/op_build_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/op_build_gen.py @@ -395,7 +395,7 @@ def GenBuildOutputs( {name} = std::move(phi::IntArray(std::vector({name}_size, -1))); {name}.SetFromTensor(true); }} else if ({name}_.type().isa()) {{ - size_t {name}_size = phi::product({name}_.type().dyn_cast().dims()); + size_t {name}_size = common::product({name}_.type().dyn_cast().dims()); {name} = std::move(phi::IntArray(std::vector({name}_size, -1))); {name}.SetFromTensor(true); }} else {{ @@ -412,7 +412,7 @@ def GenBuildOutputs( size_t {name}_size = {name}_.type().dyn_cast().size(); {name} = std::vector({name}_size, -1); }} else if ({name}_.type().isa()) {{ - size_t {name}_size = phi::product({name}_.type().dyn_cast().dims()); + size_t {name}_size = common::product({name}_.type().dyn_cast().dims()); {name} = std::vector({name}_size, -1); }} else {{ PADDLE_THROW(phi::errors::Unimplemented("Only support VectorType or DenseTensorType")); diff --git a/paddle/fluid/pir/dialect/operator/ir/api_builder.cc b/paddle/fluid/pir/dialect/operator/ir/api_builder.cc index af2268fda16a89..af00df8704136f 100644 --- a/paddle/fluid/pir/dialect/operator/ir/api_builder.cc +++ b/paddle/fluid/pir/dialect/operator/ir/api_builder.cc @@ -13,7 +13,7 @@ // limitations under the License. 
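GetShape in utils.h above is the one spot where a shape can arrive three ways: a ShapeTensor input, a list of scalar shape tensors, or the plain shape attribute; all three funnel into common::make_ddim. A condensed stand-in for that resolution order (the real helper reads these vectors out of the ExecutionContext rather than taking them as parameters):

#include <cstdint>
#include <vector>

#include "paddle/common/ddim.h"

// Simplified mirror of GetShape's three-way fallback; empty vectors stand in
// for absent inputs.
common::DDim ResolveShape(const std::vector<int64_t> &from_shape_tensor,
                          const std::vector<int64_t> &from_tensor_list,
                          const std::vector<int64_t> &from_attr) {
  if (!from_shape_tensor.empty()) return common::make_ddim(from_shape_tensor);
  if (!from_tensor_list.empty()) return common::make_ddim(from_tensor_list);
  return common::make_ddim(from_attr);
}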
#include "paddle/fluid/pir/dialect/operator/ir/api_builder.h" -#include "paddle/pir/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/ir_context.h" namespace paddle { diff --git a/paddle/fluid/pir/dialect/operator/ir/api_builder.h b/paddle/fluid/pir/dialect/operator/ir/api_builder.h index 92b54c97db7f8c..7d1f1f6422b760 100644 --- a/paddle/fluid/pir/dialect/operator/ir/api_builder.h +++ b/paddle/fluid/pir/dialect/operator/ir/api_builder.h @@ -16,8 +16,8 @@ #include #include +#include "paddle/common/macros.h" #include "paddle/pir/core/builder.h" -#include "paddle/pir/core/macros.h" #include "paddle/pir/core/parameter.h" #include "paddle/pir/core/program.h" diff --git a/paddle/fluid/pir/dialect/operator/ir/attribute_storage.h b/paddle/fluid/pir/dialect/operator/ir/attribute_storage.h index 68f066b0093294..15e49e68e25833 100644 --- a/paddle/fluid/pir/dialect/operator/ir/attribute_storage.h +++ b/paddle/fluid/pir/dialect/operator/ir/attribute_storage.h @@ -14,9 +14,9 @@ #pragma once +#include "paddle/common/layout.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/int_array.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/place.h" #include "paddle/pir/core/attribute.h" #include "paddle/pir/core/attribute_base.h" diff --git a/paddle/fluid/pir/dialect/operator/ir/ir_meta_tensor.h b/paddle/fluid/pir/dialect/operator/ir/ir_meta_tensor.h index ae3b82a5e3d0c1..f2af1eb66bf3c0 100644 --- a/paddle/fluid/pir/dialect/operator/ir/ir_meta_tensor.h +++ b/paddle/fluid/pir/dialect/operator/ir/ir_meta_tensor.h @@ -14,6 +14,7 @@ #pragma once +#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/meta_tensor.h" namespace paddle { diff --git a/paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.cc b/paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.cc index d08dfccc25250d..384560ef591fce 100644 --- a/paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.cc +++ b/paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.cc @@ -13,8 +13,7 @@ // limitations under the License. 
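The dialect printers above iterate common::vectorize(dims) to render a DenseTensorType as tensor<2x3x...>, and op_build_gen.py emits the matching common::product call when it needs an element count for an IntArray. The printing loop reduces to this sketch (stream handling simplified, dtype left as a placeholder):

#include <sstream>
#include <string>

#include "paddle/common/ddim.h"

// Render a shape the way KernelDialect::PrintType does: each extent followed
// by 'x', so {2, 3} becomes "tensor<2x3xdtype>" once the element type lands.
std::string DimsToTypeString(const common::DDim &dims) {
  std::ostringstream os;
  os << "tensor<";
  for (auto d : common::vectorize(dims)) {
    os << d << "x";
  }
  os << "dtype>";  // placeholder; the real printer streams the element type
  return os.str();
}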
#include "paddle/fluid/pir/dialect/operator/ir/ir_selected_rows.h" - -#include "paddle/pir/core/enforce.h" +#include "paddle/common/enforce.h" namespace paddle { namespace dialect { @@ -51,7 +50,7 @@ IrSelectedRows& IrSelectedRows::operator=(IrSelectedRows&& other) noexcept { return *this; } -int64_t IrSelectedRows::numel() const { return phi::product(dims_); } +int64_t IrSelectedRows::numel() const { return common::product(dims_); } const phi::Place& IrSelectedRows::place() const { IR_THROW("Don't use IrSelectedRows::place method."); diff --git a/paddle/fluid/pir/dialect/operator/ir/ir_tensor.cc b/paddle/fluid/pir/dialect/operator/ir/ir_tensor.cc index be06d3dbfafc52..6383257647323e 100644 --- a/paddle/fluid/pir/dialect/operator/ir/ir_tensor.cc +++ b/paddle/fluid/pir/dialect/operator/ir/ir_tensor.cc @@ -14,7 +14,7 @@ #include "paddle/fluid/pir/dialect/operator/ir/ir_tensor.h" -#include "paddle/pir/core/enforce.h" +#include "paddle/common/enforce.h" namespace paddle { namespace dialect { @@ -51,7 +51,7 @@ IrTensor& IrTensor::operator=(IrTensor&& other) noexcept { return *this; } -int64_t IrTensor::numel() const { return phi::product(dims_); } +int64_t IrTensor::numel() const { return common::product(dims_); } const phi::Place& IrTensor::place() const { IR_THROW("Don't use IrTensor::place method."); diff --git a/paddle/fluid/pir/dialect/operator/ir/manual_op.cc b/paddle/fluid/pir/dialect/operator/ir/manual_op.cc index 5fdde6aadc08c0..cda564bedbb1df 100644 --- a/paddle/fluid/pir/dialect/operator/ir/manual_op.cc +++ b/paddle/fluid/pir/dialect/operator/ir/manual_op.cc @@ -2086,7 +2086,7 @@ void ExpandOp::Build(pir::Builder &builder, shape = std::move(phi::IntArray(std::vector(shape_size, -2))); shape.SetFromTensor(true); } else if (shape_.type().isa()) { - size_t shape_size = phi::product( + size_t shape_size = common::product( shape_.type().dyn_cast().dims()); // In ExpandInferMeta use -2 to represent the element in expand_shape is a // var. diff --git a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc index 69508f198b1102..4c44b91af35b72 100644 --- a/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc +++ b/paddle/fluid/pir/dialect/operator/ir/op_dialect.cc @@ -88,7 +88,7 @@ void OperatorDialect::PrintType(pir::Type type, std::ostream &os) const { os << '.'; if (auto tensor_type = type.dyn_cast()) { os << "tensor<"; - for (auto d : phi::vectorize(tensor_type.dims())) { + for (auto d : common::vectorize(tensor_type.dims())) { os << d; os << "x"; } @@ -96,7 +96,7 @@ void OperatorDialect::PrintType(pir::Type type, std::ostream &os) const { os << ">"; } else if (auto selected_rows_type = type.dyn_cast()) { os << "selectedrows<"; - for (auto d : phi::vectorize(selected_rows_type.dims())) { + for (auto d : common::vectorize(selected_rows_type.dims())) { os << d; os << "x"; } @@ -153,7 +153,7 @@ pir::Type OperatorDialect::ParseType(pir::IrParser &parser) { // NOLINT break; } } - phi::DDim ddim = phi::make_ddim(dim); + phi::DDim ddim = common::make_ddim(dim); pir::Type dtype = parser.ParseType(); std::vector> lod; std::vector lodv; diff --git a/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.cc b/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.cc index bf752a089b4f6f..7fc00acc12a81b 100644 --- a/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.cc +++ b/paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.cc @@ -13,6 +13,7 @@ // limitations under the License. 
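IrTensor and IrSelectedRows above both define numel as common::product over the cached dims, and their unsupported accessors keep throwing through IR_THROW, which the include swaps suggest now travels with paddle/common/enforce.h. A pared-down mirror of that pair of behaviors (the class is a toy, reduced to the two members involved):

#include <cstdint>

#include "paddle/common/ddim.h"
#include "paddle/common/enforce.h"

// Toy stand-in for IrTensor: element count is just the product of the dims,
// and accessors that have no meaning in the IR fail loudly via IR_THROW.
class ToyIrTensor {
 public:
  explicit ToyIrTensor(common::DDim dims) : dims_(dims) {}

  int64_t numel() const { return common::product(dims_); }

  const common::DDim &dims() const { return dims_; }

  void place() const { IR_THROW("Don't use ToyIrTensor::place method."); }

 private:
  common::DDim dims_;
};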
#include "paddle/fluid/pir/dialect/operator/utils/op_yaml_info_parser.h" +#include "paddle/phi/core/enforce.h" namespace paddle { namespace dialect { diff --git a/paddle/fluid/pir/dialect/operator/utils/utils.h b/paddle/fluid/pir/dialect/operator/utils/utils.h index 4bbd454d3ea350..18f9f2950c11fd 100644 --- a/paddle/fluid/pir/dialect/operator/utils/utils.h +++ b/paddle/fluid/pir/dialect/operator/utils/utils.h @@ -18,6 +18,7 @@ #include "paddle/phi/common/int_array.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/attribute.h" +#include "paddle/phi/core/enforce.h" #include "paddle/pir/core/builtin_attribute.h" #include "paddle/pir/core/builtin_type.h" #include "paddle/pir/core/value.h" diff --git a/paddle/fluid/pir/transforms/fusion/conv2d_add_act_fuse_pass.cc b/paddle/fluid/pir/transforms/fusion/conv2d_add_act_fuse_pass.cc index 6adb25f8c2dd06..4c701a4c4a51c7 100644 --- a/paddle/fluid/pir/transforms/fusion/conv2d_add_act_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/fusion/conv2d_add_act_fuse_pass.cc @@ -17,10 +17,10 @@ #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" #include "paddle/fluid/pir/transforms/fusion/conv2d_add_act_fuse_pass.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h" #include "paddle/fluid/pir/dialect/operator/ir/op_type.h" #include "paddle/fluid/pir/transforms/transform_general_functions.h" -#include "paddle/phi/core/ddim.h" namespace { diff --git a/paddle/fluid/pir/transforms/fusion/conv2d_add_fuse_pass.cc b/paddle/fluid/pir/transforms/fusion/conv2d_add_fuse_pass.cc index 35b0e65e16b5b5..037b2b95c6017c 100644 --- a/paddle/fluid/pir/transforms/fusion/conv2d_add_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/fusion/conv2d_add_fuse_pass.cc @@ -22,8 +22,8 @@ #include "paddle/fluid/pir/transforms/fusion/conv2d_add_fuse_pass.h" #include "paddle/fluid/pir/transforms/transform_general_functions.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/pir/drr/api/drr_pattern_base.h" -#include "paddle/phi/core/ddim.h" #include "paddle/pir/pass/pass.h" namespace { diff --git a/paddle/fluid/pir/transforms/fusion/conv2d_bn_fuse_pass.cc b/paddle/fluid/pir/transforms/fusion/conv2d_bn_fuse_pass.cc index 8406d705973031..42129852bc8bc3 100644 --- a/paddle/fluid/pir/transforms/fusion/conv2d_bn_fuse_pass.cc +++ b/paddle/fluid/pir/transforms/fusion/conv2d_bn_fuse_pass.cc @@ -17,10 +17,10 @@ #include "paddle/fluid/pir/dialect/operator/ir/pd_op.h" #include "paddle/fluid/pir/transforms/fusion/conv2d_bn_fuse_pass.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h" #include "paddle/fluid/pir/dialect/operator/ir/op_type.h" #include "paddle/fluid/pir/transforms/transform_general_functions.h" -#include "paddle/phi/core/ddim.h" namespace { @@ -60,7 +60,7 @@ class Conv2dBnFusePattern bn_variance.type().dyn_cast().dims(); float epsilon = op.attribute("epsilon").data(); paddle::dialect::FullOp full_op = rewriter.Build( - phi::vectorize(bn_variance_shape), epsilon); + common::vectorize(bn_variance_shape), epsilon); paddle::dialect::AddOp add_op = rewriter.Build( bn_variance.dyn_cast(), full_op.out()); paddle::dialect::SqrtOp sqrt_op = diff --git a/paddle/fluid/pir/transforms/params_sync_among_devices_pass.cc b/paddle/fluid/pir/transforms/params_sync_among_devices_pass.cc index 3e121aa51f0756..51b75ea0335821 100644 --- a/paddle/fluid/pir/transforms/params_sync_among_devices_pass.cc +++ b/paddle/fluid/pir/transforms/params_sync_among_devices_pass.cc @@ -23,7 +23,7 @@ #include 
"paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/pir/core/builtin_attribute.h" #include "paddle/pir/core/builtin_op.h" #include "paddle/pir/pass/pass.h" diff --git a/paddle/fluid/pir/transforms/transform_general_functions.h b/paddle/fluid/pir/transforms/transform_general_functions.h index 48399a95a81ce6..c5f138daa41a79 100644 --- a/paddle/fluid/pir/transforms/transform_general_functions.h +++ b/paddle/fluid/pir/transforms/transform_general_functions.h @@ -14,9 +14,9 @@ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/pir/core/operation.h" #include "paddle/pir/core/parameter.h" #include "paddle/pir/core/type.h" diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 0cca954a6275a5..113ba40ec0cf31 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -6,9 +6,9 @@ cc_library( cc_test( errors_test SRCS errors_test.cc - DEPS phi enforce) + DEPS phi common enforce) -set(enforce_deps phi) +set(enforce_deps phi common) if(WITH_GPU) set(enforce_deps ${enforce_deps} external_error_proto) endif() @@ -16,30 +16,30 @@ endif() cc_library( enforce INTERFACE SRCS enforce.cc - DEPS ${enforce_deps}) + DEPS ${enforce_deps} common) cc_library(monitor SRCS monitor.cc) cc_test( enforce_test SRCS enforce_test.cc - DEPS enforce) + DEPS enforce common) cc_test( cpu_info_test SRCS cpu_info_test.cc - DEPS phi) + DEPS phi common) cc_test( os_info_test SRCS os_info_test.cc - DEPS phi) + DEPS phi common) cc_library( place SRCS place.cc - DEPS enforce phi) + DEPS enforce phi common) cc_test( place_test SRCS place_test.cc - DEPS place glog phi) + DEPS place glog phi common) if(WITH_MKLDNN) set(MKLDNN_CTX_DEPS mkldnn) @@ -53,7 +53,7 @@ add_subdirectory(dynload) cc_library( cpu_helper SRCS cpu_helper.cc - DEPS cblas enforce) + DEPS cblas enforce common) cc_test( cpu_helper_test SRCS cpu_helper_test.cc @@ -82,13 +82,13 @@ if(WITH_GPU) nv_library( stream_callback_manager SRCS stream_callback_manager.cc - DEPS simple_threadpool enforce) + DEPS simple_threadpool enforce common) endif() if(WITH_ROCM) hip_library( stream_callback_manager SRCS stream_callback_manager.cc - DEPS simple_threadpool enforce) + DEPS simple_threadpool enforce common) endif() if(WITH_GPU OR WITH_ROCM) @@ -101,14 +101,14 @@ if(WITH_GLOO) cc_library( gloo_context SRCS gloo_context.cc - DEPS framework_proto gloo_wrapper enforce) + DEPS framework_proto gloo_wrapper enforce common) endif() # separate init from device_context to avoid cycle dependencies cc_library( init SRCS init.cc - DEPS device_context phi memcpy) + DEPS device_context phi common memcpy) # memcpy depends on device_context, here add deps individually for # avoiding cycle dependencies @@ -130,12 +130,13 @@ cc_library( ${dgc_deps} dlpack phi + common ${XPU_CTX_DEPS}) cc_library( collective_helper SRCS collective_helper.cc gen_comm_id_helper.cc - DEPS framework_proto device_context enforce) + DEPS framework_proto device_context enforce common) if(WITH_GPU OR WITH_ROCM) target_link_libraries(device_context gpu_resource_pool) @@ -159,7 +160,7 @@ set(DEVICE_EVENT_LIBS) cc_library( device_event_base SRCS device_event_base.cc - DEPS place enforce device_context op_registry) + DEPS place enforce device_context op_registry common) set(DEVICE_EVENT_LIBS 
device_event_base CACHE INTERNAL "device event libs") @@ -188,12 +189,12 @@ if(WITH_GPU) cuda_graph_with_memory_pool SRCS cuda_graph_with_memory_pool.cc DEPS ${DEVICE_EVENT_LIBS} device_event_custom_device device_context - allocator phi) + allocator phi common) else() nv_library( cuda_graph_with_memory_pool SRCS cuda_graph_with_memory_pool.cc - DEPS ${DEVICE_EVENT_LIBS} device_context allocator phi) + DEPS ${DEVICE_EVENT_LIBS} device_context allocator phi common) endif() nv_test( device_context_test @@ -245,6 +246,7 @@ cc_library( lodtensor_printer SRCS lodtensor_printer.cc DEPS phi + common place tensor scope @@ -263,6 +265,7 @@ if(WITH_GPU) profiler SRCS profiler.cc profiler.cu DEPS phi + common gpu_info enforce dynload_cuda @@ -275,6 +278,7 @@ elseif(WITH_ROCM) profiler SRCS profiler.cc profiler.cu DEPS phi + common gpu_info enforce new_profiler @@ -286,6 +290,7 @@ elseif(WITH_XPU) profiler SRCS profiler.cc DEPS phi + common enforce dynload_xpti new_profiler @@ -296,7 +301,13 @@ else() cc_library( profiler SRCS profiler.cc - DEPS phi enforce new_profiler stats op_proto_maker shape_inference) + DEPS phi + common + enforce + new_profiler + stats + op_proto_maker + shape_inference) endif() cc_test( @@ -332,7 +343,7 @@ if(WITH_GPU) nv_test( test_limit_gpu_memory SRCS test_limit_gpu_memory.cu - DEPS gpu_info phi) + DEPS gpu_info phi common) nv_library( cuda_device_guard SRCS cuda_device_guard.cc @@ -347,7 +358,7 @@ if(WITH_ROCM) hip_test( test_limit_gpu_memory SRCS test_limit_gpu_memory.cu - DEPS gpu_info phi) + DEPS gpu_info phi common) hip_library( cuda_device_guard SRCS cuda_device_guard.cc @@ -359,7 +370,7 @@ if(NOT APPLE AND NOT WIN32) cc_test( device_code_test SRCS device_code_test.cc - DEPS phi lod_tensor) + DEPS phi common lod_tensor) endif() endif() @@ -381,4 +392,4 @@ cc_library( cc_test( init_phi_test SRCS init_phi_test.cc - DEPS phi init_phi) + DEPS phi common init_phi) diff --git a/paddle/fluid/platform/bfloat16_test.cu b/paddle/fluid/platform/bfloat16_test.cu index cec83cbd11fe94..4e6bdb94625ddc 100644 --- a/paddle/fluid/platform/bfloat16_test.cu +++ b/paddle/fluid/platform/bfloat16_test.cu @@ -59,7 +59,7 @@ TEST(bfloat16, lod_tensor_on_gpu) { phi::DenseTensor dst_tensor; bfloat16 *src_ptr = - src_tensor.mutable_data(phi::make_ddim({2, 2}), CPUPlace()); + src_tensor.mutable_data(common::make_ddim({2, 2}), CPUPlace()); bfloat16 arr[4] = { bfloat16(1.0f), bfloat16(0.5f), bfloat16(0.33333f), bfloat16(0.0f)}; diff --git a/paddle/fluid/platform/cuda_graph_with_memory_pool.h b/paddle/fluid/platform/cuda_graph_with_memory_pool.h index 78f36a77e5f9cd..570ee01ec0a511 100644 --- a/paddle/fluid/platform/cuda_graph_with_memory_pool.h +++ b/paddle/fluid/platform/cuda_graph_with_memory_pool.h @@ -14,10 +14,10 @@ #pragma once +#include "paddle/common/macros.h" #include "paddle/phi/backends/gpu/cuda/cuda_graph_with_memory_pool.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/macros.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/device/custom/CMakeLists.txt b/paddle/fluid/platform/device/custom/CMakeLists.txt index 8e081781e298d6..c01bead7b03e66 100644 --- a/paddle/fluid/platform/device/custom/CMakeLists.txt +++ b/paddle/fluid/platform/device/custom/CMakeLists.txt @@ -2,9 +2,9 @@ if(WITH_CUSTOM_DEVICE) cc_library( custom_device_resource_pool SRCS custom_device_resource_pool.cc - DEPS phi glog enforce monitor) + DEPS phi common glog enforce monitor) cc_test( custom_device_test SRCS custom_device_test.cc - DEPS 
phi gradient_accumulator) + DEPS phi common gradient_accumulator) endif() diff --git a/paddle/fluid/platform/device/custom/custom_device_test.cc b/paddle/fluid/platform/device/custom/custom_device_test.cc index 7cb38b8850b7c6..b36355b2386be6 100644 --- a/paddle/fluid/platform/device/custom/custom_device_test.cc +++ b/paddle/fluid/platform/device/custom/custom_device_test.cc @@ -81,22 +81,22 @@ void TestTensorMutableData(const paddle::platform::Place& place) { float* p1 = nullptr; float* p2 = nullptr; // initialization - p1 = src_tensor.mutable_data(phi::make_ddim({1, 2, 3}), place); + p1 = src_tensor.mutable_data(common::make_ddim({1, 2, 3}), place); auto p1_holder = src_tensor.Holder(); EXPECT_NE(p1, nullptr); // set src_tensor a new dim with large size // momery is supposed to be re-allocated - p2 = src_tensor.mutable_data(phi::make_ddim({3, 1024}), place); + p2 = src_tensor.mutable_data(common::make_ddim({3, 1024}), place); auto p2_holder = src_tensor.Holder(); EXPECT_NE(p2, nullptr); EXPECT_NE(p1_holder.get(), p2_holder.get()); // set src_tensor a new dim with same size // momery block is supposed to be unchanged - p1 = src_tensor.mutable_data(phi::make_ddim({2, 2, 3}), place); + p1 = src_tensor.mutable_data(common::make_ddim({2, 2, 3}), place); EXPECT_EQ(p1, p2); // set src_tensor a new dim with smaller size // momery block is supposed to be unchanged - p2 = src_tensor.mutable_data(phi::make_ddim({2, 2}), place); + p2 = src_tensor.mutable_data(common::make_ddim({2, 2}), place); EXPECT_EQ(p1, p2); } @@ -104,7 +104,7 @@ void TestTensorShareDataWith(const paddle::platform::Place& place) { std::cout << "TestTensorShareDataWith on " << place << std::endl; phi::DenseTensor src_tensor; phi::DenseTensor dst_tensor; - src_tensor.mutable_data(phi::make_ddim({2, 3, 4}), place); + src_tensor.mutable_data(common::make_ddim({2, 3, 4}), place); dst_tensor.ShareDataWith(src_tensor); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } @@ -118,7 +118,7 @@ void TestTensorUtils(const paddle::platform::Place& place) { phi::DenseTensor gpu_tensor; phi::DenseTensor dst_tensor; - int* src_ptr = src_tensor.mutable_data(phi::make_ddim({3, 3}), + int* src_ptr = src_tensor.mutable_data(common::make_ddim({3, 3}), paddle::platform::CPUPlace()); std::array arr = {1, 2, 3, 4, 5, 6, 7, 8, 9}; diff --git a/paddle/fluid/platform/device/gpu/CMakeLists.txt b/paddle/fluid/platform/device/gpu/CMakeLists.txt index 897f8d3732b730..65c3fb20631675 100644 --- a/paddle/fluid/platform/device/gpu/CMakeLists.txt +++ b/paddle/fluid/platform/device/gpu/CMakeLists.txt @@ -3,7 +3,13 @@ if(WITH_GPU) nv_library( gpu_info SRCS gpu_info.cc - DEPS phi glog enforce monitor dynload_cuda malloc) + DEPS phi + common + glog + enforce + monitor + dynload_cuda + malloc) nv_test(cuda_helper_test SRCS cuda_helper_test.cu) nv_test( @@ -15,7 +21,7 @@ elseif(WITH_ROCM) hip_library( gpu_info SRCS gpu_info.cc - DEPS phi glog enforce monitor dynload_cuda) + DEPS phi common glog enforce monitor dynload_cuda) hip_test(cuda_helper_test SRCS cuda_helper_test.cu) hip_test( diff --git a/paddle/fluid/platform/device/gpu/cuda/CMakeLists.txt b/paddle/fluid/platform/device/gpu/cuda/CMakeLists.txt index 07901054b3b337..a535cd74478437 100644 --- a/paddle/fluid/platform/device/gpu/cuda/CMakeLists.txt +++ b/paddle/fluid/platform/device/gpu/cuda/CMakeLists.txt @@ -1,7 +1,7 @@ nv_library( cuda_profiler SRCS cuda_profiler.cc - DEPS enforce) + DEPS enforce common) nv_test( cudnn_helper_test diff --git a/paddle/fluid/platform/device/ipu/CMakeLists.txt 
b/paddle/fluid/platform/device/ipu/CMakeLists.txt index a4a6db37837063..68bed1034af530 100644 --- a/paddle/fluid/platform/device/ipu/CMakeLists.txt +++ b/paddle/fluid/platform/device/ipu/CMakeLists.txt @@ -51,5 +51,5 @@ if(WITH_IPU) cc_library( ipu_info SRCS ${IPU_INFO_SRC} - DEPS popart-only enforce) + DEPS popart-only enforce common) endif() diff --git a/paddle/fluid/platform/device/ipu/ipu_compiler.cc b/paddle/fluid/platform/device/ipu/ipu_compiler.cc index ec6c8a49647b18..811f897cbda7b4 100644 --- a/paddle/fluid/platform/device/ipu/ipu_compiler.cc +++ b/paddle/fluid/platform/device/ipu/ipu_compiler.cc @@ -415,7 +415,7 @@ void Compiler::LowerConstants(const Scope* scope) { ConstantOpAttrVisitor visitor(tensor, dtype); auto value = op_desc->GetAttr("value"); paddle::visit(visitor, value); - auto ddim = phi::make_ddim(shape); + auto ddim = common::make_ddim(shape); tensor->Resize(ddim); auto const_data = std::unique_ptr(); diff --git a/paddle/fluid/platform/device/ipu/ipu_executor.cc b/paddle/fluid/platform/device/ipu/ipu_executor.cc index ee749f3a19a15d..d0792689228de1 100644 --- a/paddle/fluid/platform/device/ipu/ipu_executor.cc +++ b/paddle/fluid/platform/device/ipu/ipu_executor.cc @@ -210,7 +210,7 @@ void Executor::Run(const std::vector &inputs, } auto *tensor = outputs[i]; - tensor->Resize(phi::make_ddim(output_shape)); + tensor->Resize(common::make_ddim(output_shape)); auto fetch_dtype = fetch_info.dataType(); auto paddle_type = PopartDType2VarType(fetch_dtype); tensor->mutable_data(ctx.GetPlace(), @@ -427,7 +427,7 @@ void Executor::RunPopef(const std::vector &inputs, auto *tensor = outputs[i]; // resize output size to make data_ptr valid. - tensor->Resize(phi::make_ddim(output_shape)); + tensor->Resize(common::make_ddim(output_shape)); tensor->mutable_data(ctx.GetPlace(), framework::TransToPhiDataType(paddle_dtype)); diff --git a/paddle/fluid/platform/device/xpu/CMakeLists.txt b/paddle/fluid/platform/device/xpu/CMakeLists.txt index f9e9659fa9f4cc..6a61d750b501d9 100644 --- a/paddle/fluid/platform/device/xpu/CMakeLists.txt +++ b/paddle/fluid/platform/device/xpu/CMakeLists.txt @@ -20,6 +20,7 @@ cc_library( device_context place phi + common dynload_xpti) cc_library( xpu_op_list @@ -30,6 +31,7 @@ cc_library( device_context op_kernel_type phi + common dynload_xpti) cc_library( xpu_resource_pool diff --git a/paddle/fluid/platform/device_code_test.cc b/paddle/fluid/platform/device_code_test.cc index 6b58453f03ea83..d72722de96ae04 100644 --- a/paddle/fluid/platform/device_code_test.cc +++ b/paddle/fluid/platform/device_code_test.cc @@ -60,8 +60,8 @@ TEST(DeviceCode, cuda) { phi::DenseTensor cpu_z; float scale = 2; - auto dims = - phi::make_ddim({static_cast(256), static_cast(1024)}); + auto dims = common::make_ddim( + {static_cast(256), static_cast(1024)}); phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance(); auto* cpu_ctx = reinterpret_cast(pool.Get(phi::CPUPlace())); cpu_x.Resize(dims); diff --git a/paddle/fluid/platform/dynload/CMakeLists.txt b/paddle/fluid/platform/dynload/CMakeLists.txt index 4cb3bfdb3adaef..29f7b91a171572 100644 --- a/paddle/fluid/platform/dynload/CMakeLists.txt +++ b/paddle/fluid/platform/dynload/CMakeLists.txt @@ -1,7 +1,7 @@ cc_library( dynamic_loader SRCS dynamic_loader.cc - DEPS glog enforce phi) + DEPS glog enforce phi common) list( APPEND @@ -57,20 +57,20 @@ if(WITH_ROCM) hip_library( dynload_cuda SRCS ${HIP_SRCS} - DEPS dynamic_loader phi) + DEPS dynamic_loader phi common) cc_library( dynload_warpctc SRCS warpctc.cc - DEPS dynamic_loader warpctc 
phi) + DEPS dynamic_loader warpctc phi common) else() nv_library( dynload_cuda SRCS ${CUDA_SRCS} - DEPS dynamic_loader phi) + DEPS dynamic_loader phi common) cc_library( dynload_warpctc SRCS warpctc.cc - DEPS dynamic_loader warpctc phi) + DEPS dynamic_loader warpctc phi common) endif() if(WITH_XPU) cc_library( @@ -86,6 +86,6 @@ if(MKL_FOUND AND WITH_ONEMKL) cc_library( dynload_mklrt SRCS mklrt.cc - DEPS dynamic_loader phi) + DEPS dynamic_loader phi common) target_include_directories(dynload_mklrt PRIVATE ${MKL_INCLUDE}) endif() diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index d9c9398461d5c7..1a82b05f3bc3af 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -103,10 +103,6 @@ limitations under the License. */ #endif #include "paddle/phi/core/flags.h" -namespace phi { -class ErrorSummary; -} // namespace phi - PHI_DECLARE_int32(call_stack_level); namespace paddle { diff --git a/paddle/fluid/platform/errors.h b/paddle/fluid/platform/errors.h index 758af3e2d9137e..b13a8b8d7a7129 100644 --- a/paddle/fluid/platform/errors.h +++ b/paddle/fluid/platform/errors.h @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" namespace paddle { namespace platform { -namespace errors = ::phi::errors; -using error = ::phi::ErrorCode; +namespace errors = ::common::errors; +using error = ::common::ErrorCode; } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/float16_test.cu b/paddle/fluid/platform/float16_test.cu index 3297b3a2326dac..4575b54d48c9bf 100644 --- a/paddle/fluid/platform/float16_test.cu +++ b/paddle/fluid/platform/float16_test.cu @@ -320,7 +320,7 @@ TEST(float16, lod_tensor_on_gpu) { phi::DenseTensor dst_tensor; float16 *src_ptr = - src_tensor.mutable_data(phi::make_ddim({2, 2}), CPUPlace()); + src_tensor.mutable_data(common::make_ddim({2, 2}), CPUPlace()); float16 arr[4] = { float16(1.0f), float16(0.5f), float16(0.33333f), float16(0.0f)}; diff --git a/paddle/fluid/platform/macros.h b/paddle/fluid/platform/macros.h index 3f854d40b8b23c..33ccc87fe32893 100644 --- a/paddle/fluid/platform/macros.h +++ b/paddle/fluid/platform/macros.h @@ -14,4 +14,4 @@ limitations under the License. 
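// [Sketch, not part of the patch] The platform/errors.h hunk above only
// retargets the long-standing aliases, so code spelled through
// platform::errors keeps compiling and now resolves into ::common instead of
// ::phi. Assuming InvalidArgument keeps its printf-style signature after the
// move (CheckPositive is a hypothetical caller):

#include "paddle/fluid/platform/errors.h"

void CheckPositive(int v) {
  if (v <= 0) {
    // Resolves to ::common::errors::InvalidArgument after this patch,
    // ::phi::errors::InvalidArgument before it.
    auto err = paddle::platform::errors::InvalidArgument(
        "expected a positive value, got %d", v);
    (void)err;  // typically handed to PADDLE_THROW / PADDLE_ENFORCE
  }
}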
*/ #pragma once -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" diff --git a/paddle/fluid/platform/monitor.h b/paddle/fluid/platform/monitor.h index bfbbcf3db77be5..bcc5dba0b5732d 100644 --- a/paddle/fluid/platform/monitor.h +++ b/paddle/fluid/platform/monitor.h @@ -26,7 +26,7 @@ #include "glog/logging.h" -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/profiler/CMakeLists.txt b/paddle/fluid/platform/profiler/CMakeLists.txt index 85eba90ec6166f..73fc8b79b4e4ef 100644 --- a/paddle/fluid/platform/profiler/CMakeLists.txt +++ b/paddle/fluid/platform/profiler/CMakeLists.txt @@ -1,24 +1,24 @@ cc_library( host_tracer SRCS host_tracer.cc - DEPS framework_proto enforce phi var_type_traits) + DEPS framework_proto enforce phi common var_type_traits) cc_library( cuda_tracer SRCS cuda_tracer.cc cupti_data_process.cc - DEPS workqueue_utils enforce glog) + DEPS workqueue_utils enforce glog common) cc_library( xpu_tracer SRCS xpu_tracer.cc - DEPS enforce glog) + DEPS enforce glog common) add_subdirectory(custom_device) cc_library( event_node SRCS event_node.cc - DEPS enforce place) + DEPS enforce place common) cc_library( profiler_utils SRCS utils.cc - DEPS enforce glog) + DEPS enforce glog common) add_subdirectory(dump) cc_library( profiler_logger @@ -32,7 +32,7 @@ cc_library( cc_library( cpu_utilization SRCS cpu_utilization.cc - DEPS phi enforce glog) + DEPS phi common enforce glog common) cc_library( new_profiler SRCS profiler.cc diff --git a/paddle/fluid/platform/profiler/custom_device/CMakeLists.txt b/paddle/fluid/platform/profiler/custom_device/CMakeLists.txt index f4fe05d0e7de98..ece3e7466f0550 100644 --- a/paddle/fluid/platform/profiler/custom_device/CMakeLists.txt +++ b/paddle/fluid/platform/profiler/custom_device/CMakeLists.txt @@ -1,4 +1,4 @@ cc_library( custom_tracer SRCS custom_tracer.cc - DEPS workqueue_utils enforce glog) + DEPS workqueue_utils enforce glog common) diff --git a/paddle/fluid/prim/api/auto_code_generated/tensor_operants_gen.py b/paddle/fluid/prim/api/auto_code_generated/tensor_operants_gen.py index 5f439485eb1bc4..378f57a468cd46 100644 --- a/paddle/fluid/prim/api/auto_code_generated/tensor_operants_gen.py +++ b/paddle/fluid/prim/api/auto_code_generated/tensor_operants_gen.py @@ -27,7 +27,7 @@ #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/common/int_array.h" -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" #include "paddle/utils/test_macros.h" """ @@ -152,7 +152,7 @@ class TEST_API EagerTensorOperants : public TensorOperantsBase { #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/common/int_array.h" -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" #include "paddle/utils/test_macros.h" """ diff --git a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h index 6e12d6fa464cc7..767d0a653c9265 100644 --- a/paddle/fluid/prim/api/composite_backward/composite_backward_api.h +++ b/paddle/fluid/prim/api/composite_backward/composite_backward_api.h @@ -20,12 +20,12 @@ #include +#include "paddle/common/ddim.h" #include "paddle/fluid/prim/api/all.h" #include "paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h" #include "paddle/fluid/prim/api/generated_prim/prim_generated_api.h" #include 
"paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/int_array.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace prim { @@ -36,12 +36,12 @@ using IntArray = paddle::experimental::IntArrayBase; template void hardswish_grad(const Tensor& x, const Tensor& out_grad, Tensor* x_grad) { if (x_grad) { - auto offset = full(phi::vectorize(x.dims()), 3.0, x.dtype()); + auto offset = full(common::vectorize(x.dims()), 3.0, x.dtype()); auto condition = less_equal(x, offset); auto tmp1 = where(condition, out_grad * ((x / 3.0) + 0.5), out_grad); auto res = where( - less_than(x, full(phi::vectorize(x.dims()), -3.0, x.dtype())), - full(phi::vectorize(x.dims()), 0.0, x.dtype()), + less_than(x, full(common::vectorize(x.dims()), -3.0, x.dtype())), + full(common::vectorize(x.dims()), 0.0, x.dtype()), tmp1); set_output(res, x_grad); } @@ -54,7 +54,7 @@ void leaky_relu_grad(const Tensor& out, Tensor* x_grad) { if (x_grad) { auto condition = greater_than( - out, full(phi::vectorize(out.dims()), 0.0, out.dtype())); + out, full(common::vectorize(out.dims()), 0.0, out.dtype())); auto res = where(condition, out_grad, out_grad * negative_slope); set_output(res, x_grad); } @@ -88,10 +88,11 @@ template void relu_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) { if (x_grad) { auto condition = greater_than( - out, full(phi::vectorize(out.dims()), 0.0, out.dtype())); - auto res = where(condition, - out_grad, - full(phi::vectorize(out.dims()), 0.0, out.dtype())); + out, full(common::vectorize(out.dims()), 0.0, out.dtype())); + auto res = + where(condition, + out_grad, + full(common::vectorize(out.dims()), 0.0, out.dtype())); set_output(res, x_grad); } } @@ -119,7 +120,7 @@ void softmax_grad(const Tensor& out, } } else { set_output( - full(phi::vectorize(out_grad.dims()), 0.0, out_grad.dtype()), + full(common::vectorize(out_grad.dims()), 0.0, out_grad.dtype()), x_grad); } } @@ -139,7 +140,7 @@ void gather_grad(const Tensor& x, const Tensor& out_grad, const Scalar& axis, Tensor* grad_x) { - auto zero_tensor = full(phi::vectorize(x.dims()), 0.0, x.dtype()); + auto zero_tensor = full(common::vectorize(x.dims()), 0.0, x.dtype()); std::vector tmp_perm; // change axis to rank 0 @@ -189,7 +190,7 @@ void tanh_grad(const Tensor& out, const Tensor& grad_out, Tensor* grad_x) { template void reshape_grad(const Tensor& x, const Tensor& grad_out, Tensor* grad_x) { if (grad_x) { - auto grad_x_tmp = reshape(grad_out, phi::vectorize(x.dims())); + auto grad_x_tmp = reshape(grad_out, common::vectorize(x.dims())); set_output(grad_x_tmp, grad_x); } } @@ -229,8 +230,8 @@ void subtract_grad(const Tensor& x, by_pass(scale_out_grad, dy); } else { auto dy_reduce_res = - scale_out_grad.sum(phi::vectorize(reduce_dim), y.dtype(), false); - auto dy_tmp = reshape(dy_reduce_res, phi::vectorize(y.dims())); + scale_out_grad.sum(common::vectorize(reduce_dim), y.dtype(), false); + auto dy_tmp = reshape(dy_reduce_res, common::vectorize(y.dims())); set_output(dy_tmp, dy); } } else { @@ -245,8 +246,8 @@ void subtract_grad(const Tensor& x, by_pass(out_grad, dx); } else { auto dx_reduce_res = - out_grad.sum(phi::vectorize(reduce_dim), x.dtype(), false); - auto dx_tmp = reshape(dx_reduce_res, phi::vectorize(x.dims())); + out_grad.sum(common::vectorize(reduce_dim), x.dtype(), false); + auto dx_tmp = reshape(dx_reduce_res, common::vectorize(x.dims())); set_output(dx_tmp, dx); } } else { @@ -270,8 +271,8 @@ void add_grad(const Tensor& x, by_pass(out_grad, dy); } else { auto dy_reduce_res = - 
out_grad.sum(phi::vectorize(reduce_dim), y.dtype(), false); - auto dy_tmp = reshape(dy_reduce_res, phi::vectorize(y.dims())); + out_grad.sum(common::vectorize(reduce_dim), y.dtype(), false); + auto dy_tmp = reshape(dy_reduce_res, common::vectorize(y.dims())); set_output(dy_tmp, dy); } @@ -287,8 +288,8 @@ void add_grad(const Tensor& x, by_pass(out_grad, dx); } else { auto dx_reduce_res = - out_grad.sum(phi::vectorize(reduce_dim), x.dtype(), false); - auto dx_tmp = reshape(dx_reduce_res, phi::vectorize(x.dims())); + out_grad.sum(common::vectorize(reduce_dim), x.dtype(), false); + auto dx_tmp = reshape(dx_reduce_res, common::vectorize(x.dims())); set_output(dx_tmp, dx); } } else { @@ -307,7 +308,7 @@ void sum_grad(const Tensor& x, if (!x_grad) { return; } - std::vector x_dim = phi::vectorize(x.dims()); + std::vector x_dim = common::vectorize(x.dims()); int64_t axis_size = axis.size(); int64_t x_dim_size = x_dim.size(); reduce_all = false; @@ -363,8 +364,8 @@ void divide_grad(const Tensor& x, set_output(dy_res, dy); } else { auto dy_reduce_res = - dy_res.sum(phi::vectorize(reduce_dim), y.dtype(), false); - auto dy_tmp = reshape(dy_reduce_res, phi::vectorize(y.dims())); + dy_res.sum(common::vectorize(reduce_dim), y.dtype(), false); + auto dy_tmp = reshape(dy_reduce_res, common::vectorize(y.dims())); set_output(dy_tmp, dy); } } else { @@ -373,7 +374,7 @@ void divide_grad(const Tensor& x, } // indicate we will compute dy if (dx) { // dx = (1/y) * dout - auto one_tensor = full(phi::vectorize(y.dims()), 1.0, y.dtype()); + auto one_tensor = full(common::vectorize(y.dims()), 1.0, y.dtype()); auto dx_res = one_tensor / y * out_grad; if (y.dims() != x.dims()) { // Maybe need reduce here @@ -382,8 +383,8 @@ void divide_grad(const Tensor& x, set_output(dx_res, dx); } else { auto dx_reduce_res = - dx_res.sum(phi::vectorize(reduce_dim), x.dtype(), false); - auto dx_tmp = reshape(dx_reduce_res, phi::vectorize(x.dims())); + dx_res.sum(common::vectorize(reduce_dim), x.dtype(), false); + auto dx_tmp = reshape(dx_reduce_res, common::vectorize(x.dims())); set_output(dx_tmp, dx); } @@ -411,8 +412,8 @@ void elementwise_pow_grad(const Tensor& x, set_output(dy_res, dy); } else { auto dy_reduce_res = - dy_res.sum(phi::vectorize(reduce_dim), y.dtype(), false); - auto dy_tmp = reshape(dy_reduce_res, phi::vectorize(y.dims())); + dy_res.sum(common::vectorize(reduce_dim), y.dtype(), false); + auto dy_tmp = reshape(dy_reduce_res, common::vectorize(y.dims())); set_output(dy_tmp, dy); } } else { @@ -431,8 +432,8 @@ void elementwise_pow_grad(const Tensor& x, set_output(dx_res, dx); } else { auto dx_reduce_res = - dx_res.sum(phi::vectorize(reduce_dim), x.dtype(), false); - auto dx_tmp = reshape(dx_reduce_res, phi::vectorize(x.dims())); + dx_res.sum(common::vectorize(reduce_dim), x.dtype(), false); + auto dx_tmp = reshape(dx_reduce_res, common::vectorize(x.dims())); set_output(dx_tmp, dx); } @@ -455,7 +456,7 @@ template void floor_grad(const Tensor& out_grad, Tensor* x_grad) { if (x_grad) { auto zero_tensor = - full(phi::vectorize(out_grad.dims()), 0.0, out_grad.dtype()); + full(common::vectorize(out_grad.dims()), 0.0, out_grad.dtype()); set_output(zero_tensor, x_grad); } } @@ -498,7 +499,7 @@ void multiply_grad(const Tensor& x, set_output(x_grad_unreduce, x_grad); } else { auto x_grad_reduced = x_grad_unreduce.sum( - phi::vectorize(axes), x_grad_unreduce.dtype(), false); + common::vectorize(axes), x_grad_unreduce.dtype(), false); if (x_grad_reduced.dims().size() != x.dims().size()) { x_grad_reduced = reshape(x_grad_reduced, 
x.shape()); } @@ -516,7 +517,7 @@ void multiply_grad(const Tensor& x, set_output(y_grad_unreduce, y_grad); } else { auto y_grad_reduced = y_grad_unreduce.sum( - phi::vectorize(axes), y_grad_unreduce.dtype(), false); + common::vectorize(axes), y_grad_unreduce.dtype(), false); if (y_grad_reduced.dims().size() != y.dims().size()) { y_grad_reduced = reshape(y_grad_reduced, y.shape()); } @@ -534,13 +535,13 @@ void expand_grad(const Tensor& x, const IntArray& shape, Tensor* x_grad) { if (x_grad) { - auto out_dims = phi::make_ddim(shape.GetData()); + auto out_dims = common::make_ddim(shape.GetData()); if (out_dims != x.dims()) { auto axes = get_reduce_dims(x.dims(), out_dims); if (!axes.size()) { by_pass(out_grad, x_grad); } else { - auto reduced = out_grad.sum(phi::vectorize(axes), x.dtype(), false); + auto reduced = out_grad.sum(common::vectorize(axes), x.dtype(), false); if (reduced.dims().size() != x.dims().size()) { reduced = reshape(reduced, x.shape()); } @@ -609,7 +610,7 @@ void slice_grad(const Tensor& input, if (decrease_size > 0) { if (decrease_size == static_cast(in_dims.size())) { // all dims decrease - out_dims = phi::make_ddim(std::vector(decrease_size, 1)); + out_dims = common::make_ddim(std::vector(decrease_size, 1)); } else { origin_out_shape.resize(out_dims.size() + decrease_size, -1); for (size_t i = 0; i < decrease_size; ++i) { @@ -623,7 +624,7 @@ void slice_grad(const Tensor& input, ++index; } } - out_dims = phi::make_ddim(origin_out_shape); + out_dims = common::make_ddim(origin_out_shape); } } @@ -705,7 +706,7 @@ void group_norm_grad(const Tensor& x, // // cal d_bias: // d_bias = sum(dy, axes=(0,2,3)) - DataLayout data_layout_ = phi::StringToDataLayout(data_layout); + DataLayout data_layout_ = common::StringToDataLayout(data_layout); if (data_layout_ != DataLayout::kNCHW) { PADDLE_THROW(phi::errors::InvalidArgument("Unsupported storage order: %s", data_layout)); @@ -723,7 +724,7 @@ void group_norm_grad(const Tensor& x, out_grad_data = cast(out_grad, phi::DataType::FLOAT32); } - std::vector x_dims = phi::vectorize(x.dims()); + std::vector x_dims = common::vectorize(x.dims()); auto add_axis = std::vector({-1}); const int N = x_dims[0]; const int C = x_dims[1]; @@ -881,7 +882,7 @@ void layer_norm_grad(const Tensor& x, auto d_mean_d_std = (1.0 / shape_2) * (d_mean + d_std); auto x_grad_tmp = dx_end - d_mean_d_std; - x_grad_tmp = reshape(x_grad_tmp, phi::vectorize(x.dims())); + x_grad_tmp = reshape(x_grad_tmp, common::vectorize(x.dims())); if (x.dtype() == phi::DataType::FLOAT16 || x.dtype() == phi::DataType::BFLOAT16) { @@ -962,7 +963,7 @@ void topk_grad(const Tensor& x, by_pass(out_grad, x_grad); return; } - auto zero_tensor = full(phi::vectorize(x.dims()), 0, x.dtype()); + auto zero_tensor = full(common::vectorize(x.dims()), 0, x.dtype()); auto x_grad_tmp = put_along_axis(zero_tensor, indices, out_grad, axis); set_output(x_grad_tmp, x_grad); } @@ -974,7 +975,7 @@ void gather_nd_grad(const Tensor& x, const Tensor& out_grad, Tensor* x_grad) { if (x_grad) { - auto zero_tensor = full(phi::vectorize(x.dims()), 0.0, x.dtype()); + auto zero_tensor = full(common::vectorize(x.dims()), 0.0, x.dtype()); auto x_grad_tmp = scatter_nd_add(zero_tensor, index, out_grad); set_output(x_grad_tmp, x_grad); } @@ -989,7 +990,7 @@ void prod_grad(const Tensor& x, bool reduce_all, Tensor* x_grad) { if (x_grad) { - std::vector x_dim = phi::vectorize(x.dims()); + std::vector x_dim = common::vectorize(x.dims()); int64_t axis_size = axis.size(); int64_t x_dim_size = x_dim.size(); reduce_all = false; @@ 
-1044,8 +1045,8 @@ void max_grad(const Tensor& x, if (!x_grad) { return; } - auto zero_tensor = full(phi::vectorize(x.dims()), 0.0, x.dtype()); - std::vector x_dim = phi::vectorize(x.dims()); + auto zero_tensor = full(common::vectorize(x.dims()), 0.0, x.dtype()); + std::vector x_dim = common::vectorize(x.dims()); int64_t axis_size = axis.size(); int64_t x_dim_size = x_dim.size(); reduce_all = false; @@ -1095,8 +1096,9 @@ void assign_grad(const Tensor& out_grad, Tensor* x_grad) { template void erf_grad(const Tensor& x, const Tensor& out_grad, Tensor* x_grad) { if (x_grad) { - auto m_2_sqrt_pi = full(phi::vectorize(x.dims()), M_2_SQRTPI, x.dtype()); - auto neg_one = full(phi::vectorize(x.dims()), -1.0, x.dtype()); + auto m_2_sqrt_pi = + full(common::vectorize(x.dims()), M_2_SQRTPI, x.dtype()); + auto neg_one = full(common::vectorize(x.dims()), -1.0, x.dtype()); auto neg_tmp = neg_one * x * x; auto mul_tmp = m_2_sqrt_pi * exp(neg_tmp); set_output(out_grad * mul_tmp, x_grad); @@ -1119,8 +1121,8 @@ void maximum_grad(const Tensor& x, set_output(dx_res, x_grad); } else { auto dx_reduce_res = - dx_res.sum(phi::vectorize(reduce_dim), x.dtype(), false); - auto dx_tmp = reshape(dx_reduce_res, phi::vectorize(x.dims())); + dx_res.sum(common::vectorize(reduce_dim), x.dtype(), false); + auto dx_tmp = reshape(dx_reduce_res, common::vectorize(x.dims())); set_output(dx_tmp, x_grad); } } else { @@ -1138,8 +1140,8 @@ void maximum_grad(const Tensor& x, set_output(dy_res, y_grad); } else { auto dy_reduce_res = - dy_res.sum(phi::vectorize(reduce_dim), y.dtype(), false); - auto dy_tmp = reshape(dy_reduce_res, phi::vectorize(y.dims())); + dy_res.sum(common::vectorize(reduce_dim), y.dtype(), false); + auto dy_tmp = reshape(dy_reduce_res, common::vectorize(y.dims())); set_output(dy_tmp, y_grad); } } else { @@ -1198,7 +1200,7 @@ void scatter_grad(const Tensor& index, Tensor* updates_grad) { if (x_grad) { auto zero_tensor = - full(phi::vectorize(updates.dims()), 0.0, updates.dtype()); + full(common::vectorize(updates.dims()), 0.0, updates.dtype()); auto tmp_grad = scatter(out_grad, index, zero_tensor, false); set_output(tmp_grad, x_grad); } @@ -1231,7 +1233,7 @@ void batch_norm_grad(const Tensor& x, Tensor* bias_grad) { use_global_stats = is_test || use_global_stats; - DataLayout data_layout_ = phi::StringToDataLayout(data_layout); + DataLayout data_layout_ = common::StringToDataLayout(data_layout); Tensor x_data = x; Tensor out_grad_data = out_grad; @@ -1268,7 +1270,7 @@ void batch_norm_grad(const Tensor& x, if (use_global_stats) { auto eps = - full(phi::vectorize(run_var.dims()), epsilon, run_var.dtype()); + full(common::vectorize(run_var.dims()), epsilon, run_var.dtype()); mean_data = run_mean; rsqrt_var = (run_var + eps).pow(-0.5); } else { @@ -1573,8 +1575,8 @@ void minimum_grad(const Tensor& x, set_output(dx_res, x_grad); } else { auto dx_reduce_res = - dx_res.sum(phi::vectorize(reduce_dim), x.dtype(), false); - auto dx_tmp = reshape(dx_reduce_res, phi::vectorize(x.dims())); + dx_res.sum(common::vectorize(reduce_dim), x.dtype(), false); + auto dx_tmp = reshape(dx_reduce_res, common::vectorize(x.dims())); set_output(dx_tmp, x_grad); } } else { @@ -1592,8 +1594,8 @@ void minimum_grad(const Tensor& x, set_output(dy_res, y_grad); } else { auto dy_reduce_res = - dy_res.sum(phi::vectorize(reduce_dim), y.dtype(), false); - auto dy_tmp = reshape(dy_reduce_res, phi::vectorize(y.dims())); + dy_res.sum(common::vectorize(reduce_dim), y.dtype(), false); + auto dy_tmp = reshape(dy_reduce_res, common::vectorize(y.dims())); 
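// [Sketch, not part of the patch] The backward rules in this header repeat
// the same broadcast-gradient idiom that these hunks rename: sum the incoming
// gradient over the broadcast axes, then reshape it back to the operand's
// shape. Assuming the templated prim API (reshape<T>, set_output<T>) seen
// elsewhere in this file, the recurring pattern factors out as:

template <typename T>
void ReduceGradToOperand(const Tensor& grad,
                         const Tensor& operand,
                         const phi::DDim& reduce_dim,
                         Tensor* out) {
  // Same call shape as the hunks above; only the vectorize() namespace
  // changes in this commit.
  auto reduced =
      grad.sum(common::vectorize(reduce_dim), operand.dtype(), false);
  auto restored = reshape<T>(reduced, common::vectorize(operand.dims()));
  set_output<T>(restored, out);
}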
set_output(dy_tmp, y_grad); } } else { @@ -1609,13 +1611,13 @@ void tile_grad(const Tensor& x, Tensor* x_grad) { if (x_grad) { auto repeat_times_data = repeat_times.GetData(); - auto out_grad_shape = phi::vectorize(out_grad.dims()); + auto out_grad_shape = common::vectorize(out_grad.dims()); auto result = out_grad; for (int i = 0; i < static_cast(repeat_times_data.size()); i++) { int size = out_grad_shape[i] / repeat_times_data[i]; std::vector sections(repeat_times_data[i], size); auto split_arr = split(result, IntArray(sections), i); - result = full(phi::vectorize(split_arr[0].dims()), 0.0, x.dtype()); + result = full(common::vectorize(split_arr[0].dims()), 0.0, x.dtype()); for (int j = 0; j < static_cast(split_arr.size()); j++) { result = split_arr[j] + result; } diff --git a/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h b/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h index e20e4a965c9939..1bb91d977cd1e2 100644 --- a/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h +++ b/paddle/fluid/prim/api/composite_backward/composite_double_backward_api.h @@ -20,11 +20,11 @@ #include +#include "paddle/common/ddim.h" #include "paddle/fluid/prim/api/all.h" #include "paddle/fluid/prim/api/generated_prim/prim_generated_api.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/int_array.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace prim { @@ -135,9 +135,9 @@ void matmul_double_grad(const Tensor& x, Tensor* y_grad, Tensor* grad_out_grad) { // Get dims from the input x, y, output_grad - std::vector x_dims = vectorize(x.dims()); - std::vector y_dims = vectorize(y.dims()); - std::vector grad_out_dims = vectorize(grad_out.dims()); + std::vector x_dims = common::vectorize(x.dims()); + std::vector y_dims = common::vectorize(y.dims()); + std::vector grad_out_dims = common::vectorize(grad_out.dims()); int x_ndim = x_dims.size(); int y_ndim = y_dims.size(); @@ -384,12 +384,13 @@ void matmul_double_grad(const Tensor& x, } // recover the original dim of output (delete 1) - std::vector dx_dims = - dx.initialized() ? vectorize(dx.dims()) : std::vector({}); - std::vector dy_dims = - dy.initialized() ? vectorize(dy.dims()) : std::vector({}); - std::vector ddout_dims = - ddout.initialized() ? vectorize(ddout.dims()) : std::vector({}); + std::vector dx_dims = dx.initialized() ? common::vectorize(dx.dims()) + : std::vector({}); + std::vector dy_dims = dy.initialized() ? common::vectorize(dy.dims()) + : std::vector({}); + std::vector ddout_dims = ddout.initialized() + ? 
common::vectorize(ddout.dims()) + : std::vector({}); if (x_ndim == 1 && y_ndim == 1) { if (dx.initialized() && dx_dims[0] == 1) { dx = reshape(dx, IntArray(x_dims)); @@ -470,7 +471,7 @@ void multiply_double_grad(const Tensor& x, if (!axes.size()) { set_output(dx, x_grad); } else { - auto dx_reduce = dx.sum(phi::vectorize(axes), dx.dtype(), false); + auto dx_reduce = dx.sum(common::vectorize(axes), dx.dtype(), false); if (dx_reduce.dims().size() != x.dims().size()) { dx_reduce = reshape(dx_reduce, x.shape()); } @@ -481,7 +482,7 @@ void multiply_double_grad(const Tensor& x, } } else { - auto dx = full(phi::vectorize(x.dims()), 0.0, x.dtype()); + auto dx = full(common::vectorize(x.dims()), 0.0, x.dtype()); set_output(dx, x_grad); } } @@ -493,7 +494,7 @@ void multiply_double_grad(const Tensor& x, if (!axes.size()) { set_output(dy, y_grad); } else { - auto dy_reduce = dy.sum(phi::vectorize(axes), dy.dtype(), false); + auto dy_reduce = dy.sum(common::vectorize(axes), dy.dtype(), false); if (dy_reduce.dims().size() != y.dims().size()) { dy_reduce = reshape(dy_reduce, y.shape()); } @@ -503,7 +504,7 @@ void multiply_double_grad(const Tensor& x, set_output(dy, y_grad); } } else { - auto dy = full(phi::vectorize(y.dims()), 0.0, y.dtype()); + auto dy = full(common::vectorize(y.dims()), 0.0, y.dtype()); set_output(dy, y_grad); } } @@ -516,7 +517,8 @@ void multiply_double_grad(const Tensor& x, } else if (grad_y_grad) { ddout = grad_y_grad.get() * x; } else { - ddout = full(phi::vectorize(grad_out.dims()), 0.0, grad_out.dtype()); + ddout = + full(common::vectorize(grad_out.dims()), 0.0, grad_out.dtype()); } set_output(ddout, grad_out_grad); } @@ -531,7 +533,7 @@ void add_double_grad(const Tensor& y, Tensor* grad_out_grad) { if (grad_out_grad) { // ddout = ddx + ddy - Tensor ddout = full(phi::vectorize(grad_out.dims()), 0.0, y.dtype()); + Tensor ddout = full(common::vectorize(grad_out.dims()), 0.0, y.dtype()); if (!grad_x_grad && !grad_y_grad) { set_output(ddout, grad_out_grad); } else { @@ -563,9 +565,9 @@ void add_triple_grad(const paddle::optional& grad_grad_x, by_pass(grad_grad_out_grad, grad_grad_y_grad); } else { auto dddy_reduce_res = grad_grad_out_grad.sum( - phi::vectorize(reduce_dim), grad_grad_y.get().dtype(), false); - auto dddy_tmp = reshape(dddy_reduce_res, - phi::vectorize(grad_grad_y.get().dims())); + common::vectorize(reduce_dim), grad_grad_y.get().dtype(), false); + auto dddy_tmp = reshape( + dddy_reduce_res, common::vectorize(grad_grad_y.get().dims())); set_output(dddy_tmp, grad_grad_y_grad); } } else { @@ -585,9 +587,9 @@ void add_triple_grad(const paddle::optional& grad_grad_x, by_pass(grad_grad_out_grad, grad_grad_x_grad); } else { auto dddx_reduce_res = grad_grad_out_grad.sum( - phi::vectorize(reduce_dim), grad_grad_x.get().dtype(), false); - auto dddx_tmp = reshape(dddx_reduce_res, - phi::vectorize(grad_grad_x.get().dims())); + common::vectorize(reduce_dim), grad_grad_x.get().dtype(), false); + auto dddx_tmp = reshape( + dddx_reduce_res, common::vectorize(grad_grad_x.get().dims())); set_output(dddx_tmp, grad_grad_x_grad); } } else { @@ -611,7 +613,8 @@ void subtract_double_grad(const Tensor& y, if (!grad_x_grad && !grad_y_grad) { grad_out_grad = nullptr; } else { - Tensor ddout = full(phi::vectorize(grad_out.dims()), 0.0, y.dtype()); + Tensor ddout = + full(common::vectorize(grad_out.dims()), 0.0, y.dtype()); if (grad_x_grad) { ddout = ddout + grad_x_grad.get(); } diff --git a/paddle/fluid/prim/api/manual_prim/utils/static_utils.cc 
b/paddle/fluid/prim/api/manual_prim/utils/static_utils.cc index f89a898ca1a58e..2f76e8bbd966f0 100644 --- a/paddle/fluid/prim/api/manual_prim/utils/static_utils.cc +++ b/paddle/fluid/prim/api/manual_prim/utils/static_utils.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/common/macros.h" #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_desc.h" @@ -21,7 +22,6 @@ #include "paddle/fluid/prim/utils/static/desc_tensor.h" #include "paddle/fluid/prim/utils/static/static_global_utils.h" #include "paddle/phi/api/include/tensor.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/core/utils/data_type.h" namespace paddle { namespace prim { diff --git a/paddle/fluid/prim/api/manual_prim/utils/utils.h b/paddle/fluid/prim/api/manual_prim/utils/utils.h index d37a50c21a8e7b..90a25f8bf1e1fd 100644 --- a/paddle/fluid/prim/api/manual_prim/utils/utils.h +++ b/paddle/fluid/prim/api/manual_prim/utils/utils.h @@ -15,13 +15,13 @@ #pragma once #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/operators/common_infer_shape_functions.h" #include "paddle/fluid/prim/api/generated_prim/prim_generated_api.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/int_array.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/funcs/blas/blas.h" namespace paddle { @@ -72,7 +72,7 @@ static phi::DDim get_reduce_dims_from_out(const phi::DDim& dout_dims, i)); } } - return phi::make_ddim(result); + return common::make_ddim(result); } static phi::DDim get_reduce_dims(const phi::DDim& x_dims, @@ -91,7 +91,7 @@ static std::vector get_reduce_dims(const Tensor& dx, if (dout_ndim < x_ndim) { return std::vector({}); } - const std::vector dx_dims = phi::vectorize(dx.dims()); + const std::vector dx_dims = common::vectorize(dx.dims()); std::vector broadcast_dims(dout_ndim); std::fill( broadcast_dims.data(), broadcast_dims.data() + dout_ndim - x_ndim, 1); diff --git a/paddle/fluid/prim/utils/static/desc_tensor.h b/paddle/fluid/prim/utils/static/desc_tensor.h index 7d8c939fec122f..cf4db764bb5ca5 100644 --- a/paddle/fluid/prim/utils/static/desc_tensor.h +++ b/paddle/fluid/prim/utils/static/desc_tensor.h @@ -13,9 +13,9 @@ // limitations under the License. 
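// [Sketch, not part of the patch] get_reduce_dims_from_out, touched in the
// manual_prim utils.h hunk above, packs the axis indices over which a
// broadcast must be undone into a DDim; only its final make_ddim call changes
// namespace here. Assuming NumPy-style broadcast rules, a worked example:

phi::DDim dout_dims = common::make_ddim({2, 3, 4, 5});
phi::DDim x_dims = common::make_ddim({3, 1, 5});
// x was broadcast along the prepended axis 0 and its size-1 axis (axis 2 of
// dout), so the helper is expected to yield the axis indices {0, 2}:
phi::DDim axes = get_reduce_dims_from_out(dout_dims, x_dims);
// expected: common::vectorize(axes) == {0, 2}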
#pragma once +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/var_desc.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/extended_tensor.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/utils/any.h" @@ -27,7 +27,7 @@ class DescTensor : public phi::ExtendedTensor, public phi::TypeInfoTraits { public: explicit DescTensor(framework::VarDesc* desc) - : desc_ptr_(desc), dims_(phi::make_ddim(desc->GetShape())) {} + : desc_ptr_(desc), dims_(common::make_ddim(desc->GetShape())) {} static const char* name() { return "DescTensor"; } std::string Name() const { return desc_ptr_->Name(); } @@ -35,7 +35,7 @@ class DescTensor : public phi::ExtendedTensor, std::vector shape() const { return desc_ptr_->GetShape(); } const phi::DDim& dims() const override { - dims_ = phi::make_ddim(desc_ptr_->GetShape()); + dims_ = common::make_ddim(desc_ptr_->GetShape()); return dims_; } diff --git a/paddle/fluid/primitive/backend/CMakeLists.txt b/paddle/fluid/primitive/backend/CMakeLists.txt index ec3d39c8739c10..3e857778a4f144 100644 --- a/paddle/fluid/primitive/backend/CMakeLists.txt +++ b/paddle/fluid/primitive/backend/CMakeLists.txt @@ -4,7 +4,7 @@ if(WITH_PYTHON OR NOT ON_INFER) cc_library( primitive_backend_eager_experimental SRCS ${eager_backend_files} - DEPS final_dygraph_function eager_utils phi) + DEPS final_dygraph_function eager_utils phi common) endif() set(static_backend_files ${CMAKE_CURRENT_SOURCE_DIR}/generated/generated_static_backend.cc diff --git a/paddle/fluid/primitive/composite/composite.h b/paddle/fluid/primitive/composite/composite.h index bb1a91a110793f..9b7323b76f2ae9 100644 --- a/paddle/fluid/primitive/composite/composite.h +++ b/paddle/fluid/primitive/composite/composite.h @@ -31,7 +31,7 @@ Tensor mean_decomp(const Tensor& x, const IntArray& axis, bool keepdim) { if (need_cast) { x_tmp = cast(x, phi::DataType::FLOAT32); } - std::vector x_dim = phi::vectorize(x_tmp.dims()); + std::vector x_dim = common::vectorize(x_tmp.dims()); int64_t axis_size = axis.size(); int64_t x_dim_size = x_dim.size(); auto axis_ = std::vector(); @@ -54,7 +54,7 @@ Tensor mean_decomp(const Tensor& x, const IntArray& axis, bool keepdim) { } auto sum_x = sum(x_tmp, IntArray(axis_), x_tmp.dtype(), keepdim); auto res = - sum_x / full(phi::vectorize(sum_x.dims()), value, sum_x.dtype()); + sum_x / full(common::vectorize(sum_x.dims()), value, sum_x.dtype()); if (need_cast) { return cast(res, org_dtype); } else { @@ -93,7 +93,7 @@ Tensor pow_decomp(const Tensor& x, const paddle::Scalar& y) { Tensor y_full; if (valid_type(y.dtype())) { - y_full = full(phi::vectorize(x_cast.dims()), y, x_cast.dtype()); + y_full = full(common::vectorize(x_cast.dims()), y, x_cast.dtype()); } else { PADDLE_THROW(phi::errors::InvalidArgument( "Unsupported data type: %s", phi::DataTypeToString(y.dtype()))); @@ -128,9 +128,9 @@ std::tuple batch_norm_decomp( x_cast = cast(x, phi::DataType::FLOAT32); } - std::vector x_dim = phi::vectorize(x_cast.dims()); + std::vector x_dim = common::vectorize(x_cast.dims()); int rank = x_dim.size(); - DataLayout data_layout_ = phi::StringToDataLayout(data_layout); + DataLayout data_layout_ = common::StringToDataLayout(data_layout); int feature_axis; if (data_layout_ == DataLayout::kNCHW) { feature_axis = 1; @@ -177,9 +177,10 @@ std::tuple batch_norm_decomp( run_mean_ = run_mean * momentum + batch_mean * (1. - momentum); run_var_ = run_var * momentum + batch_var * (1. 
- momentum); } else { - batch_mean = full(phi::vectorize(run_mean.dims()), 0, run_mean.dtype()); + batch_mean = + full(common::vectorize(run_mean.dims()), 0, run_mean.dtype()); auto batch_var = - full(phi::vectorize(run_var.dims()), 0, run_var.dtype()); + full(common::vectorize(run_var.dims()), 0, run_var.dtype()); inv_std = elementwise_pow((batch_var + epsilon), half); if (data_layout_ == DataLayout::kNHWC) { x_hat = @@ -195,10 +196,10 @@ std::tuple batch_norm_decomp( Tensor y; Tensor new_scale = scale ? scale.get() - : full(phi::vectorize(x_cast.dims()), 1, x_cast.dtype()); + : full(common::vectorize(x_cast.dims()), 1, x_cast.dtype()); Tensor new_bias = bias ? bias.get() - : full(phi::vectorize(x_cast.dims()), 0, x_cast.dtype()); + : full(common::vectorize(x_cast.dims()), 0, x_cast.dtype()); if (data_layout_ == DataLayout::kNHWC) { y = x_hat * new_scale + new_bias; } else { @@ -254,9 +255,9 @@ Tensor silu_decomp(const Tensor& x) { } // res = x / (1 + exp(-x)) - auto one = full(phi::vectorize(x.dims()), 1, x_tmp.dtype()); + auto one = full(common::vectorize(x.dims()), 1, x_tmp.dtype()); auto exp_temp = - exp(full(phi::vectorize(x.dims()), -1, x_tmp.dtype()) * x_tmp); + exp(full(common::vectorize(x.dims()), -1, x_tmp.dtype()) * x_tmp); auto res = x_tmp / (exp_temp + one); if (need_cast) { return cast(res, org_dtype); @@ -267,7 +268,7 @@ Tensor silu_decomp(const Tensor& x) { template Tensor relu_decomp(const Tensor& x) { - return maximum(x, full(phi::vectorize(x.dims()), 0.0, x.dtype())); + return maximum(x, full(common::vectorize(x.dims()), 0.0, x.dtype())); } template @@ -281,7 +282,7 @@ Tensor rsqrt_decomp(const Tensor& x) { } auto ans = elementwise_pow( - x_cast, full(phi::vectorize(x_cast.dims()), -0.5, x_cast.dtype())); + x_cast, full(common::vectorize(x_cast.dims()), -0.5, x_cast.dtype())); if (need_cast) { return cast(ans, org_dtype); } else { @@ -326,7 +327,7 @@ std::tuple layer_norm_decomp( x_cast = cast(x_cast, phi::DataType::FLOAT32); } - auto x_dim = phi::vectorize(x.dims()); + auto x_dim = common::vectorize(x.dims()); for (size_t i = begin_norm_axis; i < x_dim.size(); i++) { axis.push_back(static_cast(i)); } @@ -337,7 +338,7 @@ std::tuple layer_norm_decomp( auto var_tmp3 = variance + epsilon; auto rsqrt_var = elementwise_pow( var_tmp3, - full(phi::vectorize(var_tmp3.dims()), -0.5, var_tmp3.dtype())); + full(common::vectorize(var_tmp3.dims()), -0.5, var_tmp3.dtype())); auto out = difference * rsqrt_var; auto scale_ptr = scale.get_ptr(); @@ -462,7 +463,7 @@ Tensor sqrt_decomp(const Tensor& x) { } auto ans = elementwise_pow( - x_cast, full(phi::vectorize(x_cast.dims()), 0.5, x_cast.dtype())); + x_cast, full(common::vectorize(x_cast.dims()), 0.5, x_cast.dtype())); if (need_cast) { return cast(ans, org_dtype); } else { @@ -476,22 +477,24 @@ Tensor gelu_decomp(const Tensor& x, bool approximate) { const double PM_SQRT1_2 = 0.70710678118654752440; /* 1/sqrt(2) */ auto org_dtype = x.dtype(); - auto half = full(phi::vectorize(x.dims()), 0.5, org_dtype); - auto one = full(phi::vectorize(x.dims()), 1.0, org_dtype); + auto half = full(common::vectorize(x.dims()), 0.5, org_dtype); + auto one = full(common::vectorize(x.dims()), 1.0, org_dtype); if (approximate) { // gelu(x) = 0.5 * x * (1 + tanh(sqrt(2 / \pi) * (x + 0.044715 * x^{3}))) - auto kAlpha = - full(phi::vectorize(x.dims()), PM_2_SQRTPI * PM_SQRT1_2, org_dtype); - auto GELU_CONSTANT = full(phi::vectorize(x.dims()), 0.044715, org_dtype); - auto x_pow3 = - elementwise_pow(x, full(phi::vectorize(x.dims()), 3, org_dtype)); + auto kAlpha 
= full( + common::vectorize(x.dims()), PM_2_SQRTPI * PM_SQRT1_2, org_dtype); + auto GELU_CONSTANT = + full(common::vectorize(x.dims()), 0.044715, org_dtype); + auto x_pow3 = elementwise_pow( + x, full(common::vectorize(x.dims()), 3, org_dtype)); auto tanh_out = tanh(kAlpha * (x + x_pow3 * GELU_CONSTANT)); auto res = x * half * (one + tanh_out); return res; } else { // gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2))) - auto M_SQRT1_2T = full(phi::vectorize(x.dims()), PM_SQRT1_2, org_dtype); + auto M_SQRT1_2T = + full(common::vectorize(x.dims()), PM_SQRT1_2, org_dtype); auto erf_out = one + erf(x * M_SQRT1_2T); auto res = x * half * erf_out; diff --git a/paddle/fluid/primitive/rule/vjp/details.h b/paddle/fluid/primitive/rule/vjp/details.h index 703e72f3bac0d5..16cef793c3f72b 100644 --- a/paddle/fluid/primitive/rule/vjp/details.h +++ b/paddle/fluid/primitive/rule/vjp/details.h @@ -47,8 +47,8 @@ void divide_grad(const Tensor& x, set_output(dy_res, dy); } else { auto dy_reduce_res = - sum(dy_res, phi::vectorize(reduce_dim), y.dtype(), false); - auto dy_tmp = reshape(dy_reduce_res, phi::vectorize(y.dims())); + sum(dy_res, common::vectorize(reduce_dim), y.dtype(), false); + auto dy_tmp = reshape(dy_reduce_res, common::vectorize(y.dims())); set_output(dy_tmp, dy); } } else { @@ -57,7 +57,7 @@ void divide_grad(const Tensor& x, } // indicate we will compute dy if (dx) { // dx = (1/y) * dout - auto one_tensor = full(phi::vectorize(y.dims()), 1.0, y.dtype()); + auto one_tensor = full(common::vectorize(y.dims()), 1.0, y.dtype()); auto dx_res = one_tensor / y * out_grad; if (y.dims() != x.dims()) { // Maybe need reduce here @@ -66,8 +66,8 @@ void divide_grad(const Tensor& x, set_output(dx_res, dx); } else { auto dx_reduce_res = - sum(dx_res, phi::vectorize(reduce_dim), x.dtype(), false); - auto dx_tmp = reshape(dx_reduce_res, phi::vectorize(x.dims())); + sum(dx_res, common::vectorize(reduce_dim), x.dtype(), false); + auto dx_tmp = reshape(dx_reduce_res, common::vectorize(x.dims())); set_output(dx_tmp, dx); } @@ -87,7 +87,7 @@ void sum_grad(const Tensor& x, if (!x_grad) { return; } - std::vector x_dim = phi::vectorize(x.dims()); + std::vector x_dim = common::vectorize(x.dims()); int64_t axis_size = axis.size(); int64_t x_dim_size = x_dim.size(); reduce_all = false; @@ -206,8 +206,8 @@ void reshape_grad(const Tensor& xshape, if (grad_x) { // xshape: [0] + x.shape auto xshape_dims = xshape.dims(); - auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size()); - auto grad_x_tmp = reshape(grad_out, phi::vectorize(x_dims)); + auto x_dims = common::slice_ddim(xshape_dims, 1, xshape_dims.size()); + auto grad_x_tmp = reshape(grad_out, common::vectorize(x_dims)); set_output(grad_x_tmp, grad_x); } } @@ -296,8 +296,8 @@ void add_grad(const Tensor& x, by_pass(out_grad, dy); } else { auto dy_reduce_res = - out_grad.sum(phi::vectorize(reduce_dim), y.dtype(), false); - auto dy_tmp = reshape(dy_reduce_res, phi::vectorize(y.dims())); + out_grad.sum(common::vectorize(reduce_dim), y.dtype(), false); + auto dy_tmp = reshape(dy_reduce_res, common::vectorize(y.dims())); set_output(dy_tmp, dy); } @@ -313,8 +313,8 @@ void add_grad(const Tensor& x, by_pass(out_grad, dx); } else { auto dx_reduce_res = - out_grad.sum(phi::vectorize(reduce_dim), x.dtype(), false); - auto dx_tmp = reshape(dx_reduce_res, phi::vectorize(x.dims())); + out_grad.sum(common::vectorize(reduce_dim), x.dtype(), false); + auto dx_tmp = reshape(dx_reduce_res, common::vectorize(x.dims())); set_output(dx_tmp, dx); } } else { @@ -339,8 +339,8 @@ void 
subtract_grad(const Tensor& x, by_pass(scale_out_grad, dy); } else { auto dy_reduce_res = - scale_out_grad.sum(phi::vectorize(reduce_dim), y.dtype(), false); - auto dy_tmp = reshape(dy_reduce_res, phi::vectorize(y.dims())); + scale_out_grad.sum(common::vectorize(reduce_dim), y.dtype(), false); + auto dy_tmp = reshape(dy_reduce_res, common::vectorize(y.dims())); set_output(dy_tmp, dy); } } else { @@ -355,8 +355,8 @@ void subtract_grad(const Tensor& x, by_pass(out_grad, dx); } else { auto dx_reduce_res = - out_grad.sum(phi::vectorize(reduce_dim), x.dtype(), false); - auto dx_tmp = reshape(dx_reduce_res, phi::vectorize(x.dims())); + out_grad.sum(common::vectorize(reduce_dim), x.dtype(), false); + auto dx_tmp = reshape(dx_reduce_res, common::vectorize(x.dims())); set_output(dx_tmp, dx); } } else { @@ -380,7 +380,7 @@ void multiply_grad(const Tensor& x, set_output(x_grad_unreduce, x_grad); } else { auto x_grad_reduced = x_grad_unreduce.sum( - phi::vectorize(axes), x_grad_unreduce.dtype(), false); + common::vectorize(axes), x_grad_unreduce.dtype(), false); if (x_grad_reduced.dims().size() != x.dims().size()) { x_grad_reduced = reshape(x_grad_reduced, x.shape()); } @@ -398,7 +398,7 @@ void multiply_grad(const Tensor& x, set_output(y_grad_unreduce, y_grad); } else { auto y_grad_reduced = y_grad_unreduce.sum( - phi::vectorize(axes), y_grad_unreduce.dtype(), false); + common::vectorize(axes), y_grad_unreduce.dtype(), false); if (y_grad_reduced.dims().size() != y.dims().size()) { y_grad_reduced = reshape(y_grad_reduced, y.shape()); } @@ -428,8 +428,8 @@ void elementwise_pow_grad(const Tensor& x, set_output(dy_res, dy); } else { auto dy_reduce_res = - dy_res.sum(phi::vectorize(reduce_dim), y.dtype(), false); - auto dy_tmp = reshape(dy_reduce_res, phi::vectorize(y.dims())); + dy_res.sum(common::vectorize(reduce_dim), y.dtype(), false); + auto dy_tmp = reshape(dy_reduce_res, common::vectorize(y.dims())); set_output(dy_tmp, dy); } } else { @@ -448,8 +448,8 @@ void elementwise_pow_grad(const Tensor& x, set_output(dx_res, dx); } else { auto dx_reduce_res = - dx_res.sum(phi::vectorize(reduce_dim), x.dtype(), false); - auto dx_tmp = reshape(dx_reduce_res, phi::vectorize(x.dims())); + dx_res.sum(common::vectorize(reduce_dim), x.dtype(), false); + auto dx_tmp = reshape(dx_reduce_res, common::vectorize(x.dims())); set_output(dx_tmp, dx); } @@ -508,7 +508,7 @@ void layer_norm_grad(const Tensor& x, auto tmp = (1.0 / (variance_ + epsilon)); // M,1 // auto sqrt_var_1 = sqrt(tmp); // M,1 auto sqrt_var_1 = elementwise_pow( - tmp, full(phi::vectorize(tmp.dims()), 0.5, tmp.dtype())); + tmp, full(common::vectorize(tmp.dims()), 0.5, tmp.dtype())); auto x_sub_mean_mul_sqrt_var_1 = x_sub_mean * sqrt_var_1; if (x_grad) { @@ -528,7 +528,7 @@ void layer_norm_grad(const Tensor& x, auto d_mean_d_std = (1.0 / shape_2) * (d_mean + d_std); auto x_grad_tmp = dx_end - d_mean_d_std; - x_grad_tmp = reshape(x_grad_tmp, phi::vectorize(x.dims())); + x_grad_tmp = reshape(x_grad_tmp, common::vectorize(x.dims())); if (x.dtype() == phi::DataType::FLOAT16 || x.dtype() == phi::DataType::BFLOAT16) { @@ -601,8 +601,9 @@ void dropout_grad(const Tensor& mask, template void erf_grad(const Tensor& x, const Tensor& out_grad, Tensor* x_grad) { if (x_grad) { - auto m_2_sqrt_pi = full(phi::vectorize(x.dims()), M_2_SQRTPI, x.dtype()); - auto neg_one = full(phi::vectorize(x.dims()), -1.0, x.dtype()); + auto m_2_sqrt_pi = + full(common::vectorize(x.dims()), M_2_SQRTPI, x.dtype()); + auto neg_one = full(common::vectorize(x.dims()), -1.0, x.dtype()); auto 
neg_tmp = neg_one * x * x; auto mul_tmp = m_2_sqrt_pi * exp(neg_tmp); set_output(out_grad * mul_tmp, x_grad); @@ -615,13 +616,13 @@ void expand_grad(const Tensor& x, const IntArray& shape, Tensor* x_grad) { if (x_grad) { - auto out_dims = phi::make_ddim(shape.GetData()); + auto out_dims = common::make_ddim(shape.GetData()); if (out_dims != x.dims()) { auto axes = get_reduce_dims(x.dims(), out_dims); if (!axes.size()) { by_pass(out_grad, x_grad); } else { - auto reduced = out_grad.sum(phi::vectorize(axes), x.dtype(), false); + auto reduced = out_grad.sum(common::vectorize(axes), x.dtype(), false); if (reduced.dims().size() != x.dims().size()) { reduced = reshape(reduced, x.shape()); } @@ -732,8 +733,8 @@ void maximum_grad(const Tensor& x, set_output(dx_res, x_grad); } else { auto dx_reduce_res = - dx_res.sum(phi::vectorize(reduce_dim), x.dtype(), false); - auto dx_tmp = reshape(dx_reduce_res, phi::vectorize(x.dims())); + dx_res.sum(common::vectorize(reduce_dim), x.dtype(), false); + auto dx_tmp = reshape(dx_reduce_res, common::vectorize(x.dims())); set_output(dx_tmp, x_grad); } } else { @@ -751,8 +752,8 @@ void maximum_grad(const Tensor& x, set_output(dy_res, y_grad); } else { auto dy_reduce_res = - dy_res.sum(phi::vectorize(reduce_dim), y.dtype(), false); - auto dy_tmp = reshape(dy_reduce_res, phi::vectorize(y.dims())); + dy_res.sum(common::vectorize(reduce_dim), y.dtype(), false); + auto dy_tmp = reshape(dy_reduce_res, common::vectorize(y.dims())); set_output(dy_tmp, y_grad); } } else { @@ -765,10 +766,11 @@ template void relu_grad(const Tensor& out, const Tensor& out_grad, Tensor* x_grad) { if (x_grad) { auto condition = greater_than( - out, full(phi::vectorize(out.dims()), 0.0, out.dtype())); - auto res = where(condition, - out_grad, - full(phi::vectorize(out.dims()), 0.0, out.dtype())); + out, full(common::vectorize(out.dims()), 0.0, out.dtype())); + auto res = + where(condition, + out_grad, + full(common::vectorize(out.dims()), 0.0, out.dtype())); set_output(res, x_grad); } } @@ -779,7 +781,7 @@ void gather_nd_grad(const Tensor& x, const Tensor& out_grad, Tensor* x_grad) { if (x_grad) { - auto zero_tensor = full(phi::vectorize(x.dims()), 0.0, x.dtype()); + auto zero_tensor = full(common::vectorize(x.dims()), 0.0, x.dtype()); auto x_grad_tmp = scatter_nd_add(zero_tensor, index, out_grad); set_output(x_grad_tmp, x_grad); } @@ -822,8 +824,8 @@ void max_grad(const Tensor& x, if (!x_grad) { return; } - auto zero_tensor = full(phi::vectorize(x.dims()), 0.0, x.dtype()); - std::vector x_dim = phi::vectorize(x.dims()); + auto zero_tensor = full(common::vectorize(x.dims()), 0.0, x.dtype()); + std::vector x_dim = common::vectorize(x.dims()); int64_t axis_size = axis.size(); int64_t x_dim_size = x_dim.size(); reduce_all = false; @@ -882,7 +884,7 @@ void slice_grad(const Tensor& input, if (decrease_size > 0) { if (decrease_size == static_cast(in_dims.size())) { // all dims decrease - out_dims = phi::make_ddim(std::vector(decrease_size, 1)); + out_dims = common::make_ddim(std::vector(decrease_size, 1)); } else { origin_out_shape.resize(out_dims.size() + decrease_size, -1); for (size_t i = 0; i < decrease_size; ++i) { @@ -896,7 +898,7 @@ void slice_grad(const Tensor& input, ++index; } } - out_dims = phi::make_ddim(origin_out_shape); + out_dims = common::make_ddim(origin_out_shape); } } @@ -937,13 +939,13 @@ void tile_grad(const Tensor& x, Tensor* x_grad) { if (x_grad) { auto repeat_times_data = repeat_times.GetData(); - auto out_grad_shape = phi::vectorize(out_grad.dims()); + auto out_grad_shape = 
common::vectorize(out_grad.dims()); auto result = out_grad; for (int i = 0; i < static_cast(repeat_times_data.size()); i++) { int size = out_grad_shape[i] / repeat_times_data[i]; std::vector sections(repeat_times_data[i], size); auto split_arr = split(result, IntArray(sections), i); - result = full(phi::vectorize(split_arr[0].dims()), 0.0, x.dtype()); + result = full(common::vectorize(split_arr[0].dims()), 0.0, x.dtype()); for (int j = 0; j < static_cast(split_arr.size()); j++) { result = split_arr[j] + result; } diff --git a/paddle/fluid/primitive/type/lazy_tensor.h b/paddle/fluid/primitive/type/lazy_tensor.h index df9f7f35ae8a85..792ccaa208fbad 100644 --- a/paddle/fluid/primitive/type/lazy_tensor.h +++ b/paddle/fluid/primitive/type/lazy_tensor.h @@ -13,9 +13,9 @@ // limitations under the License. #pragma once +#include "paddle/common/ddim.h" #include "paddle/fluid/pir/dialect/operator/ir/op_type.h" #include "paddle/fluid/pir/dialect/operator/utils/utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/extended_tensor.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/pir/core/value.h" diff --git a/paddle/fluid/primitive/utils/CMakeLists.txt b/paddle/fluid/primitive/utils/CMakeLists.txt index babaa5cd7da7ff..6b3458d7844bca 100644 --- a/paddle/fluid/primitive/utils/CMakeLists.txt +++ b/paddle/fluid/primitive/utils/CMakeLists.txt @@ -2,9 +2,9 @@ if(WITH_PYTHON OR NOT ON_INFER) cc_library( primitive_eager_utils_experimental SRCS eager_utils.cc - DEPS phi common_infer_shape_functions) + DEPS phi common common_infer_shape_functions) endif() cc_library( primitive_static_utils_experimental SRCS static_utils.cc - DEPS phi common_infer_shape_functions op_dialect) + DEPS phi common common_infer_shape_functions op_dialect) diff --git a/paddle/fluid/primitive/utils/utils.h b/paddle/fluid/primitive/utils/utils.h index 4490cc683ab70a..e38398f4814859 100644 --- a/paddle/fluid/primitive/utils/utils.h +++ b/paddle/fluid/primitive/utils/utils.h @@ -15,10 +15,10 @@ #pragma once #include +#include "paddle/common/ddim.h" #include "paddle/fluid/operators/common_infer_shape_functions.h" #include "paddle/fluid/primitive/type/lazy_tensor.h" #include "paddle/phi/api/include/tensor.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace primitive { @@ -133,7 +133,7 @@ static phi::DDim get_reduce_dims_from_out(const phi::DDim& dout_dims, i)); } } - return phi::make_ddim(result); + return common::make_ddim(result); } static phi::DDim get_reduce_dims(const phi::DDim& x_dims, diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index d3e0d0ec00343a..a864a70da2db57 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -19,6 +19,7 @@ set(PYBIND_DEPS layer tracer engine + common scope_pool analysis_predictor imperative_profiler @@ -43,7 +44,6 @@ set(PYBIND_DEPS program_translator pir_transforms pir - common new_profiler jit_layer jit_property @@ -64,7 +64,8 @@ if(WITH_PSCORE) endif() endif() if(WITH_RPC) - set(PYBIND_DEPS ${PYBIND_DEPS} paddle_rpc ${EXTERNAL_BRPC_DEPS} zlib phi) + set(PYBIND_DEPS ${PYBIND_DEPS} paddle_rpc ${EXTERNAL_BRPC_DEPS} zlib phi + common) endif() if(WITH_GPU OR WITH_ROCM) set(PYBIND_DEPS ${PYBIND_DEPS} dynload_cuda) @@ -361,10 +362,11 @@ if(WITH_PYTHON) list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/ir.dll) endif() - # add_custom_command( - # OUTPUT ${op_impl_path}/common.dll COMMAND ${CMAKE_COMMAND} -E copy - # ${COMMON_LIB} ${op_impl_path}) - # list(APPEND EAGER_OP_IMPL_DEPS 
${op_impl_path}/common.dll) + add_custom_command( + OUTPUT ${op_impl_path}/common.dll + COMMAND ${CMAKE_COMMAND} -E copy ${COMMON_LIB} ${op_impl_path} + DEPENDS common) + list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/common.dll) if(${CBLAS_PROVIDER} STREQUAL MKLML) add_custom_command( @@ -502,6 +504,7 @@ if(WITH_PYTHON) list(APPEND PYBIND_DEPS backward) list(APPEND PYBIND_DEPS grad_node_info) list(APPEND PYBIND_DEPS phi) + list(APPEND PYBIND_DEPS common) list(APPEND PYBIND_DEPS final_dygraph_function) list(APPEND PYBIND_DEPS final_dygraph_node) list(APPEND PYBIND_DEPS dygraph_function) diff --git a/paddle/fluid/pybind/auto_parallel_py.cc b/paddle/fluid/pybind/auto_parallel_py.cc index d993da4c64fa67..4b73d24163d83d 100644 --- a/paddle/fluid/pybind/auto_parallel_py.cc +++ b/paddle/fluid/pybind/auto_parallel_py.cc @@ -795,7 +795,7 @@ static void parse_tensors(PyObject *obj, DistTensorSpec in = py::cast(PyList_GetItem(obj, i)); VLOG(6) << "Vector emplace_back DistTensorSpec: " << in.to_string(); ins.emplace_back(phi::distributed::DistMetaTensor( - phi::make_ddim(in.shape()), in.dist_attr())); + common::make_ddim(in.shape()), in.dist_attr())); } ctx->EmplaceBackInputs(ins); } @@ -807,7 +807,7 @@ static void parse_tensor(PyObject *obj, DistTensorSpec in = py::cast(obj); VLOG(6) << "DistTensorSpec: " << in.to_string(); ctx->EmplaceBackInput(phi::distributed::DistMetaTensor( - phi::make_ddim(in.shape()), in.dist_attr())); + common::make_ddim(in.shape()), in.dist_attr())); } // TODO(ljz) support other types diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index d60fe8799c10ff..894ede8db18d2b 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -83,7 +83,7 @@ void EmptyTensorInitializer(TensorObject* self, const std::vector& dims = {0}, framework::proto::VarType::Type var_type = paddle::framework::proto::VarType::LOD_TENSOR) { - auto ddims = phi::make_ddim(dims); + auto ddims = common::make_ddim(dims); self->tensor.set_name(name); auto autograd_meta = egr::EagerUtils::autograd_meta(&(self->tensor)); autograd_meta->SetPersistable(persistable); @@ -126,7 +126,7 @@ void EmptyStringTensorInitializer(TensorObject* self, const std::string& name, const paddle::platform::Place& place, const std::vector& dims = {}) { - auto ddims = phi::make_ddim(dims); + auto ddims = common::make_ddim(dims); self->tensor.set_name(name); // Note(zhoushunjie): Only support CPUPlace when create StringTensor auto actual_place = platform::CPUPlace(); @@ -135,7 +135,7 @@ void EmptyStringTensorInitializer(TensorObject* self, std::shared_ptr string_tensor = std::make_shared(&string_allocator, phi::StringTensorMeta{ddims}); - if (phi::product(ddims) > 0) { + if (common::product(ddims) > 0) { string_tensor->mutable_data(actual_place); } self->tensor.set_impl(string_tensor); @@ -153,7 +153,7 @@ void CreateDistTensorWithNumpyValue(TensorObject* self, paddle::framework::proto::VarType::FP32, const std::vector& dims = {0}) { #ifdef PADDLE_WITH_DISTRIBUTE - auto ddims = phi::make_ddim(dims); + auto ddims = common::make_ddim(dims); self->tensor.set_name(name); auto autograd_meta = egr::EagerUtils::autograd_meta(&(self->tensor)); autograd_meta->SetPersistable(persistable); @@ -229,7 +229,7 @@ void CreateDistTensorWithNumpyValue(TensorObject* self, paddle::framework::proto::VarType::FP32, const std::vector& dims = {0}) { #ifdef PADDLE_WITH_DISTRIBUTE - auto ddims = phi::make_ddim(dims); + auto ddims = common::make_ddim(dims); self->tensor.set_name(name); auto autograd_meta = 
egr::EagerUtils::autograd_meta(&(self->tensor)); autograd_meta->SetPersistable(persistable); diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 356b447988db07..df84ca68b9182b 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -512,7 +512,7 @@ static PyObject* eager_api__get_custom_operator_inplace_reverse_idx( // This function copies from function `EmptyTensorInitializer` with default // parameters static Tensor InitializedEmptyTensor() { - auto ddims = phi::make_ddim({0}); + auto ddims = common::make_ddim({0}); auto tensor = paddle::Tensor(); tensor.set_name( egr::Controller::Instance().GenerateUniqueName("generated_tensor")); @@ -882,7 +882,7 @@ static PyObject* eager_api_sparse_coo_tensor(PyObject* self, // sort and merge duplicate indices std::shared_ptr coo_tensor = std::make_shared( - *dense_indices, *dense_elements, phi::make_ddim(dense_shape)); + *dense_indices, *dense_elements, common::make_ddim(dense_shape)); tensor.set_impl(coo_tensor); auto name = egr::Controller::Instance().GenerateUniqueName("generated_tensor"); @@ -932,7 +932,7 @@ static PyObject* eager_api_sparse_csr_tensor(PyObject* self, std::make_shared(*dense_crows, *dense_cols, *dense_elements, - phi::make_ddim(dense_shape)); + common::make_ddim(dense_shape)); tensor.set_impl(csr_tensor); auto name = egr::Controller::Instance().GenerateUniqueName("generated_tensor"); diff --git a/paddle/fluid/pybind/eager_math_op_patch.cc b/paddle/fluid/pybind/eager_math_op_patch.cc index aa7a27db207364..bfe10107b24d87 100644 --- a/paddle/fluid/pybind/eager_math_op_patch.cc +++ b/paddle/fluid/pybind/eager_math_op_patch.cc @@ -44,13 +44,13 @@ typedef SSIZE_T ssize_t; #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#include "paddle/common/ddim.h" #include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/python_headers.h" #include "paddle/fluid/memory/allocation/mmap_allocator.h" #include "paddle/fluid/pybind/op_function_common.h" #include "paddle/fluid/pybind/tensor_py.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 6323c895d3896e..5effab997848d9 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -52,6 +52,7 @@ typedef SSIZE_T ssize_t; #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#include "paddle/common/ddim.h" #include "paddle/fluid/eager/amp_utils.h" #include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" #include "paddle/fluid/eager/eager_amp_auto_cast.h" @@ -59,7 +60,6 @@ typedef SSIZE_T ssize_t; #include "paddle/fluid/memory/allocation/mmap_allocator.h" #include "paddle/fluid/pybind/tensor_py.h" #include "paddle/phi/api/lib/data_transform.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" #include "paddle/phi/core/distributed/auto_parallel/reshard/reshard_function.h" #include "paddle/phi/core/distributed/auto_parallel/reshard/reshard_function_registry.h" @@ -1461,7 +1461,7 @@ static PyObject* tensor__getitem_from_offset(TensorObject* self, const auto& tensor_dims = tensor.dims(); std::vector 
dims(tensor_dims.size()); - std::vector stride = phi::vectorize(tensor.strides()); + std::vector stride = common::vectorize(tensor.strides()); size_t numel = 1; for (int i = tensor_dims.size() - 1; i >= 0; --i) { diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index 582d15909e9411..985086d05b5f1e 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -583,7 +583,7 @@ PyObject* tensor_properties_get_local_shape(TensorObject* self, void* closure) { #ifdef PADDLE_WITH_DISTRIBUTE phi::distributed::DistTensor* dist_tensor = static_cast(self->tensor.impl().get()); - return ToPyObject(phi::vectorize(dist_tensor->local_dims())); + return ToPyObject(common::vectorize(dist_tensor->local_dims())); #else PADDLE_THROW(platform::errors::Unavailable( "The `_local_shape` property of (Dist)Tensor is not supported " @@ -657,7 +657,7 @@ PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) { << " tensor layout: " << self->tensor.layout() << " tensor's shape size is : " << value.size(); std::vector dims = value; - if (change_dim && phi::DataLayoutToString(desired_layout) == "NCHW") { + if (change_dim && common::DataLayoutToString(desired_layout) == "NCHW") { // NCHW -> NHWC VLOG(6) << "layout autotune get Shape from NCHW -> NHWC " << value[0] << " " << value[1] << " " << value[2] << " " << value[3] << " to " @@ -667,7 +667,7 @@ PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) { value[2] = dims[3]; value[3] = dims[1]; } else if (change_dim && - phi::DataLayoutToString(desired_layout) == "NHWC") { + common::DataLayoutToString(desired_layout) == "NHWC") { // NHWC -> NCHW VLOG(6) << "layout autotune get Shape from NHWC -> NCHW " << value[0] << " " << value[1] << " " << value[2] << " " << value[3] << " to " @@ -798,7 +798,7 @@ PyObject* tensor_properties_get_layout(TensorObject* self, void* closure) { VLOG(3) << "VariableCompatTensor does not support `layout` method."; return ToPyObject(layout); } else { - return ToPyObject(phi::DataLayoutToString(self->tensor.layout())); + return ToPyObject(common::DataLayoutToString(self->tensor.layout())); } return ToPyObject(layout); diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index a465dde78f2637..2b8f36f8988cfa 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -11,6 +11,7 @@ limitations under the License. */ #include "paddle/fluid/pybind/eager_utils.h" #include +#include "paddle/common/exception.h" #include "paddle/pir/core/value.h" // Avoid a problem with copysign defined in pyconfig.h on Windows. #ifdef copysign @@ -136,7 +137,7 @@ void ConvertToDistTensor(Tensor* x, const phi::distributed::ProcessMesh* mesh) { "as it's not phi::DenseTensor.", x->name())); phi::distributed::TensorDistAttr dist_attr( - phi::vectorize(x->impl()->dims())); + common::vectorize(x->impl()->dims())); dist_attr.set_process_mesh(*mesh); auto dense_t = std::static_pointer_cast(x->impl()); // auto parallel in dygraph doesn't support strided kernel. 
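Illustrative sketch (not part of the applied diff): the hunks in this file, like the pybind changes above, apply one mechanical rewrite — DDim helpers formerly reached through phi:: are now reached through common::, with the declarations coming from paddle/common/ddim.h. A minimal, self-contained example of the pattern, assuming only that `t` is a phi::DenseTensor (the function and variable names are illustrative):

    #include <cstdint>
    #include <vector>

    #include "paddle/common/ddim.h"
    #include "paddle/phi/core/dense_tensor.h"

    void MigrationSketch(phi::DenseTensor& t) {
      // Before: auto shape = phi::vectorize(t.dims());
      std::vector<int64_t> shape = common::vectorize(t.dims());
      // Before: t.Resize(phi::make_ddim(shape));
      t.Resize(common::make_ddim(shape));
    }

The same substitution covers slice_ddim, product, stride_numel, DataLayoutToString and the other helpers seen throughout this patch.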
@@ -1878,7 +1879,7 @@ paddle::Tensor CreateTensorFromVarDesc( auto var_type = var_desc.GetType(); - auto ddims = phi::make_ddim(dims); + auto ddims = common::make_ddim(dims); tensor.set_name(var_desc.Name()); auto autograd_meta = egr::EagerUtils::autograd_meta(&tensor); autograd_meta->SetPersistable(false); diff --git a/paddle/fluid/pybind/eval_frame_tools.cc b/paddle/fluid/pybind/eval_frame_tools.cc index 3b8df99eb2a3f3..da09c2478c02cd 100644 --- a/paddle/fluid/pybind/eval_frame_tools.cc +++ b/paddle/fluid/pybind/eval_frame_tools.cc @@ -18,9 +18,9 @@ #include +#include "paddle/common/errors.h" #include "paddle/fluid/platform/profiler/event_tracing.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" /*============================ Dict Tree ================================*/ diff --git a/paddle/fluid/pybind/exception.cc b/paddle/fluid/pybind/exception.cc index 7c166021f7b004..bede935a14dc3b 100644 --- a/paddle/fluid/pybind/exception.cc +++ b/paddle/fluid/pybind/exception.cc @@ -14,8 +14,8 @@ limitations under the License. */ #include "paddle/fluid/pybind/exception.h" +#include "paddle/common/exception.h" #include "paddle/fluid/memory/allocation/allocator.h" -#include "paddle/phi/api/ext/exception.h" namespace paddle { namespace pybind { @@ -128,7 +128,7 @@ void ThrowExceptionToPython(std::exception_ptr p) { PyErr_SetString(EnforceNotMetException, e.what()); break; } - } catch (const paddle::PD_Exception& e) { + } catch (const common::PD_Exception& e) { PyErr_SetString(PyExc_OSError, e.what()); } } diff --git a/paddle/fluid/pybind/global_value_getter_setter.cc b/paddle/fluid/pybind/global_value_getter_setter.cc index 7f8cac9ee1edf9..57ae90688e7689 100644 --- a/paddle/fluid/pybind/global_value_getter_setter.cc +++ b/paddle/fluid/pybind/global_value_getter_setter.cc @@ -22,11 +22,11 @@ #include #include +#include "paddle/common/macros.h" #include "paddle/fluid/framework/python_headers.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/macros.h" -#include "paddle/phi/core/macros.h" #include "paddle/utils/flags.h" #include "pybind11/stl.h" diff --git a/paddle/fluid/pybind/pir.cc b/paddle/fluid/pybind/pir.cc index 7d5aa7863f0e7d..e44ccfe88915f7 100644 --- a/paddle/fluid/pybind/pir.cc +++ b/paddle/fluid/pybind/pir.cc @@ -1156,7 +1156,7 @@ SplitedResult SplitForwardBackward( } auto value_type = v.type().dyn_cast(); auto dtype = paddle::dialect::TransToPhiDataType(value_type.dtype()); - auto shape = phi::vectorize(value_type.dims()); + auto shape = common::vectorize(value_type.dims()); auto place = phi::Place(); paddle::dialect::DataOp op = diff --git a/paddle/fluid/pybind/pir.h b/paddle/fluid/pybind/pir.h index 81ae155bbd28ef..30c3e83af4a9e8 100644 --- a/paddle/fluid/pybind/pir.h +++ b/paddle/fluid/pybind/pir.h @@ -15,8 +15,8 @@ #pragma once #include +#include "paddle/common/ddim.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/core/ddim.h" #include "paddle/pir/core/op_result.h" namespace paddle { diff --git a/paddle/fluid/pybind/process_group_utils.h b/paddle/fluid/pybind/process_group_utils.h index 5795e1aa8a8ce7..3ba9ec3239c371 100644 --- a/paddle/fluid/pybind/process_group_utils.h +++ b/paddle/fluid/pybind/process_group_utils.h @@ -97,21 +97,21 @@ struct SplitDenseTensor { std::vector); auto *kernel_fn = kernel.GetVariadicKernelFn(); - auto in_dims = phi::vectorize(in.dims()); - auto origin_out_dims = phi::vectorize(out->at(0)->dims()); + auto in_dims = common::vectorize(in.dims()); + 
auto origin_out_dims = common::vectorize(out->at(0)->dims()); for (auto *tensor : *out) { if (origin_out_dims.size() != in_dims.size()) { std::vector new_dims({1}); new_dims.insert( new_dims.end(), origin_out_dims.begin(), origin_out_dims.end()); - tensor->Resize(phi::make_ddim(new_dims)); + tensor->Resize(common::make_ddim(new_dims)); } } (*kernel_fn)(context, in, out->size(), phi::Scalar(0), *out); for (auto *tensor : *out) { - auto tensor_dims = phi::vectorize(tensor->dims()); + auto tensor_dims = common::vectorize(tensor->dims()); if (tensor_dims.size() != origin_out_dims.size()) { - tensor->Resize(phi::make_ddim(origin_out_dims)); + tensor->Resize(common::make_ddim(origin_out_dims)); } } } diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 41e897bb8a2431..cfd8fb5f6e36f4 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -635,7 +635,7 @@ static void inline CreateVariableIfNotExist( Py_DECREF(py_var_desc); var = const_cast(&scope)->Var(para_name); auto *tensor_temp = var->GetMutable(); - tensor_temp->Resize(phi::make_ddim(var_desc.GetShape())); + tensor_temp->Resize(common::make_ddim(var_desc.GetShape())); tensor_temp->mutable_data( exe->GetPlace(), framework::TransToPhiDataType(var_desc.GetDataType())); @@ -1016,8 +1016,8 @@ PYBIND11_MODULE(libpaddle, m) { m.def( "broadcast_shape", [](const std::vector &x_dim, const std::vector &y_dim) { - return phi::vectorize(operators::details::BroadcastTwoDims( - phi::make_ddim(x_dim), phi::make_ddim(y_dim), -1)); + return common::vectorize(operators::details::BroadcastTwoDims( + common::make_ddim(x_dim), common::make_ddim(y_dim), -1)); }); m.def( diff --git a/paddle/fluid/pybind/reader_py.cc b/paddle/fluid/pybind/reader_py.cc index f4b4ff4cf42ca6..a136afe4cca383 100644 --- a/paddle/fluid/pybind/reader_py.cc +++ b/paddle/fluid/pybind/reader_py.cc @@ -23,6 +23,7 @@ #include "Python.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/reader.h" #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/tracer.h" @@ -30,7 +31,6 @@ #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" #include "paddle/fluid/operators/reader/py_reader.h" #include "paddle/fluid/platform/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/flags.h" #include "paddle/utils/flags.h" #include "pybind11/stl.h" @@ -56,7 +56,7 @@ static paddle::optional> DiffTensorShape( if (UNLIKELY(rank == 0)) { if (!target_shape.empty()) { // Tensor rank = 0 but desc does not match - return phi::vectorize(tensor_shape); + return common::vectorize(tensor_shape); } else { return paddle::none; } @@ -77,12 +77,12 @@ static paddle::optional> DiffTensorShape( tensor_shape[0] = split_size; if (target_shape[0] >= 0) { // need check dim 0 if (tensor_shape[0] != target_shape[0]) { - return phi::vectorize(tensor_shape); + return common::vectorize(tensor_shape); } if (remainder > 0) { tensor_shape[0] = remainder; - return phi::vectorize(tensor_shape); + return common::vectorize(tensor_shape); } } } @@ -95,7 +95,7 @@ static paddle::optional> DiffTensorShape( "Tensor shape at dim %d must not be less than 0", idx)); if (target_shape[idx] >= 0 && tensor_shape[static_cast(idx)] != target_shape[idx]) { - return phi::vectorize(tensor_shape); + return common::vectorize(tensor_shape); } } @@ -152,7 +152,7 @@ class MultiDeviceFeedReader { pin_memory_(pin_memory) { std::vector dims; for (auto &shape : shapes) { - dims.push_back(phi::make_ddim(shape)); + 
dims.push_back(common::make_ddim(shape)); } auto first_reader = std::make_shared( diff --git a/paddle/fluid/pybind/tensor.cc b/paddle/fluid/pybind/tensor.cc index de0f3da2deb14e..aa4e92c6e8af67 100644 --- a/paddle/fluid/pybind/tensor.cc +++ b/paddle/fluid/pybind/tensor.cc @@ -221,14 +221,16 @@ void BindTensor(pybind11::module &m) { // NOLINT .def("_is_initialized", [](const phi::DenseTensor &self) { return self.IsInitialized(); }) .def("_get_dims", - [](const phi::DenseTensor &self) { return vectorize(self.dims()); }) + [](const phi::DenseTensor &self) { + return common::vectorize(self.dims()); + }) .def("_set_dims", [](phi::DenseTensor &self, const std::vector &dim) { - self.Resize(phi::make_ddim(dim)); + self.Resize(common::make_ddim(dim)); }) .def("_set_layout", [](phi::DenseTensor &self, const std::string &layout) { - self.set_layout(phi::StringToDataLayout(layout)); + self.set_layout(common::StringToDataLayout(layout)); }) .def("_alloc_float", [](phi::DenseTensor &self, paddle::platform::CustomPlace &place) { @@ -402,7 +404,7 @@ void BindTensor(pybind11::module &m) { // NOLINT .def( "shape", - [](phi::DenseTensor &self) { return vectorize(self.dims()); }, + [](phi::DenseTensor &self) { return common::vectorize(self.dims()); }, R"DOC( Return the shape of Tensor. @@ -468,7 +470,7 @@ void BindTensor(pybind11::module &m) { // NOLINT }) .def("_layout", [](phi::DenseTensor &self) { - return phi::DataLayoutToString(self.layout()); + return common::DataLayoutToString(self.layout()); }) .def("_share_data_with", &phi::DenseTensor::ShareDataWith) .def("__getitem__", PySliceTensor, py::return_value_policy::reference) @@ -517,7 +519,7 @@ void BindTensor(pybind11::module &m) { // NOLINT new_lod.reserve(lod.size()); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); PADDLE_ENFORCE_EQ( - CheckLoD(new_lod, vectorize(self.dims()).front()), + CheckLoD(new_lod, common::vectorize(self.dims()).front()), true, platform::errors::InvalidArgument( "The provided LoD is invalid, the LoD is %s", new_lod)); @@ -559,7 +561,8 @@ void BindTensor(pybind11::module &m) { // NOLINT std::back_inserter(new_lod)); LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod); PADDLE_ENFORCE_EQ( - CheckLoD(new_offset_lod, vectorize(self.dims()).front()), + CheckLoD(new_offset_lod, + common::vectorize(self.dims()).front()), true, platform::errors::InvalidArgument( "The provided recursive_sequence_lengths info is " @@ -660,8 +663,9 @@ void BindTensor(pybind11::module &m) { // NOLINT [](phi::DenseTensor &self) -> bool { // Check that the lod info is valid and match the outermost // dimension of the Tensor data - return CheckLoD(self.lod(), - static_cast(vectorize(self.dims()).front())); + return CheckLoD( + self.lod(), + static_cast(common::vectorize(self.dims()).front())); }, R"DOC( Check whether the LoD of the Tensor is valid. 
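A short note on the CheckLoD calls just above: vectorize() flattens the DDim into a std::vector<int64_t>, so .front() is the outermost dimension — the only value the LoD validity check needs. A hedged sketch of the pattern, using the same two-argument CheckLoD form as the bindings (`t` is illustrative):

    // `t` is a phi::DenseTensor with, say, dims {6, 3}.
    // common::vectorize(t.dims()) == {6, 3}, so front() == 6: the
    // outermost dimension that the LoD offsets must cover.
    const auto dims_vec = common::vectorize(t.dims());
    const bool lod_ok =
        CheckLoD(t.lod(), static_cast<size_t>(dims_vec.front()));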
@@ -725,7 +729,7 @@ void BindTensor(pybind11::module &m) { // NOLINT size_t size = t[0].cast(); auto dtype = static_cast(t[1].cast()); - auto dims = phi::make_ddim(t[2].cast>()); + auto dims = common::make_ddim(t[2].cast>()); auto lod_info = t[3].cast(); auto device_id = t[4].cast(); @@ -787,8 +791,12 @@ void BindTensor(pybind11::module &m) { // NOLINT framework::SizeOfType( framework::TransToProtoVarType(self.type())); - return py::make_tuple(_handle, (py::size_t)offset_bytes, data_size, - type_idx, vectorize(self.dims()), self.lod(), + return py::make_tuple(_handle, + (py::size_t)offset_bytes, + data_size, + type_idx, + common::vectorize(self.dims()), + self.lod(), device_id); }, R"DOC( @@ -832,7 +840,7 @@ void BindTensor(pybind11::module &m) { // NOLINT tensor.ResetHolderWithType( shared_reader_holder, static_cast(t[3].cast())); - tensor.Resize(phi::make_ddim(t[4].cast>())); + tensor.Resize(common::make_ddim(t[4].cast>())); tensor.set_lod(t[5].cast()); return tensor; @@ -911,7 +919,7 @@ void BindTensor(pybind11::module &m) { // NOLINT return py::make_tuple(mmap_allocation->ipc_name(), mmap_allocation->size(), type_idx, - vectorize(self.dims()), self.lod()); + common::vectorize(self.dims()), self.lod()); }, R"DOC( Serialize CPU lod tensor in shared memory to tuple. @@ -953,7 +961,7 @@ void BindTensor(pybind11::module &m) { // NOLINT tensor.ResetHolderWithType( shared_holder, static_cast(t[2].cast())); - tensor.Resize(phi::make_ddim(t[3].cast>())); + tensor.Resize(common::make_ddim(t[3].cast>())); tensor.set_lod(t[4].cast()); return tensor; @@ -1017,7 +1025,7 @@ void BindTensor(pybind11::module &m) { // NOLINT return py::make_tuple(mmap_writer_allocation->ipc_name(), mmap_writer_allocation->size(), type_idx, - vectorize(t.dims()), t.lod()); + common::vectorize(t.dims()), t.lod()); }, [](py::tuple t) { // __setstate__ if (t.size() != 5) @@ -1041,7 +1049,7 @@ void BindTensor(pybind11::module &m) { // NOLINT tensor.ResetHolderWithType( shared_reader_holder, static_cast(t[2].cast())); - tensor.Resize(phi::make_ddim(t[3].cast>())); + tensor.Resize(common::make_ddim(t[3].cast>())); tensor.set_lod(t[4].cast()); return tensor; diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 4b50fd5084ed69..dd5bd7f1d91c4d 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -389,7 +389,7 @@ void SetTensorFromPyArrayT( for (decltype(array.ndim()) i = 0; i < array.ndim(); ++i) { dims.push_back(static_cast(array.shape()[i])); } - self->Resize(phi::make_ddim(dims)); + self->Resize(common::make_ddim(dims)); if (paddle::platform::is_cpu_place(place)) { if (zero_copy) { @@ -556,7 +556,7 @@ void SetStringTensorFromPyArray(phi::StringTensor *self, for (decltype(array.ndim()) i = 0; i < array.ndim(); ++i) { dims.push_back(static_cast(array.shape()[i])); } - self->Resize(phi::make_ddim(dims)); + self->Resize(common::make_ddim(dims)); auto itemsize = array.itemsize(); if (paddle::platform::is_cpu_place(place)) { auto dst = self->mutable_data(place); @@ -609,7 +609,7 @@ void SetUVATensorFromPyArrayImpl( dims.emplace_back(static_cast(array.shape()[i])); numel *= static_cast(array.shape()[i]); } - self_tensor->Resize(phi::make_ddim(dims)); + self_tensor->Resize(common::make_ddim(dims)); auto data_type = framework::ToDataType(std::type_index(typeid(T))); const auto &need_allocate_size = numel * framework::SizeOfType(data_type); @@ -652,7 +652,7 @@ void SetUVATensorFromPyArray(const std::shared_ptr &self, #if defined(PADDLE_WITH_CUDA) VLOG(4) << "Running in 
SetUVATensorFromPyArray for Phi::Tensor."; phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 1})); std::shared_ptr tmp_t = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) @@ -672,7 +672,7 @@ void _sliceCompute(const phi::DenseTensor *in, const std::vector &axes, const std::vector &starts) { auto &eigen_place = *ctx.eigen_device(); - auto out_dims = phi::vectorize(out->dims()); + auto out_dims = common::vectorize(out->dims()); auto in_dims = in->dims(); auto offsets = Eigen::DSizes(); @@ -708,8 +708,8 @@ void _concatCompute(const std::vector &ins, if (axis == 0 && ins.size() < 10) { size_t output_offset = 0; for (auto &in : ins) { - auto in_stride = phi::stride_numel(in.dims()); - auto out_stride = phi::stride_numel(out->dims()); + auto in_stride = common::stride_numel(in.dims()); + auto out_stride = common::stride_numel(out->dims()); phi::funcs::StridedNumelCopyWithAxis( ctx, axis, diff --git a/paddle/fluid/sub_graph/sub_graph_checker.cc b/paddle/fluid/sub_graph/sub_graph_checker.cc index 89a7a00d58d553..b13f25fcb7ff20 100644 --- a/paddle/fluid/sub_graph/sub_graph_checker.cc +++ b/paddle/fluid/sub_graph/sub_graph_checker.cc @@ -336,7 +336,7 @@ void SubGraphChecker::InitInputs(const std::vector& input_values, for (size_t i = 0; i < input_values.size(); ++i) { auto tensor_type = input_values[i].type().dyn_cast(); - auto shape = phi::vectorize(tensor_type.dims()); + auto shape = common::vectorize(tensor_type.dims()); auto random = builder .Build( diff --git a/paddle/phi/CMakeLists.txt b/paddle/phi/CMakeLists.txt index 45e4b8bd085d5e..64c18b2b60ff0a 100644 --- a/paddle/phi/CMakeLists.txt +++ b/paddle/phi/CMakeLists.txt @@ -42,7 +42,8 @@ set(PHI_DEPS eigen3 xxhash cblas - utf8proc) + utf8proc + common) set(INFERENCE_DEPS phi_profiler_proto auto_parallel_proto) diff --git a/paddle/phi/api/all.h b/paddle/phi/api/all.h index 74a016a183b2b4..ec521021859706 100644 --- a/paddle/phi/api/all.h +++ b/paddle/phi/api/all.h @@ -29,14 +29,14 @@ limitations under the License. */ #include "paddle/phi/api/include/tensor_utils.h" // phi common headers +#include "paddle/common/layout.h" #include "paddle/phi/common/backend.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/int_array.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/scalar.h" // original custom op headers +#include "paddle/common/exception.h" #include "paddle/phi/api/ext/dispatch.h" -#include "paddle/phi/api/ext/exception.h" #include "paddle/phi/api/ext/op_meta_info.h" #include "paddle/phi/api/ext/tensor_compat.h" diff --git a/paddle/phi/api/ext/exception.h b/paddle/phi/api/ext/exception.h deleted file mode 100644 index 92b17b4898d3f7..00000000000000 --- a/paddle/phi/api/ext/exception.h +++ /dev/null @@ -1,99 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include -#include - -namespace paddle { - -//////////////// Exception handling and Error Message ///////////////// -#if !defined(_WIN32) -#define PD_UNLIKELY(expr) (__builtin_expect(static_cast(expr), 0)) -#define PD_LIKELY(expr) (__builtin_expect(static_cast(expr), 1)) -#else -#define PD_UNLIKELY(expr) (expr) -#define PD_LIKELY(expr) (expr) -#endif - -struct PD_Exception : public std::exception { - public: - template - explicit PD_Exception(const std::string& msg, - const char* file, - int line, - const char* default_msg) { - std::ostringstream sout; - if (msg.empty()) { - sout << default_msg << "\n [" << file << ":" << line << "]"; - } else { - sout << msg << "\n [" << file << ":" << line << "]"; - } - err_msg_ = sout.str(); - } - - const char* what() const noexcept override { return err_msg_.c_str(); } - - private: - std::string err_msg_; -}; - -class ErrorMessage { - public: - template - explicit ErrorMessage(const Args&... args) { - build_string(args...); - } - - void build_string() { oss << ""; } - - template - void build_string(const T& t) { - oss << t; - } - - template - void build_string(const T& t, const Args&... args) { - build_string(t); - build_string(args...); - } - - std::string to_string() { return oss.str(); } - - private: - std::ostringstream oss; -}; - -#define PD_CHECK(COND, ...) \ - do { \ - if (PD_UNLIKELY(!(COND))) { \ - auto __message__ = ::paddle::ErrorMessage(__VA_ARGS__).to_string(); \ - throw ::paddle::PD_Exception(__message__, \ - __FILE__, \ - __LINE__, \ - "Expected " #COND \ - ", but it's not satisfied."); \ - } \ - } while (0) - -#define PD_THROW(...) \ - do { \ - auto __message__ = ::paddle::ErrorMessage(__VA_ARGS__).to_string(); \ - throw ::paddle::PD_Exception( \ - __message__, __FILE__, __LINE__, "An error occurred."); \ - } while (0) - -} // namespace paddle diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h index 6ece2298980c4b..c324a783cb67d0 100644 --- a/paddle/phi/api/ext/op_meta_info.h +++ b/paddle/phi/api/ext/op_meta_info.h @@ -20,7 +20,7 @@ limitations under the License. */ #include #include -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/api/include/dll_decl.h" #include "paddle/phi/api/include/tensor.h" #include "paddle/utils/any.h" diff --git a/paddle/phi/api/include/context_pool.h b/paddle/phi/api/include/context_pool.h index 6b6fe290d6d288..86ba7b9cf75764 100644 --- a/paddle/phi/api/include/context_pool.h +++ b/paddle/phi/api/include/context_pool.h @@ -16,9 +16,9 @@ limitations under the License. 
*/ #include +#include "paddle/common/macros.h" #include "paddle/phi/api/include/dll_decl.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/macros.h" #include "paddle/utils/flat_hash_map.h" namespace phi { diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h index b8d66f6c228c72..a6e78686e1e4ce 100644 --- a/paddle/phi/api/include/tensor.h +++ b/paddle/phi/api/include/tensor.h @@ -29,21 +29,21 @@ using gpuStream_t = cudaStream_t; using gpuStream_t = hipStream_t; #endif +#include "paddle/common/layout.h" #include "paddle/phi/api/include/dll_decl.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/int_array.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/place.h" #include "paddle/phi/common/scalar.h" namespace phi { class DenseTensor; +class TensorBase; } // namespace phi -namespace phi { -class TensorBase; +namespace common { class DDim; -} // namespace phi +} // namespace common namespace paddle { // TODO(chenweihang): Remove the experimental namespace for Scalar and IntArray @@ -173,9 +173,9 @@ class PADDLE_API Tensor final { /** * @brief Return the dimensions of Tensor. * - * @return phi::DDim + * @return common::DDim */ - const phi::DDim& dims() const; + const common::DDim& dims() const; /** * @brief Return the shape (dimensions) of Tensor. @@ -190,9 +190,9 @@ class PADDLE_API Tensor final { /** * @brief Return the strides (dimensions) of Tensor. * - * @return phi::DDim + * @return common::DDim */ - const phi::DDim& strides() const; + const common::DDim& strides() const; /** * @brief Reset the shape of the tensor. diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc index 0dea1ecb3db8e1..5a0316eddade33 100644 --- a/paddle/phi/api/lib/api_gen_utils.cc +++ b/paddle/phi/api/lib/api_gen_utils.cc @@ -338,8 +338,8 @@ void TransStride(const Context& dev_ctx, phi::StridedCopyKernel( dev_ctx, *from, - phi::vectorize(to->dims()), - phi::vectorize(to->strides()), + common::vectorize(to->dims()), + common::vectorize(to->strides()), to->offset(), to); })); @@ -357,8 +357,8 @@ void TransStride(const Context& dev_ctx, phi::StridedCopyKernel( dev_ctx, *from[i], - phi::vectorize(to[i]->dims()), - phi::vectorize(to[i]->strides()), + common::vectorize(to[i]->dims()), + common::vectorize(to[i]->strides()), to[i]->offset(), to[i]); })); @@ -377,8 +377,8 @@ void TransStride(phi::DeviceContext* dev_ctx, phi::StridedCopyKernel( *cpu_ctx, *from, - phi::vectorize(to->dims()), - phi::vectorize(to->strides()), + common::vectorize(to->dims()), + common::vectorize(to->strides()), to->offset(), to); })); @@ -392,8 +392,8 @@ void TransStride(phi::DeviceContext* dev_ctx, phi::StridedCopyKernel( *gpu_ctx, *from, - phi::vectorize(to->dims()), - phi::vectorize(to->strides()), + common::vectorize(to->dims()), + common::vectorize(to->strides()), to->offset(), to); })); @@ -408,8 +408,8 @@ void TransStride(phi::DeviceContext* dev_ctx, phi::StridedCopyKernel( *xpu_ctx, *from, - phi::vectorize(to->dims()), - phi::vectorize(to->strides()), + common::vectorize(to->dims()), + common::vectorize(to->strides()), to->offset(), to); })); @@ -430,8 +430,8 @@ void TransStrideLegacy(phi::DeviceContext* dev_ctx, phi::StridedCopyKernel( *cpu_ctx, *from, - phi::vectorize(to->dims()), - phi::vectorize(to->strides()), + common::vectorize(to->dims()), + common::vectorize(to->strides()), to->offset(), to); })); @@ -444,8 +444,8 @@ void TransStrideLegacy(phi::DeviceContext* dev_ctx, phi::StridedCopyKernel( *gpu_ctx, *from, - 
phi::vectorize(to->dims()), - phi::vectorize(to->strides()), + common::vectorize(to->dims()), + common::vectorize(to->strides()), to->offset(), to); })); @@ -459,8 +459,8 @@ void TransStrideLegacy(phi::DeviceContext* dev_ctx, phi::StridedCopyKernel( *xpu_ctx, *from, - phi::vectorize(to->dims()), - phi::vectorize(to->strides()), + common::vectorize(to->dims()), + common::vectorize(to->strides()), to->offset(), to); })); @@ -481,8 +481,8 @@ void TransStride(phi::DeviceContext* dev_ctx, phi::StridedCopyKernel( *cpu_ctx, *from[i], - phi::vectorize(to[i]->dims()), - phi::vectorize(to[i]->strides()), + common::vectorize(to[i]->dims()), + common::vectorize(to[i]->strides()), to[i]->offset(), to[i]); })); @@ -496,8 +496,8 @@ void TransStride(phi::DeviceContext* dev_ctx, phi::StridedCopyKernel( *gpu_ctx, *from[i], - phi::vectorize(to[i]->dims()), - phi::vectorize(to[i]->strides()), + common::vectorize(to[i]->dims()), + common::vectorize(to[i]->strides()), to[i]->offset(), to[i]); })); @@ -512,8 +512,8 @@ void TransStride(phi::DeviceContext* dev_ctx, phi::StridedCopyKernel( *xpu_ctx, *from[i], - phi::vectorize(to[i]->dims()), - phi::vectorize(to[i]->strides()), + common::vectorize(to[i]->dims()), + common::vectorize(to[i]->strides()), to[i]->offset(), to[i]); })); @@ -721,7 +721,7 @@ void SetReplicatedDistAttrForOutput( if (out) { // For inplace output, we also need to set replicated dist attr auto dist_attr = - phi::distributed::TensorDistAttr(phi::vectorize(out->dims())); + phi::distributed::TensorDistAttr(common::vectorize(out->dims())); dist_attr.set_process_mesh(process_mesh); out->unsafe_set_dist_attr(dist_attr); } diff --git a/paddle/phi/api/lib/backend_set.h b/paddle/phi/api/lib/backend_set.h index 51e08374bc9d65..af4de2580f5784 100644 --- a/paddle/phi/api/lib/backend_set.h +++ b/paddle/phi/api/lib/backend_set.h @@ -16,7 +16,7 @@ limitations under the License. */ #include -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/common/backend.h" namespace paddle { namespace experimental { diff --git a/paddle/phi/api/lib/data_type_set.h b/paddle/phi/api/lib/data_type_set.h index 04580e37f8fb52..7f9a1c44de3d3c 100644 --- a/paddle/phi/api/lib/data_type_set.h +++ b/paddle/phi/api/lib/data_type_set.h @@ -16,7 +16,7 @@ limitations under the License. */ #include -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/common/data_type.h" namespace paddle { namespace experimental { diff --git a/paddle/phi/api/lib/kernel_dispatch.cc b/paddle/phi/api/lib/kernel_dispatch.cc index 2cece35ba1582e..62d05e18c4a614 100644 --- a/paddle/phi/api/lib/kernel_dispatch.cc +++ b/paddle/phi/api/lib/kernel_dispatch.cc @@ -167,11 +167,12 @@ Backend ParseBackendWithInputOrder(const Place& place, const Tensor& tensor) { : ParseBackend(tensor); } -DataLayout ParseLayout(DataLayout layout) { return layout; } -DataLayout ParseLayout(const Tensor& tensor) { return tensor.layout(); } +phi::DataLayout ParseLayout(phi::DataLayout layout) { return layout; } +phi::DataLayout ParseLayout(const Tensor& tensor) { return tensor.layout(); } -DataLayout ParseLayoutWithInputOrder(DataLayout layout, const Tensor& tensor) { - return layout != DataLayout::UNDEFINED ? layout : ParseLayout(tensor); +phi::DataLayout ParseLayoutWithInputOrder(phi::DataLayout layout, + const Tensor& tensor) { + return layout != phi::DataLayout::UNDEFINED ? 
layout : ParseLayout(tensor); } } // namespace experimental diff --git a/paddle/phi/api/lib/kernel_dispatch.h b/paddle/phi/api/lib/kernel_dispatch.h index 7bd3524ed795c3..9d7de268309e33 100644 --- a/paddle/phi/api/lib/kernel_dispatch.h +++ b/paddle/phi/api/lib/kernel_dispatch.h @@ -18,12 +18,12 @@ limitations under the License. */ #include #include #include +#include "paddle/common/layout.h" #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/api/lib/backend_set.h" #include "paddle/phi/api/lib/data_type_set.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" #include "paddle/phi/core/selected_rows.h" #include "paddle/phi/core/sparse_coo_tensor.h" @@ -52,7 +52,7 @@ enum class KernelType { // TODO(chenweihang): support DataLayout and DataType selected struct KernelKeySet { BackendSet backend_set{Backend::UNDEFINED}; - DataLayout layout{DataLayout::UNDEFINED}; + phi::DataLayout layout{phi::DataLayout::UNDEFINED}; DataType dtype{DataType::UNDEFINED}; // TODO(chenweihang): iterate all kernelkey for kernel selection @@ -239,9 +239,10 @@ Backend ParseBackend(T t, Args... args) { } Backend ParseBackendWithInputOrder(const Place& place, const Tensor& tensor); -DataLayout ParseLayout(DataLayout layout); -DataLayout ParseLayout(const Tensor& tensor); -DataLayout ParseLayoutWithInputOrder(DataLayout layout, const Tensor& tensor); +phi::DataLayout ParseLayout(phi::DataLayout layout); +phi::DataLayout ParseLayout(const Tensor& tensor); +phi::DataLayout ParseLayoutWithInputOrder(phi::DataLayout layout, + const Tensor& tensor); template bool AllInputsAreDistTensor(const Args&... args) { diff --git a/paddle/phi/api/lib/tensor.cc b/paddle/phi/api/lib/tensor.cc index 206d5082e62dd1..49c47cbcce363c 100644 --- a/paddle/phi/api/lib/tensor.cc +++ b/paddle/phi/api/lib/tensor.cc @@ -20,11 +20,11 @@ limitations under the License. 
*/ #include "glog/logging.h" +#include "paddle/common/ddim.h" #include "paddle/phi/api/include/context_pool.h" #include "paddle/phi/api/lib/utils/allocator.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_info.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" #include "paddle/phi/core/enforce.h" @@ -72,8 +72,9 @@ Tensor::Tensor(const Place &place) { DefaultAllocator alloc(place); impl_ = std::make_shared( &alloc, - phi::DenseTensorMeta( - phi::DataType::FLOAT32, phi::make_ddim({}), phi::DataLayout::NCHW)); + phi::DenseTensorMeta(phi::DataType::FLOAT32, + common::make_ddim({}), + phi::DataLayout::NCHW)); } Tensor::Tensor(const Place &place, const std::vector &shape) { @@ -89,7 +90,7 @@ Tensor::Tensor(const Place &place, const std::vector &shape) { impl_ = std::make_shared( &alloc, phi::DenseTensorMeta(phi::DataType::FLOAT32, - phi::make_ddim({shape}), + common::make_ddim({shape}), phi::DataLayout::NCHW)); } @@ -107,7 +108,7 @@ const phi::DDim &Tensor::dims() const { return impl_->dims(); } std::vector Tensor::shape() const { const auto &dims = impl_->dims(); - return phi::vectorize(dims); + return common::vectorize(dims); } const phi::DDim &Tensor::strides() const { @@ -134,7 +135,8 @@ void Tensor::reshape(const std::vector &shape) { "touching underlying data, this requires the total size of " "the tensor to remain constant."; if (is_dense_tensor()) { - static_cast(impl_.get())->Resize(phi::make_ddim(shape)); + static_cast(impl_.get()) + ->Resize(common::make_ddim(shape)); } else { PADDLE_THROW(phi::errors::Unimplemented( "Only support reshape operation on DenseTensor now.")); @@ -145,7 +147,7 @@ DataType Tensor::dtype() const { return impl_->dtype(); } DataType Tensor::type() const { return impl_->dtype(); } -DataLayout Tensor::layout() const { return impl_->layout(); } +phi::DataLayout Tensor::layout() const { return impl_->layout(); } bool Tensor::is_dense_tensor() const { return phi::DenseTensor::classof(impl_.get()); diff --git a/paddle/phi/api/lib/tensor_utils.cc b/paddle/phi/api/lib/tensor_utils.cc index aa9a678f2e10b5..09f5429a6623c6 100644 --- a/paddle/phi/api/lib/tensor_utils.cc +++ b/paddle/phi/api/lib/tensor_utils.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "paddle/phi/api/lib/api_registry.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/distributed/auto_parallel/reshard/reshard_utils.h" +#include "paddle/phi/core/enforce.h" #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #ifdef PADDLE_WITH_CUDA @@ -91,7 +92,7 @@ PADDLE_API Tensor from_blob(void* data, } auto meta = - phi::DenseTensorMeta(dtype, phi::make_ddim(shape.GetData()), layout); + phi::DenseTensorMeta(dtype, common::make_ddim(shape.GetData()), layout); size_t size = SizeOf(dtype) * (meta.is_scalar ? 
1 : product(meta.dims)); diff --git a/paddle/phi/api/profiler/common_event.h b/paddle/phi/api/profiler/common_event.h index 76b9d5fa609b9b..d9e3ed74fd397a 100644 --- a/paddle/phi/api/profiler/common_event.h +++ b/paddle/phi/api/profiler/common_event.h @@ -18,10 +18,10 @@ #include #include +#include "paddle/common/ddim.h" #include "paddle/phi/api/profiler/event.h" // import EventRole, TODO(TIEXING): remove later #include "paddle/phi/api/profiler/trace_event.h" #include "paddle/phi/core/attribute.h" -#include "paddle/phi/core/ddim.h" namespace phi { diff --git a/paddle/phi/api/profiler/host_event_recorder.h b/paddle/phi/api/profiler/host_event_recorder.h index 349a31a25ad29d..bd75d5e3689d3d 100644 --- a/paddle/phi/api/profiler/host_event_recorder.h +++ b/paddle/phi/api/profiler/host_event_recorder.h @@ -18,8 +18,8 @@ #include #include +#include "paddle/common/macros.h" #include "paddle/phi/common/thread_data_registry.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/core/os_info.h" namespace phi { diff --git a/paddle/phi/api/profiler/supplement_tracing.h b/paddle/phi/api/profiler/supplement_tracing.h index e93ad63b607ade..fc20f041ec02a7 100644 --- a/paddle/phi/api/profiler/supplement_tracing.h +++ b/paddle/phi/api/profiler/supplement_tracing.h @@ -18,8 +18,8 @@ limitations under the License. */ #include #include +#include "paddle/common/ddim.h" #include "paddle/phi/core/attribute.h" -#include "paddle/phi/core/ddim.h" namespace phi { diff --git a/paddle/phi/api/yaml/generator/tensor_operants_gen.py b/paddle/phi/api/yaml/generator/tensor_operants_gen.py index bb6d42dc03964d..b09e336a138c66 100644 --- a/paddle/phi/api/yaml/generator/tensor_operants_gen.py +++ b/paddle/phi/api/yaml/generator/tensor_operants_gen.py @@ -228,7 +228,7 @@ class TensorOperantsBase { #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/common/int_array.h" -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" """ @@ -349,7 +349,7 @@ class PhiTensorOperants : public TensorOperantsBase { #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/common/int_array.h" -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" #include "paddle/utils/test_macros.h" """ @@ -444,7 +444,7 @@ class TEST_API OperantsManager { #include "glog/logging.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/utils/flags.h" """ diff --git a/paddle/phi/backends/CMakeLists.txt b/paddle/phi/backends/CMakeLists.txt index 55e629de34e7e2..ed47487553bee7 100644 --- a/paddle/phi/backends/CMakeLists.txt +++ b/paddle/phi/backends/CMakeLists.txt @@ -64,5 +64,5 @@ if(WITH_CUSTOM_DEVICE) cc_test( capi_test SRCS custom/capi_test.cc - DEPS phi) + DEPS phi common) endif() diff --git a/paddle/phi/backends/c_comm_lib.h b/paddle/phi/backends/c_comm_lib.h index 30ebe6d2fa4961..682fc841e05b1a 100644 --- a/paddle/phi/backends/c_comm_lib.h +++ b/paddle/phi/backends/c_comm_lib.h @@ -15,11 +15,11 @@ #pragma once #include +#include "paddle/common/errors.h" +#include "paddle/common/macros.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/common/reduce_type.h" diff --git a/paddle/phi/backends/context_pool.h b/paddle/phi/backends/context_pool.h index a2bf9ed1cb03df..52f0ced275ac5e 100644 --- 
a/paddle/phi/backends/context_pool.h +++ b/paddle/phi/backends/context_pool.h @@ -20,10 +20,10 @@ limitations under the License. */ #include // NOLINT #include +#include "paddle/common/macros.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/device_context.h" -#include "paddle/phi/core/macros.h" #include "paddle/utils/test_macros.h" namespace phi { diff --git a/paddle/phi/backends/device_memory_aligment.h b/paddle/phi/backends/device_memory_aligment.h index 8508d5206558d2..c65e06364acd0e 100644 --- a/paddle/phi/backends/device_memory_aligment.h +++ b/paddle/phi/backends/device_memory_aligment.h @@ -15,10 +15,10 @@ limitations under the License. */ #pragma once #include +#include "paddle/common/errors.h" #include "paddle/phi/backends/cpu/cpu_info.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/xpu/xpu_info.h" diff --git a/paddle/phi/backends/dynload/CMakeLists.txt b/paddle/phi/backends/dynload/CMakeLists.txt index 838b623ae7b381..2db75d7022f0a5 100644 --- a/paddle/phi/backends/dynload/CMakeLists.txt +++ b/paddle/phi/backends/dynload/CMakeLists.txt @@ -77,7 +77,7 @@ if(WITH_XPU) cc_library( phi_dynload_xpti SRCS xpti.cc - DEPS phi) + DEPS phi common) endif() if(WITH_FLASHATTN) @@ -101,5 +101,5 @@ if(WITH_CUDNN_FRONTEND) nv_test( cudnn_frontend_test SRCS cudnn_frontend_test.cc - DEPS phi cudnn-frontend) + DEPS phi common cudnn-frontend) endif() diff --git a/paddle/phi/backends/event.h b/paddle/phi/backends/event.h index 21dc9f47d7b89e..0ac87f376bfccb 100644 --- a/paddle/phi/backends/event.h +++ b/paddle/phi/backends/event.h @@ -13,8 +13,8 @@ // limitations under the License. 
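The backend headers below all follow a single include rule: utilities relocated into the standalone common library (macros, errors, layout, exception) are now included from paddle/common/ rather than paddle/phi/. A sketch of the rewrite, assuming the macro set moved with its header (DISABLE_COPY_AND_ASSIGN shown as an example of what these headers provide):

    // Before: #include "paddle/phi/core/macros.h"
    #include "paddle/common/macros.h"   // e.g. DISABLE_COPY_AND_ASSIGN

    // Before: #include "paddle/phi/core/errors.h"
    #include "paddle/common/errors.h"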
#pragma once +#include "paddle/common/macros.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/macros.h" namespace phi { diff --git a/paddle/phi/backends/gpu/cuda/cuda_graph.h b/paddle/phi/backends/gpu/cuda/cuda_graph.h index 7b5644128c7cd4..a40cfc93ab81a2 100644 --- a/paddle/phi/backends/gpu/cuda/cuda_graph.h +++ b/paddle/phi/backends/gpu/cuda/cuda_graph.h @@ -26,14 +26,14 @@ #include "glog/logging.h" +#include "paddle/common/errors.h" +#include "paddle/common/macros.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/backends/device_code.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" -#include "paddle/phi/core/macros.h" #include "paddle/utils/optional.h" #if CUDA_VERSION < 11000 diff --git a/paddle/phi/backends/gpu/cuda/cudnn_desc.h b/paddle/phi/backends/gpu/cuda/cudnn_desc.h index d4fb6930bcc550..33565ba87413ff 100644 --- a/paddle/phi/backends/gpu/cuda/cudnn_desc.h +++ b/paddle/phi/backends/gpu/cuda/cudnn_desc.h @@ -132,7 +132,7 @@ class TensorDescriptor { T* desc() { return desc_.get(); } T* desc() const { return desc_.get(); } void set(const phi::DenseTensor& tensor, const int groups = 1) { - auto dims = phi::vectorize(tensor.dims()); + auto dims = common::vectorize(tensor.dims()); std::vector strides(dims.size()); strides[dims.size() - 1] = 1; for (int i = dims.size() - 2; i >= 0; i--) { @@ -168,7 +168,7 @@ class TensorDescriptor { } void set(const phi::DenseTensor& tensor, const cudnnTensorFormat_t format) { - auto dims = phi::vectorize(tensor.dims()); + auto dims = common::vectorize(tensor.dims()); auto dtype = ToCudnnDataType(tensor.dtype()); set(dims, format, dtype); } @@ -222,7 +222,7 @@ class FilterDescriptor { void set(const phi::DenseTensor& tensor, const cudnnTensorFormat_t format, const int groups = 1) { - auto dims = phi::vectorize(tensor.dims()); + auto dims = common::vectorize(tensor.dims()); auto dtype = ToCudnnDataType(tensor.dtype()); set(dims, format, dtype, groups); } diff --git a/paddle/phi/backends/gpu/cuda/cudnn_helper.h b/paddle/phi/backends/gpu/cuda/cudnn_helper.h index 74db3fc75bcd10..9ca5551857238d 100644 --- a/paddle/phi/backends/gpu/cuda/cudnn_helper.h +++ b/paddle/phi/backends/gpu/cuda/cudnn_helper.h @@ -17,14 +17,14 @@ limitations under the License. 
*/ #include #include +#include "paddle/common/errors.h" +#include "paddle/common/macros.h" #include "paddle/phi/backends/dynload/cudnn.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" -#include "paddle/phi/core/macros.h" #include "paddle/utils/flags.h" PD_DECLARE_bool(cudnn_deterministic); @@ -374,7 +374,8 @@ class ScopedDropoutDescriptor { PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetDropoutDescriptor( desc_, handle, dropout_prob_, dropout_state_data, state_size, seed)); } else { - auto dropout_state_dims = phi::vectorize(dropout_state_->dims()); + auto dropout_state_dims = + common::vectorize(dropout_state_->dims()); state_size = dropout_state_dims[0]; PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnRestoreDropoutDescriptor( desc_, handle, dropout_prob_, dropout_state_data, state_size, 0)); diff --git a/paddle/phi/backends/gpu/gpu_context.cc b/paddle/phi/backends/gpu/gpu_context.cc index f87e3b3d805393..2a074e24a57405 100644 --- a/paddle/phi/backends/gpu/gpu_context.cc +++ b/paddle/phi/backends/gpu/gpu_context.cc @@ -24,7 +24,7 @@ limitations under the License. */ #include #include "glog/logging.h" -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/backends/gpu/gpu_decls.h" #include "paddle/phi/backends/gpu/gpu_info.h" diff --git a/paddle/phi/backends/gpu/rocm/miopen_desc.h b/paddle/phi/backends/gpu/rocm/miopen_desc.h index ae0e274ca650ef..55758968a30f67 100644 --- a/paddle/phi/backends/gpu/rocm/miopen_desc.h +++ b/paddle/phi/backends/gpu/rocm/miopen_desc.h @@ -121,7 +121,7 @@ class TensorDescriptor { T* desc() const { return desc_.get(); } void set(const phi::DenseTensor& tensor, const int groups = 1) { - auto dims = phi::vectorize(tensor.dims()); + auto dims = common::vectorize(tensor.dims()); std::vector strides(dims.size()); strides[dims.size() - 1] = 1; for (int i = dims.size() - 2; i >= 0; i--) { @@ -145,7 +145,7 @@ class TensorDescriptor { format, MIOPEN_TENSOR_NCHW, phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN.")); - auto dims = phi::vectorize(tensor.dims()); + auto dims = common::vectorize(tensor.dims()); std::vector strides(dims.size()); strides[dims.size() - 1] = 1; for (int i = dims.size() - 2; i >= 0; i--) { @@ -195,7 +195,7 @@ class FilterDescriptor { format, MIOPEN_TENSOR_NCHW, phi::errors::InvalidArgument("format should ONLY be NCHW in MIOPEN.")); - auto dims = phi::vectorize(tensor.dims()); + auto dims = common::vectorize(tensor.dims()); std::vector strides(dims.size()); strides[dims.size() - 1] = 1; for (int i = dims.size() - 2; i >= 0; i--) { diff --git a/paddle/phi/backends/gpu/rocm/miopen_helper.h b/paddle/phi/backends/gpu/rocm/miopen_helper.h index f7815e2ed851e0..47603d0950f400 100644 --- a/paddle/phi/backends/gpu/rocm/miopen_helper.h +++ b/paddle/phi/backends/gpu/rocm/miopen_helper.h @@ -19,14 +19,14 @@ limitations under the License. 
*/ #include "paddle/utils/flags.h" +#include "paddle/common/errors.h" +#include "paddle/common/macros.h" #include "paddle/phi/backends/dynload/miopen.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" -#include "paddle/phi/core/macros.h" // MIOPEN do not have epslion definition #define CUDNN_BN_MIN_EPSILON 1e-05 diff --git a/paddle/phi/backends/onednn/matmul_utils.cc b/paddle/phi/backends/onednn/matmul_utils.cc index 815663ca1e0ccc..ac2b5ab8c575df 100644 --- a/paddle/phi/backends/onednn/matmul_utils.cc +++ b/paddle/phi/backends/onednn/matmul_utils.cc @@ -18,11 +18,11 @@ namespace phi { namespace funcs { DDim RowMatrixDimsFromVector(const DDim& x_dim) { - return x_dim.size() > 1 ? x_dim : make_ddim({1, x_dim[0]}); + return x_dim.size() > 1 ? x_dim : common::make_ddim({1, x_dim[0]}); } DDim ColumnMatrixDimsFromVector(const DDim& y_dim) { - return y_dim.size() > 1 ? y_dim : make_ddim({y_dim[0], 1}); + return y_dim.size() > 1 ? y_dim : common::make_ddim({y_dim[0], 1}); } std::vector TransposeAxis(const std::vector& x, diff --git a/paddle/phi/backends/onednn/matmul_utils.h b/paddle/phi/backends/onednn/matmul_utils.h index 7248e64fe60b16..e1a4777041ceb4 100644 --- a/paddle/phi/backends/onednn/matmul_utils.h +++ b/paddle/phi/backends/onednn/matmul_utils.h @@ -146,7 +146,7 @@ inline void ExecuteMul(const OneDNNContext& dev_ctx, // This kernel is flattening dims so then we need to unflattened version // that should be set in out reshape require plain layout, but // MatmulV2MKLDNNHanlder enforces one so it should work - auto reshape_dims = out->dims().size() != 0 ? vectorize(out->dims()) + auto reshape_dims = out->dims().size() != 0 ? common::vectorize(out->dims()) : std::vector{1}; out->set_mem_desc(dst_memory_p->get_desc().reshape(reshape_dims)); } @@ -178,7 +178,7 @@ inline void ExecuteMatmul(const OneDNNContext& dev_ctx, matmul_p->execute(astream, matmul_args); astream.wait(); - auto reshape_dims = out->dims().size() != 0 ? vectorize(out->dims()) + auto reshape_dims = out->dims().size() != 0 ? common::vectorize(out->dims()) : std::vector{1}; out->set_mem_desc(dst_memory_p->get_desc().reshape(reshape_dims)); } diff --git a/paddle/phi/backends/onednn/onednn_context.h b/paddle/phi/backends/onednn/onednn_context.h index b9f1d490874841..aec9f7f0d5e4f5 100644 --- a/paddle/phi/backends/onednn/onednn_context.h +++ b/paddle/phi/backends/onednn/onednn_context.h @@ -17,8 +17,8 @@ limitations under the License. */ #include #include // NOLINT #include "dnnl.hpp" // NOLINT +#include "paddle/common/layout.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/attribute.h" #include "paddle/utils/test_macros.h" diff --git a/paddle/phi/backends/onednn/onednn_helper.h b/paddle/phi/backends/onednn/onednn_helper.h index 1d61004b36161f..60c531c7b74435 100644 --- a/paddle/phi/backends/onednn/onednn_helper.h +++ b/paddle/phi/backends/onednn/onednn_helper.h @@ -18,8 +18,8 @@ #include "dnnl.hpp" // NOLINT #include "glog/logging.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/onednn/onednn_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" @@ -263,16 +263,16 @@ inline void MatchShapeToLayout(DenseTensor* tensor_in, // be done. 
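The MatchShapeToLayout hunks just below keep the interesting part intact: converting a shape between ONEDNN's NCHW-like order and NHWC/NDHWC is a single std::rotate of the dims vector. Concretely, with an assumed 4-D example:

#include <algorithm>
#include <cstdint>
#include <vector>

void RotateLayoutExample() {
  std::vector<int64_t> dims = {8, 3, 32, 32};  // N, C, H, W (ONEDNN order)
  std::rotate(dims.begin() + 1, dims.begin() + 2, dims.end());
  // dims == {8, 32, 32, 3}: ONEDNN -> NHWC, channel moved behind W
  std::rotate(dims.begin() + 1, dims.end() - 1, dims.end());
  // dims == {8, 3, 32, 32} again: NHWC -> ONEDNN
}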
Similarly for dim==1 when you have just one possible combination. if (tensor_in->dims().size() < 3) { VLOG(3) << "Keeping ONEDNN/NHWC/NDHWC output_shape" - << print_dims(phi::vectorize(tensor_in->dims())); + << print_dims(common::vectorize(tensor_in->dims())); return; } switch (from) { case DataLayout::ONEDNN: if ((to == DataLayout::NHWC) || (to == DataLayout::NDHWC)) { - auto dims = phi::vectorize(tensor_in->dims()); + auto dims = common::vectorize(tensor_in->dims()); std::rotate(dims.begin() + 1, dims.begin() + 2, dims.end()); - tensor_in->Resize(phi::make_ddim(dims)); + tensor_in->Resize(common::make_ddim(dims)); VLOG(3) << "Rotating Shape from: ONEDNN to: NHWC/NDHWC output_shape" << print_dims(dims); } @@ -280,9 +280,9 @@ inline void MatchShapeToLayout(DenseTensor* tensor_in, case DataLayout::NHWC: case DataLayout::NDHWC: if (to == DataLayout::ONEDNN) { - auto dims = phi::vectorize(tensor_in->dims()); + auto dims = common::vectorize(tensor_in->dims()); std::rotate(dims.begin() + 1, dims.end() - 1, dims.end()); - tensor_in->Resize(phi::make_ddim(dims)); + tensor_in->Resize(common::make_ddim(dims)); VLOG(3) << "Rotating Shape from: NHWC/NDHWC to: ONEDNN output_shape" << print_dims(dims); } diff --git a/paddle/phi/backends/onednn/onednn_reuse.h b/paddle/phi/backends/onednn/onednn_reuse.h index 990c6ea10bbadd..d9719c6f3e5b2c 100644 --- a/paddle/phi/backends/onednn/onednn_reuse.h +++ b/paddle/phi/backends/onednn/onednn_reuse.h @@ -957,8 +957,8 @@ class BinaryOneDNNHandler : public OneDNNHandlerNoCachingT { : OneDNNHandlerNoCachingT(engine, cpu_place) { use_broadcasting_hack = false; swin_case = false; - const auto src_x_tz = vectorize(x->dims()); - const auto src_y_tz = vectorize(y->dims()); + const auto src_x_tz = common::vectorize(x->dims()); + const auto src_y_tz = common::vectorize(y->dims()); // if output tensor(z) is nullptr then we are computing into oneDNN // managed buffer auto rankdiff = x->dims().size() - y->dims().size(); @@ -968,7 +968,7 @@ class BinaryOneDNNHandler : public OneDNNHandlerNoCachingT { : (y->dims().size() == 0 ? std::vector{1} : src_x_tz)) : (out->dims().size() == 0 ? std::vector{1} - : vectorize(out->dims())); + : common::vectorize(out->dims())); auto src0_md = x->mem_desc(); auto src1_md = y->mem_desc(); @@ -1216,8 +1216,9 @@ class BroadcastDataOneDNNHandler float scale_y, const std::vector& extended_x_dims) : OneDNNHandlerNoCachingT(engine, cpu_place) { - const auto src0_tz = out->dims().size() == 0 ? std::vector{1} - : vectorize(out->dims()); + const auto src0_tz = out->dims().size() == 0 + ? 
std::vector{1} + : common::vectorize(out->dims()); const auto src0_md = dnnl::memory::desc( src0_tz, OneDNNGetDataType(), GetPlainOneDNNFormat(src0_tz.size())); const auto reshape_dims = @@ -1264,7 +1265,7 @@ class PReluOneDNNHandler const bool is_test) : OneDNNHandlerNoCachingT( engine, cpu_place) { - auto weights_dims = vectorize(weights.dims()); + auto weights_dims = common::vectorize(weights.dims()); // weights must have same size as X only for "element" case if (weights.dims().size() != x.dims().size()) { auto new_weights_dims = std::vector(x.dims().size(), 1); @@ -1465,7 +1466,7 @@ class BatchNormOneDNNHandler } std::shared_ptr AcquireScaleMemory(const DenseTensor* scale) { - auto scale_tz = vectorize(scale->dims()); + auto scale_tz = common::vectorize(scale->dims()); PADDLE_ENFORCE_EQ( scale_tz.size(), 1, @@ -1480,7 +1481,7 @@ class BatchNormOneDNNHandler } std::shared_ptr AcquireShiftMemory(const DenseTensor* shift) { - auto shift_tz = vectorize(shift->dims()); + auto shift_tz = common::vectorize(shift->dims()); PADDLE_ENFORCE_EQ( shift_tz.size(), 1, @@ -1600,8 +1601,8 @@ class PoolingOneDNNHandler auto onednn_paddings = ToOneDNNPadding(copied_paddings); const auto dt = ToOneDNNDataType(input->dtype()); - const auto src_tz = vectorize(input->dims()); - const auto dst_tz = vectorize(output->dims()); + const auto src_tz = common::vectorize(input->dims()); + const auto dst_tz = common::vectorize(output->dims()); const auto dst_md = OneDNNMemDesc(dst_tz, dt, OneDNNMemoryFormat::any); if (ceil_mode) { @@ -1689,9 +1690,9 @@ class PoolingOneDNNHandler copied_strides, copied_kernel_size); - auto src_tz = vectorize(in_x->dims()); - auto diff_src_tz = vectorize(in_x_grad->dims()); - auto diff_dst_tz = vectorize(out_grad->dims()); + auto src_tz = common::vectorize(in_x->dims()); + auto diff_src_tz = common::vectorize(in_x_grad->dims()); + auto diff_dst_tz = common::vectorize(out_grad->dims()); const auto dt = ToOneDNNDataType(in_x->dtype()); auto dst_md = dnnl::memory::desc(diff_dst_tz, dt, OneDNNMemoryFormat::any); @@ -1910,7 +1911,7 @@ static void SetOutMemDescWithUnsqueeze2FuseSupport( } } out->set_mem_desc(out_md.reshape(unsqueezed_op_tz)); - out->Resize(make_ddim(unsqueezed_op_tz)); + out->Resize(common::make_ddim(unsqueezed_op_tz)); } static void SetOutMemDescWithReshape2FuseSupport( @@ -1934,7 +1935,7 @@ static void SetOutMemDescWithReshape2FuseSupport( } out->set_mem_desc(out_md.reshape(fused_reshape2_shape)); - out->Resize(phi::make_ddim(fused_reshape2_shape)); + out->Resize(common::make_ddim(fused_reshape2_shape)); } } // namespace funcs diff --git a/paddle/phi/backends/stream.h b/paddle/phi/backends/stream.h index 4219b1cec49044..43f15ef08c0349 100644 --- a/paddle/phi/backends/stream.h +++ b/paddle/phi/backends/stream.h @@ -14,9 +14,9 @@ #pragma once +#include "paddle/common/macros.h" #include "paddle/phi/backends/callback_manager.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/macros.h" namespace phi { diff --git a/paddle/phi/backends/xpu/xpu_context.cc b/paddle/phi/backends/xpu/xpu_context.cc index 14164c4e9ddc7e..e3931d3da19b19 100644 --- a/paddle/phi/backends/xpu/xpu_context.cc +++ b/paddle/phi/backends/xpu/xpu_context.cc @@ -18,7 +18,7 @@ #include "glog/logging.h" -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/os_info.h" diff --git a/paddle/phi/capi/include/type_utils.h b/paddle/phi/capi/include/type_utils.h index 
029ee42fe091bc..69b1a213bf01f4 100644 --- a/paddle/phi/capi/include/type_utils.h +++ b/paddle/phi/capi/include/type_utils.h @@ -15,9 +15,9 @@ #pragma once #if !defined(_WIN32) +#include "paddle/common/layout.h" #include "paddle/phi/capi/include/c_data_type.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/enforce.h" namespace phi { diff --git a/paddle/phi/capi/include/wrapper_base.h b/paddle/phi/capi/include/wrapper_base.h index 9924f4d5efb6ba..061561008a95e7 100644 --- a/paddle/phi/capi/include/wrapper_base.h +++ b/paddle/phi/capi/include/wrapper_base.h @@ -23,7 +23,7 @@ #include #include -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/capi/include/c_device_context.h" #include "paddle/phi/capi/include/c_infer_meta_context.h" #include "paddle/phi/capi/include/c_int_array.h" diff --git a/paddle/phi/capi/lib/c_meta_tensor.cc b/paddle/phi/capi/lib/c_meta_tensor.cc index d2493058081584..6ea6eda1a7f23e 100644 --- a/paddle/phi/capi/lib/c_meta_tensor.cc +++ b/paddle/phi/capi/lib/c_meta_tensor.cc @@ -114,7 +114,7 @@ void PD_MetaTensorSetDims(PD_MetaTensor *tensor, } auto cc_tensor = reinterpret_cast(tensor); std::vector shape(dims, dims + ndims); - cc_tensor->set_dims(phi::make_ddim(shape)); + cc_tensor->set_dims(common::make_ddim(shape)); } void PD_MetaTensorSetDataType(PD_MetaTensor *tensor, diff --git a/paddle/phi/capi/lib/c_tensor.cc b/paddle/phi/capi/lib/c_tensor.cc index b460d2e368607c..31a724447b7c7f 100644 --- a/paddle/phi/capi/lib/c_tensor.cc +++ b/paddle/phi/capi/lib/c_tensor.cc @@ -198,7 +198,7 @@ void PD_TensorSetDims(PD_Tensor* tensor, } auto cc_tensor = reinterpret_cast(tensor); std::vector shape(dims, dims + ndims); - cc_tensor->Resize(phi::make_ddim(shape)); + cc_tensor->Resize(common::make_ddim(shape)); } void PD_TensorSetDataType(PD_Tensor* tensor, diff --git a/paddle/phi/common/backend.h b/paddle/phi/common/backend.h index 5540592d5013c8..64dab3ccdeb3b4 100644 --- a/paddle/phi/common/backend.h +++ b/paddle/phi/common/backend.h @@ -16,7 +16,7 @@ limitations under the License. */ #include -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/common/place.h" namespace paddle { diff --git a/paddle/phi/common/cpstring_impl.h b/paddle/phi/common/cpstring_impl.h index 6783799026d44b..1906fd4e57a444 100644 --- a/paddle/phi/common/cpstring_impl.h +++ b/paddle/phi/common/cpstring_impl.h @@ -24,7 +24,7 @@ limitations under the License. */ #include #include -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" #if (defined(__NVCC__) || defined(__HIPCC__)) #define HOSTDEVICE __host__ __device__ diff --git a/paddle/phi/common/data_type.h b/paddle/phi/common/data_type.h index 58852cf5adb022..36761d673d5396 100644 --- a/paddle/phi/common/data_type.h +++ b/paddle/phi/common/data_type.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/complex.h" #include "paddle/phi/common/float16.h" diff --git a/paddle/phi/common/int_array.cc b/paddle/phi/common/int_array.cc index 4b5d553006685b..75440bd2d5b818 100644 --- a/paddle/phi/common/int_array.cc +++ b/paddle/phi/common/int_array.cc @@ -14,10 +14,10 @@ limitations under the License. 
*/ #include "paddle/phi/common/int_array.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/tensor_utils.h" namespace paddle { diff --git a/paddle/phi/common/int_array.h b/paddle/phi/common/int_array.h index 0c4b3d4c8ca5b7..6eab8609e54b26 100644 --- a/paddle/phi/common/int_array.h +++ b/paddle/phi/common/int_array.h @@ -16,13 +16,13 @@ limitations under the License. */ #include -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/tensor_ref.h" -namespace phi { +namespace common { class DDim; -} // namespace phi +} // namespace common namespace paddle { class Tensor; @@ -55,7 +55,7 @@ class IntArrayBase { void SetFromTensor(bool val) { is_from_tensor_ = val; } - explicit IntArrayBase(const phi::DDim& dims); + explicit IntArrayBase(const common::DDim& dims); // The Tensor must have one dim IntArrayBase(const T& tensor); // NOLINT diff --git a/paddle/phi/common/memory_utils.h b/paddle/phi/common/memory_utils.h index 2571ec6d2788b7..e2a590ee4d210c 100644 --- a/paddle/phi/common/memory_utils.h +++ b/paddle/phi/common/memory_utils.h @@ -17,11 +17,11 @@ #include // NOLINT #include +#include "paddle/common/macros.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/allocator.h" #include "paddle/phi/core/device_context.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/core/stream.h" #include "paddle/utils/test_macros.h" diff --git a/paddle/phi/common/place.cc b/paddle/phi/common/place.cc index 61bb82176388c2..008f45aa935544 100644 --- a/paddle/phi/common/place.cc +++ b/paddle/phi/common/place.cc @@ -18,7 +18,7 @@ limitations under the License. */ #include #include "glog/logging.h" -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/backends/gpu/gpu_info.h" namespace phi { diff --git a/paddle/phi/common/place.h b/paddle/phi/common/place.h index 03072468f62e20..8865d9c2690cdb 100644 --- a/paddle/phi/common/place.h +++ b/paddle/phi/common/place.h @@ -18,8 +18,8 @@ limitations under the License. */ #include #include +#include "paddle/common/macros.h" #include "paddle/phi/api/include/dll_decl.h" -#include "paddle/phi/core/macros.h" #include "paddle/utils/test_macros.h" namespace paddle { diff --git a/paddle/phi/common/scalar.h b/paddle/phi/common/scalar.h index 5ed843653887b4..12de9149a96af6 100644 --- a/paddle/phi/common/scalar.h +++ b/paddle/phi/common/scalar.h @@ -19,7 +19,7 @@ limitations under the License. 
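Note the replaced forward declaration in int_array.h above: because DDim changed namespaces, "namespace phi { class DDim; }" becomes "namespace common { class DDim; }", and the constructor signature follows. Forward-declaring keeps paddle/common/ddim.h out of a widely included header; a sketch of why a declaration suffices here (the class below is an illustrative stand-in, not the real IntArrayBase):

namespace common {
class DDim;  // forward declaration: the complete type is not needed yet
}  // namespace common

class IntArrayLike {
 public:
  explicit IntArrayLike(const common::DDim& dims);  // reference parameter: OK
 private:
  // common::DDim dims_;  // a by-value member would need the full definition
};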
*/ #include #include -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/common/data_type.h" namespace paddle { diff --git a/paddle/phi/common/tensor_ref.h b/paddle/phi/common/tensor_ref.h index aa0338006f4ccd..6ecea89b06ba44 100644 --- a/paddle/phi/common/tensor_ref.h +++ b/paddle/phi/common/tensor_ref.h @@ -20,7 +20,7 @@ #include #include -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/common/data_type.h" namespace phi { diff --git a/paddle/phi/core/CMakeLists.txt b/paddle/phi/core/CMakeLists.txt index b9061b64087b08..b582ef84942df1 100644 --- a/paddle/phi/core/CMakeLists.txt +++ b/paddle/phi/core/CMakeLists.txt @@ -10,12 +10,10 @@ collect_srcs( core_srcs SRCS flags.cc - errors.cc enforce.cc storage_properties.cc os_info.cc kernel_context.cc - ddim.cc tensor_base.cc allocator.cc tensor_meta.cc diff --git a/paddle/phi/core/attribute.h b/paddle/phi/core/attribute.h index 6f032f4a5bd99c..88ab2dbf1df496 100644 --- a/paddle/phi/core/attribute.h +++ b/paddle/phi/core/attribute.h @@ -17,9 +17,9 @@ #include #include +#include "paddle/common/layout.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/int_array.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/common/tensor_ref.h" #include "paddle/utils/flat_hash_map.h" diff --git a/paddle/phi/core/compat/convert_utils.h b/paddle/phi/core/compat/convert_utils.h index a6ae22ff669fcc..632b7a6d17ef27 100644 --- a/paddle/phi/core/compat/convert_utils.h +++ b/paddle/phi/core/compat/convert_utils.h @@ -14,9 +14,9 @@ limitations under the License. */ #pragma once +#include "paddle/common/layout.h" #include "paddle/phi/common/backend.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/tensor_meta.h" diff --git a/paddle/phi/core/compat/op_utils.h b/paddle/phi/core/compat/op_utils.h index cfa647149669c8..b2c334d89023df 100644 --- a/paddle/phi/core/compat/op_utils.h +++ b/paddle/phi/core/compat/op_utils.h @@ -18,10 +18,10 @@ limitations under the License. */ #include #include "glog/logging.h" +#include "paddle/common/macros.h" #include "paddle/phi/core/compat/arg_map_context.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/infermeta_utils.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/core/type_defs.h" #include "paddle/utils/flat_hash_map.h" diff --git a/paddle/phi/core/custom_kernel.h b/paddle/phi/core/custom_kernel.h index 5ba14de6a6131c..1f6386a378aeec 100644 --- a/paddle/phi/core/custom_kernel.h +++ b/paddle/phi/core/custom_kernel.h @@ -14,8 +14,8 @@ #pragma once +#include "paddle/common/macros.h" #include "paddle/phi/core/kernel_factory.h" -#include "paddle/phi/core/macros.h" namespace phi { /** diff --git a/paddle/phi/core/ddim.cc b/paddle/phi/core/ddim.cc deleted file mode 100644 index ff95346be17c7a..00000000000000 --- a/paddle/phi/core/ddim.cc +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/ddim.h" - -#include - -namespace phi { - -DDim make_ddim(std::initializer_list dims) { - return DDim(dims.begin(), static_cast(dims.size())); -} - -DDim make_ddim(const std::vector& dims) { - return DDim(dims.data(), static_cast(dims.size())); -} - -DDim make_ddim(const std::vector& dims) { - return DDim(dims.data(), static_cast(dims.size())); -} - -struct DDimEqualityVisitor { - explicit DDimEqualityVisitor(const int64_t* d) : d_(d) {} - - template - inline bool operator()(const Dim& self) const { - return UnrollCompare::Run(self.Get(), d_); - } - - const int64_t* d_; -}; - -bool DDim::operator==(const DDim& d) const { - if (size() == -1 && d.size() == -1) { - return true; - } else if (size() == -1 || d.size() == -1) { - return false; - } else { - return size() == d.size() && - this->apply_visitor(DDimEqualityVisitor(d.Get())); - } -} - -bool DDim::operator!=(const DDim& d) const { return !(*this == d); } - -std::string DDim::to_str() const { - std::stringstream ss; - ss << '['; - if (rank_ > 0) ss << dim_[0]; - - for (int i = 1; i < rank_; ++i) ss << ", " << dim_[i]; - ss << ']'; - return ss.str(); -} - -struct ProductVisitor { - template - inline int64_t operator()(const Dim& dim) { - return product(dim); - } -}; - -int64_t product(const DDim& ddim) { - if (ddim.size() == -1) { - return 0; - } - return ddim.apply_visitor(ProductVisitor()); -} - -bool contain_unknown_dim(const DDim& ddim) { - for (int i = 0; i < ddim.size(); ++i) { - if (ddim[i] < 0) { - return true; - } - } - - return false; -} - -DDim slice_ddim(const DDim& dim, int begin, int end) { - PADDLE_ENFORCE_EQ( - (begin >= 0 && end <= dim.size()), - true, - phi::errors::InvalidArgument( - "[begin(%d), end(%d)) must be inside [0, %d) in ddim slice.", - begin, - end, - dim.size())); - // Constructor of DDim would check whether end - begin is valid - return DDim(dim.Get() + begin, end - begin); -} - -int arity(const DDim& d) { return d.size(); } - -struct DDimPrinter { - std::ostream& os; - explicit DDimPrinter(std::ostream& os_) : os(os_) {} - - template - void operator()(const Dim& t) { - os << t; - } -}; - -std::ostream& operator<<(std::ostream& os, const DDim& ddim) { - if (ddim.size() == -1) { - return os; - } - ddim.apply_visitor(DDimPrinter(os)); - return os; -} - -DDim flatten_to_3d(const DDim& src, int num_row_dims, int num_col_dims) { - PADDLE_ENFORCE_GE( - src.size(), - 3, - phi::errors::InvalidArgument("The rank of src dim should be at least 3 " - "in flatten_to_3d, but received %d.", - src.size())); - PADDLE_ENFORCE_EQ( - (num_row_dims >= 1 && num_row_dims < src.size()), - true, - phi::errors::InvalidArgument("The num_row_dims should be inside [1, %d] " - "in flatten_to_3d, but received %d.", - src.size() - 1, - num_row_dims)); - PADDLE_ENFORCE_EQ( - (num_col_dims >= 2 && num_col_dims <= src.size()), - true, - phi::errors::InvalidArgument("The num_col_dims should be inside [2, %d] " - "in flatten_to_3d, but received %d.", - src.size(), - num_col_dims)); - PADDLE_ENFORCE_GE( - num_col_dims, - num_row_dims, - phi::errors::InvalidArgument( - "The num_row_dims should be less 
than num_col_dims in flatten_to_3d," - "but received num_row_dims = %d, num_col_dims = %d.", - num_row_dims, - num_col_dims)); - - return DDim({product(slice_ddim(src, 0, num_row_dims)), - product(slice_ddim(src, num_row_dims, num_col_dims)), - product(slice_ddim(src, num_col_dims, src.size()))}); -} - -DDim flatten_to_2d(const DDim& src, int num_col_dims) { - return DDim({product(slice_ddim(src, 0, num_col_dims)), - product(slice_ddim(src, num_col_dims, src.size()))}); -} - -DDim flatten_to_1d(const DDim& src) { return DDim({product(src)}); } - -DDim stride(const DDim& ddim) { - DDim strides; - strides.rank_ = ddim.size(); - if (ddim.size() > 0) strides[ddim.size() - 1] = 1; - for (int i = ddim.size() - 2; i >= 0; --i) { - strides[i] = strides[i + 1] * ddim[i + 1]; - } - return strides; -} - -DDim stride_numel(const DDim& ddim) { - DDim strides; - strides.rank_ = ddim.size(); - if (ddim.size() > 0) strides[ddim.size() - 1] = ddim[ddim.size() - 1]; - for (int i = ddim.size() - 2; i >= 0; --i) { - strides[i] = strides[i + 1] * ddim[i]; - } - return strides; -} - -DDim DDim::reshape(std::vector& shape) const { - const DDim& in_dims = *this; - - for (int i = 0; i < static_cast(shape.size()); ++i) { - if (shape[i] == 0) { - shape[i] = static_cast(in_dims.at(i)); - } - } - - // Dim marked as "-1" must be inferred - auto it = std::find(shape.begin(), shape.end(), -1); - if (it != shape.end()) { - int index = static_cast(std::distance(shape.begin(), it)); - int reshape_out_product = - std::accumulate(shape.begin(), shape.end(), -1, std::multiplies()); - shape[index] = static_cast(product(in_dims)) / reshape_out_product; - } - - return phi::make_ddim(shape); -} - -DDim DDim::transpose(const std::vector& axis) const { - const DDim& in_dims = *this; - - DDim out_dims(in_dims); - for (int i = 0; i < static_cast(axis.size()); i++) { - out_dims[i] = in_dims[axis[i]]; - } - return out_dims; -} - -} // namespace phi - -namespace std { - -std::size_t hash::operator()(phi::DDim const& ddim) const { - int ndim = ddim.size(); - std::size_t seed = ndim; - for (int i = 0; i < ndim; ++i) { - seed ^= ddim.Get()[i] + 0x9e3779b9 + (seed << 6) + (seed >> 2); - } - return seed; -} - -} // namespace std diff --git a/paddle/phi/core/ddim.h b/paddle/phi/core/ddim.h deleted file mode 100644 index ff2abdb3b84b39..00000000000000 --- a/paddle/phi/core/ddim.h +++ /dev/null @@ -1,284 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
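The ddim.cc removed above is being moved, not rewritten, so its semantics carry over unchanged; a few values worked out from the deleted definitions make the helpers concrete (the paddle/common/ddim.h include path and common:: namespace for the free functions are assumed from the rest of this patch):

#include <vector>

#include "paddle/common/ddim.h"

void DDimExamples() {
  common::DDim d = common::make_ddim({2, 3, 4});
  // common::product(d)          -> 24
  // common::stride(d)           -> {12, 4, 1}  (strides[i] = strides[i+1] * d[i+1])
  // common::stride_numel(d)     -> {24, 12, 4} (strides[i] = strides[i+1] * d[i])
  // common::slice_ddim(d, 1, 3) -> {2, 3}
  std::vector<int> shape = {0, -1};
  // d.reshape(shape)            -> {2, 12}: a 0 copies the input dim at that
  //                                index, a single -1 is inferred from product(d)
}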
-#pragma once -#include -#include -#include -#include -#include - -#include "paddle/phi/api/ext/exception.h" -#include "paddle/phi/core/utils/dim.h" -#include "paddle/utils/test_macros.h" - -namespace phi { - -#define PADDLE_VISIT_DDIM_BASE(rank, callback) \ - case (rank): { \ - constexpr auto kRank = (rank); \ - return (callback); \ - } - -#define PADDLE_VISIT_DDIM(rank, callback) \ - switch (rank) { \ - PADDLE_VISIT_DDIM_BASE(0, callback); \ - PADDLE_VISIT_DDIM_BASE(1, callback); \ - PADDLE_VISIT_DDIM_BASE(2, callback); \ - PADDLE_VISIT_DDIM_BASE(3, callback); \ - PADDLE_VISIT_DDIM_BASE(4, callback); \ - PADDLE_VISIT_DDIM_BASE(5, callback); \ - PADDLE_VISIT_DDIM_BASE(6, callback); \ - PADDLE_VISIT_DDIM_BASE(7, callback); \ - PADDLE_VISIT_DDIM_BASE(8, callback); \ - PADDLE_VISIT_DDIM_BASE(9, callback); \ - default: \ - PD_THROW( \ - "Unimplemented error. Invalid dimension to be accessed. Now only " \ - "supports access to " \ - "dimension 0 to 9, but received dimension is ", \ - rank, \ - "."); \ - } - -template -inline void dynamic_dim_assign(const T1* in, T2* out, int n) { - if (n == -1) { - return; - } - PADDLE_VISIT_DDIM(n, (static_dim_assign(in, out))); -} - -/** - * \brief A dynamically sized dimension. - * - * The number of dimensions must be between [1, 9]. - */ -class DDim { - public: - constexpr static int kMaxRank = 9; - - DDim() : rank_(-1) { dim_[0] = 0; } - - DDim(const DDim& ddim) : dim_() { CopyFrom(ddim); } - - DDim(const int* d, int n) : rank_(n) { - dynamic_dim_assign(d, dim_.GetMutable(), n); - } - - DDim(const int64_t* d, int n) : rank_(n) { - dynamic_dim_assign(d, dim_.GetMutable(), n); - } - - template - /*implicit*/ DDim(const Dim& in) : rank_(D) { // NOLINT - UnsafeCast() = in; - } - - /*implicit*/ DDim(std::initializer_list init_list) - : DDim(init_list.begin(), init_list.size()) {} - - inline DDim& operator=(const DDim& ddim) { return CopyFrom(ddim); } - - template - inline DDim& operator=(const Dim& dim) { - rank_ = D; - UnsafeCast() = dim; - return *this; - } - - inline int64_t& operator[](int idx) { return dim_[idx]; } - - inline int64_t operator[](int idx) const { return dim_[idx]; } - - int64_t& at(int idx) { - PADDLE_ENFORCE_GE(idx, - 0, - phi::errors::InvalidArgument( - "Invalid DDim index to be accessed. The valid index " - "is between 0 and %d, but received index is %d.", - rank_, - idx)); - PADDLE_ENFORCE_LT(idx, - rank_, - phi::errors::InvalidArgument( - "Invalid DDim index to be accessed. The valid index " - "is between 0 and %d, but received index is %d.", - rank_, - idx)); - return dim_[idx]; - } - - int64_t at(int idx) const { - PADDLE_ENFORCE_GE(idx, - 0, - phi::errors::InvalidArgument( - "Invalid DDim index to be accessed. The valid index " - "is between 0 and %d, but received index is %d.", - rank_, - idx)); - PADDLE_ENFORCE_LT(idx, - rank_, - phi::errors::InvalidArgument( - "Invalid DDim index to be accessed. 
The valid index " - "is between 0 and %d, but received index is %d.", - rank_, - idx)); - return dim_[idx]; - } - - template - typename std::result_of&)>::type apply_visitor( - Visitor&& visitor) { - PADDLE_VISIT_DDIM(rank_, visitor(UnsafeCast())); - } - - template - typename std::result_of&)>::type apply_visitor( - Visitor&& visitor) const { - PADDLE_VISIT_DDIM(rank_, visitor(UnsafeCast())); - } - - bool operator==(const DDim& d) const; - - bool operator!=(const DDim& d) const; - - inline const int64_t* Get() const { return dim_.Get(); } - - inline int64_t* GetMutable() { return dim_.GetMutable(); } - - inline int size() const { return rank_; } - - std::string to_str() const; - - DDim reshape(std::vector& shape) const; // NOLINT - - DDim transpose(const std::vector& axis) const; - - private: - template - inline Dim& UnsafeCast() { - static_assert(D >= 0 && D <= kMaxRank, "Invalid rank"); - auto* p = static_cast(&dim_); - return *reinterpret_cast*>(p); - } - - template - inline const Dim& UnsafeCast() const { - static_assert(D >= 0 && D <= kMaxRank, "Invalid rank"); - auto* p = static_cast(&dim_); - return *reinterpret_cast*>(p); - } - - inline DDim& CopyFrom(const DDim& ddim) { - if (ddim.rank_ == -1) { - rank_ = -1; - return *this; - } - PADDLE_VISIT_DDIM(ddim.rank_, (*this = ddim.UnsafeCast())); - } - - friend DDim stride(const DDim& ddim); - friend DDim stride_numel(const DDim& ddim); - - private: - Dim dim_; - int rank_; -}; - -#undef PADDLE_VISIT_DDIM_BASE -#undef PADDLE_VISIT_DDIM - -/** - * \brief Make a DDim from std::vector - * - * \param dims An vector of ints. Must be sized between [1, 9] - */ -TEST_API DDim make_ddim(const std::vector& dims); - -TEST_API DDim make_ddim(const std::vector& dims); - -/** - * \brief Make a DDim from an initializer list - * - * \param dims An initializer list of ints. Must be sized between [1, 9] - * - */ -TEST_API DDim make_ddim(std::initializer_list dims); - -template -std::vector vectorize(const DDim& ddim) { - if (ddim.size() == -1) { - return std::vector({0}); - } - std::vector result(DDim::kMaxRank); - dynamic_dim_assign(ddim.Get(), result.data(), ddim.size()); - result.resize(ddim.size()); - return result; -} - -TEST_API int64_t product(const DDim& ddim); - -bool contain_unknown_dim(const DDim& ddim); - -/** - * \brief Slice a ddim - * - * Slice dim with [begin, end). - * e.g. DDim d = make_ddim({1,2,3,4,5}); - * slice_ddim(d, 1, 3); ====> {2,3} - */ -DDim slice_ddim(const DDim& dim, int begin, int end); - -/** - * \brief What is the length of this dimension? - * - * \param Dynamic dimension to inspect - */ - -int arity(const DDim& ddim); - -std::ostream& operator<<(std::ostream&, const DDim&); - -/** - * \brief Flatten dim to 3d - * e.g., DDim d = mak_ddim({1, 2, 3, 4, 5, 6}) - * flatten_to_3d(d, 2, 4); ===> {1*2, 3*4, 5*6} ===> {2, 12, 30} - */ -DDim flatten_to_3d(const DDim& src, int num_row_dims, int num_col_dims); - -// Reshape a tensor to a matrix. The matrix's first dimension(column length) -// will be the product of tensor's first `num_col_dims` dimensions. 
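Rounding out the flatten family documented above, the values follow directly from the product-of-slices definitions in the deleted ddim.cc:

// d = {1, 2, 3, 4, 5, 6}
// flatten_to_3d(d, 2, 4) -> {1*2, 3*4, 5*6} -> {2, 12, 30}
// flatten_to_2d(d, 4)    -> {1*2*3*4, 5*6}  -> {24, 30}
// flatten_to_1d(d)       -> {720}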
-DDim flatten_to_2d(const DDim& src, int num_col_dims); - -DDim flatten_to_1d(const DDim& src); - -DDim stride(const DDim& ddim); - -DDim stride_numel(const DDim& ddim); -} // namespace phi - -namespace paddle { -namespace framework { - -using DDim = phi::DDim; - -} // namespace framework -} // namespace paddle - -namespace std { -template <> -struct hash { - std::size_t operator()(phi::DDim const& ddim) const; -}; -} // namespace std diff --git a/paddle/phi/core/dense_tensor.h b/paddle/phi/core/dense_tensor.h index 3cbbfde38fe9a7..bcc2b07a89e3a3 100644 --- a/paddle/phi/core/dense_tensor.h +++ b/paddle/phi/core/dense_tensor.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/phi/core/allocator.h" +#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/storage_properties.h" #include "paddle/phi/core/stream.h" #include "paddle/phi/core/tensor_base.h" diff --git a/paddle/phi/core/distributed/auto_parallel/dist_tensor.cc b/paddle/phi/core/distributed/auto_parallel/dist_tensor.cc index f4d6be6c779b5e..99161488b54af8 100644 --- a/paddle/phi/core/distributed/auto_parallel/dist_tensor.cc +++ b/paddle/phi/core/distributed/auto_parallel/dist_tensor.cc @@ -45,7 +45,8 @@ DistTensor::DistTensor(const std::shared_ptr& global_value, if (!dist_attr.is_replicated()) { value_ = std::make_shared(); // 1. create replicated global tensor - TensorDistAttr replicated_dist_attr(vectorize(global_value->dims())); + TensorDistAttr replicated_dist_attr( + common::vectorize(global_value->dims())); replicated_dist_attr.set_process_mesh(dist_attr.process_mesh()); DistTensor replicated_tensor(global_value, replicated_dist_attr); @@ -80,7 +81,7 @@ DistTensor::DistTensor(const std::shared_ptr& global_value, } idx++; } - TensorDistAttr dist_attr(vectorize(dist_tensor_meta_.dims())); + TensorDistAttr dist_attr(common::vectorize(dist_tensor_meta_.dims())); dist_attr.set_process_mesh(dist_tensor_meta_.process_mesh()); dist_attr.set_dims_mapping(dist_tensor_meta_.dim_mapping()); dist_attr.set_partial_status(partial_dims); @@ -94,7 +95,8 @@ DistTensor::DistTensor(const std::shared_ptr& global_value, if (!dist_tensor_meta_.is_replicated()) { value_ = std::make_shared(); // 1. create replicated global tensor - TensorDistAttr replicated_dist_attr(vectorize(global_value->dims())); + TensorDistAttr replicated_dist_attr( + common::vectorize(global_value->dims())); replicated_dist_attr.set_process_mesh(process_mesh); DistTensor replicated_tensor(global_value, replicated_dist_attr); diff --git a/paddle/phi/core/distributed/auto_parallel/inferspmd_utils.h b/paddle/phi/core/distributed/auto_parallel/inferspmd_utils.h index 2d444decf640ab..71395507a09519 100644 --- a/paddle/phi/core/distributed/auto_parallel/inferspmd_utils.h +++ b/paddle/phi/core/distributed/auto_parallel/inferspmd_utils.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include +#include "paddle/common/macros.h" #include "paddle/phi/common/int_array.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/attribute.h" @@ -26,7 +27,6 @@ limitations under the License. 
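A pattern that repeats through the reshard hunks below: when the split axis is not already leading, a transpose permutation is built from the identity by swapping axis 0 with the split axis. Isolated from the p_to_s_reshard_function.cc hunk further on (rank and axis values assumed for the example):

#include <algorithm>
#include <vector>

std::vector<int> MakeSplitPermutation(int rank, int out_split_axis) {
  std::vector<int> axis(rank);
  for (int i = 0; i < rank; ++i) axis[i] = i;  // identity permutation
  std::swap(axis[0], axis[out_split_axis]);    // bring the split axis first
  return axis;  // e.g. rank 4, out_split_axis 2 -> {2, 1, 0, 3}
}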
*/ #include "paddle/phi/core/distributed/auto_parallel/dist_meta_tensor.h" #include "paddle/phi/core/distributed/type_defs.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/core/type_defs.h" #include "paddle/utils/any.h" #include "paddle/utils/flat_hash_map.h" diff --git a/paddle/phi/core/distributed/auto_parallel/placement_types.h b/paddle/phi/core/distributed/auto_parallel/placement_types.h index 08e128d9c6f379..ca92eb8003d64a 100644 --- a/paddle/phi/core/distributed/auto_parallel/placement_types.h +++ b/paddle/phi/core/distributed/auto_parallel/placement_types.h @@ -24,10 +24,10 @@ #include #include +#include "paddle/common/errors.h" #include "paddle/phi/common/reduce_type.h" #include "paddle/phi/core/distributed/auto_parallel/process_mesh.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/tensor_meta.h" namespace phi { diff --git a/paddle/phi/core/distributed/auto_parallel/reshard/nd_mesh_reshard_function.cc b/paddle/phi/core/distributed/auto_parallel/reshard/nd_mesh_reshard_function.cc index 778cf72e27a612..7c0f9017366338 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard/nd_mesh_reshard_function.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard/nd_mesh_reshard_function.cc @@ -120,13 +120,13 @@ void SameNdMeshReshardFunction::Eval(phi::DeviceContext* dev_ctx, ProcessMesh sub_mesh = GetSubProcessMesh(process_mesh, kv.first); // 1.3 Calculate the input one dim dist attr - TensorDistAttr in_one_dim_dist_attr(vectorize(in.dims())); + TensorDistAttr in_one_dim_dist_attr(common::vectorize(in.dims())); in_one_dim_dist_attr.set_process_mesh(sub_mesh); in_one_dim_dist_attr.set_partial_status(std::vector{0}, kv.second); // 1.4 Calculate the output one dim dist attr - TensorDistAttr out_one_dim_dist_attr(vectorize(in.dims())); + TensorDistAttr out_one_dim_dist_attr(common::vectorize(in.dims())); out_one_dim_dist_attr.set_process_mesh(sub_mesh); // 1.5 Change from partial to replicated @@ -158,7 +158,7 @@ void SameNdMeshReshardFunction::Eval(phi::DeviceContext* dev_ctx, ProcessMesh sub_mesh = GetSubProcessMesh(process_mesh, in_mesh_axis); // 2.3 Calculate the input one dim dist attr - TensorDistAttr in_one_dim_dist_attr(vectorize(in.dims())); + TensorDistAttr in_one_dim_dist_attr(common::vectorize(in.dims())); in_one_dim_dist_attr.set_process_mesh(sub_mesh); std::vector in_one_dims_mapping = in_one_dim_dist_attr.dims_mapping(); @@ -166,7 +166,7 @@ void SameNdMeshReshardFunction::Eval(phi::DeviceContext* dev_ctx, in_one_dim_dist_attr.set_dims_mapping(in_one_dims_mapping); // 2.4 Calculate the output one dim dist attr - TensorDistAttr out_one_dim_dist_attr(vectorize(in.dims())); + TensorDistAttr out_one_dim_dist_attr(common::vectorize(in.dims())); out_one_dim_dist_attr.set_process_mesh(sub_mesh); // 2.5 Change from shard to replicated @@ -198,11 +198,11 @@ void SameNdMeshReshardFunction::Eval(phi::DeviceContext* dev_ctx, ProcessMesh sub_mesh = GetSubProcessMesh(process_mesh, kv.first); // 3.3 Calculate the input one dim dist attr - TensorDistAttr in_one_dim_dist_attr(vectorize(in.dims())); + TensorDistAttr in_one_dim_dist_attr(common::vectorize(in.dims())); in_one_dim_dist_attr.set_process_mesh(sub_mesh); // 3.4 Calculate the output one dim dist attr - TensorDistAttr out_one_dim_dist_attr(vectorize(in.dims())); + TensorDistAttr out_one_dim_dist_attr(common::vectorize(in.dims())); out_one_dim_dist_attr.set_process_mesh(sub_mesh); 
out_one_dim_dist_attr.set_partial_status(std::vector{0}); @@ -238,11 +238,11 @@ void SameNdMeshReshardFunction::Eval(phi::DeviceContext* dev_ctx, ProcessMesh sub_mesh = GetSubProcessMesh(process_mesh, out_mesh_axis); // 4.3 Calculate the input one dim dist attr - TensorDistAttr in_one_dim_dist_attr(vectorize(in.dims())); + TensorDistAttr in_one_dim_dist_attr(common::vectorize(in.dims())); in_one_dim_dist_attr.set_process_mesh(sub_mesh); // 4.4 Calculate the output one dim dist attr - TensorDistAttr out_one_dim_dist_attr(vectorize(in.dims())); + TensorDistAttr out_one_dim_dist_attr(common::vectorize(in.dims())); out_one_dim_dist_attr.set_process_mesh(sub_mesh); std::vector out_one_dims_mapping = out_one_dim_dist_attr.dims_mapping(); diff --git a/paddle/phi/core/distributed/auto_parallel/reshard/p_to_s_reshard_function.cc b/paddle/phi/core/distributed/auto_parallel/reshard/p_to_s_reshard_function.cc index dcb9096544b3a5..07b047db612507 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard/p_to_s_reshard_function.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard/p_to_s_reshard_function.cc @@ -58,7 +58,7 @@ void PToSReshardFunction::Eval(DeviceContext* dev_ctx, DenseTensor in_reduce_scatter = in.value(); std::vector axis; if (out_split_axis != 0) { - for (size_t i = 0; i < vectorize(logical_ddim).size(); ++i) { + for (size_t i = 0; i < common::vectorize(logical_ddim).size(); ++i) { axis.emplace_back(i); } std::swap(axis[0], axis[out_split_axis]); diff --git a/paddle/phi/core/distributed/auto_parallel/reshard/reshard_function.cc b/paddle/phi/core/distributed/auto_parallel/reshard/reshard_function.cc index 3f766eb8d6acd7..3669e09890ba8a 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard/reshard_function.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard/reshard_function.cc @@ -43,7 +43,7 @@ void ReshardFunction::SetValue(DistTensor* tensor, const DenseTensor& value) { void ReshardFunction::SetDistProps(DistTensor* tensor, const DDim& dims, const TensorDistAttr& dist_attr) { - PADDLE_ENFORCE_EQ(dist_attr.verify(vectorize(dims)), + PADDLE_ENFORCE_EQ(dist_attr.verify(common::vectorize(dims)), true, phi::errors::InvalidArgument( "The input dist_attr [%s] and dims [%s] are improper.", @@ -56,7 +56,7 @@ void ReshardFunction::SetDistProps(DistTensor* tensor, void ReshardFunction::SetDistProps(DistTensor* tensor, const TensorDistAttr& dist_attr) { - PADDLE_ENFORCE_EQ(dist_attr.verify(vectorize(tensor->dims())), + PADDLE_ENFORCE_EQ(dist_attr.verify(common::vectorize(tensor->dims())), true, phi::errors::InvalidArgument( "The input dist_attr and dims are improper.")); diff --git a/paddle/phi/core/distributed/auto_parallel/reshard/s_to_s_reshard_function.cc b/paddle/phi/core/distributed/auto_parallel/reshard/s_to_s_reshard_function.cc index 931d3d8bc1d89a..fa1c78c3160e0b 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard/s_to_s_reshard_function.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard/s_to_s_reshard_function.cc @@ -62,7 +62,7 @@ void SToSReshardFunction::Eval(phi::DeviceContext* dev_ctx, // 1. 
preprocess, reshape and transpose the input tensor if (out_split_axis != 0) { // 1.1 calc the shape and reshape - std::vector pre_shape_vec = vectorize(logical_ddim); + std::vector pre_shape_vec = common::vectorize(logical_ddim); pre_shape_vec[in_split_axis] /= nranks; pre_shape_vec[out_split_axis] /= nranks; pre_shape_vec.insert(pre_shape_vec.begin() + out_split_axis, nranks); @@ -102,7 +102,7 @@ void SToSReshardFunction::Eval(phi::DeviceContext* dev_ctx, // 3. postprocess, reshape and transpose the output tensor if (in_split_axis != 0) { // 3.1 calc the shape and reshape - std::vector post_shape_vec = vectorize(logical_ddim); + std::vector post_shape_vec = common::vectorize(logical_ddim); post_shape_vec[in_split_axis] /= nranks; post_shape_vec[out_split_axis] /= nranks; post_shape_vec.insert(post_shape_vec.begin(), nranks); diff --git a/paddle/phi/core/distributed/bkcl_comm_context.cc b/paddle/phi/core/distributed/bkcl_comm_context.cc index 2f5fe0eb3ccbe4..bb301661ab8367 100644 --- a/paddle/phi/core/distributed/bkcl_comm_context.cc +++ b/paddle/phi/core/distributed/bkcl_comm_context.cc @@ -129,7 +129,7 @@ void BKCLCommContext::Recv(phi::DenseTensor* out_tensor, ToBKCLDataType(out_tensor->dtype()), stream)); VLOG(3) << "rank " << GetRank() << " recv " - << phi::product(out_tensor->dims()) << " from " << peer; + << common::product(out_tensor->dims()) << " from " << peer; } void BKCLCommContext::AllReduce(phi::DenseTensor* out_tensor, diff --git a/paddle/phi/core/distributed/check/nccl_dynamic_check.cc b/paddle/phi/core/distributed/check/nccl_dynamic_check.cc index 57bdf12bce7992..9307af45bd622b 100644 --- a/paddle/phi/core/distributed/check/nccl_dynamic_check.cc +++ b/paddle/phi/core/distributed/check/nccl_dynamic_check.cc @@ -16,9 +16,9 @@ #include "glog/logging.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #if defined(PADDLE_WITH_RCCL) #include diff --git a/paddle/phi/core/distributed/check/static_check.cc b/paddle/phi/core/distributed/check/static_check.cc index 8ec3e19e6038ea..25cdc8d01262e5 100644 --- a/paddle/phi/core/distributed/check/static_check.cc +++ b/paddle/phi/core/distributed/check/static_check.cc @@ -17,9 +17,9 @@ #include #include +#include "paddle/common/errors.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" namespace phi { namespace distributed { diff --git a/paddle/phi/core/distributed/comm_context.h b/paddle/phi/core/distributed/comm_context.h index 173ff6f8673d48..49c9a9238cf42c 100644 --- a/paddle/phi/core/distributed/comm_context.h +++ b/paddle/phi/core/distributed/comm_context.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" namespace phi { namespace distributed { diff --git a/paddle/phi/core/distributed/comm_context_manager.h b/paddle/phi/core/distributed/comm_context_manager.h index cc6eff70c71ba9..8c4d802294986f 100644 --- a/paddle/phi/core/distributed/comm_context_manager.h +++ b/paddle/phi/core/distributed/comm_context_manager.h @@ -20,9 +20,9 @@ #include #include +#include "paddle/common/macros.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/distributed/comm_context.h" -#include "paddle/phi/core/macros.h" #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #include "paddle/phi/backends/gpu/forwards.h" diff --git a/paddle/phi/core/distributed/comm_task.h b/paddle/phi/core/distributed/comm_task.h index 
05560eb67dafce..47ba01b980479a 100644 --- a/paddle/phi/core/distributed/comm_task.h +++ b/paddle/phi/core/distributed/comm_task.h @@ -18,9 +18,9 @@ #include #include #include +#include "paddle/common/macros.h" #include "paddle/phi/core/distributed/utils.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/macros.h" #if defined(PADDLE_WITH_RCCL) #include "paddle/phi/backends/dynload/rccl.h" diff --git a/paddle/phi/core/distributed/comm_task_manager.h b/paddle/phi/core/distributed/comm_task_manager.h index bb739d5c6afdb8..cda83f54d5bef2 100644 --- a/paddle/phi/core/distributed/comm_task_manager.h +++ b/paddle/phi/core/distributed/comm_task_manager.h @@ -23,9 +23,9 @@ #include #include +#include "paddle/common/macros.h" #include "paddle/phi/core/distributed/comm_context.h" #include "paddle/phi/core/distributed/comm_task.h" -#include "paddle/phi/core/macros.h" namespace phi { namespace distributed { diff --git a/paddle/phi/core/distributed/gloo_comm_context.h b/paddle/phi/core/distributed/gloo_comm_context.h index c29935c061ef72..1fdbad11e848b3 100644 --- a/paddle/phi/core/distributed/gloo_comm_context.h +++ b/paddle/phi/core/distributed/gloo_comm_context.h @@ -19,8 +19,8 @@ #include +#include "paddle/common/macros.h" #include "paddle/phi/core/distributed/comm_context.h" -#include "paddle/phi/core/macros.h" namespace phi { class DenseTensor; diff --git a/paddle/phi/core/distributed/gloo_utils.cc b/paddle/phi/core/distributed/gloo_utils.cc index 312681384a1996..55d4689ca3df80 100644 --- a/paddle/phi/core/distributed/gloo_utils.cc +++ b/paddle/phi/core/distributed/gloo_utils.cc @@ -26,10 +26,10 @@ #include #include +#include "paddle/common/errors.h" #include "paddle/phi/core/distributed/gloo_utils.h" #include "paddle/phi/core/distributed/store/tcp_utils.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" namespace phi { namespace distributed { diff --git a/paddle/phi/core/distributed/nccl_comm_context.cc b/paddle/phi/core/distributed/nccl_comm_context.cc index d1d92c98fb0fd6..8da676e74d911a 100644 --- a/paddle/phi/core/distributed/nccl_comm_context.cc +++ b/paddle/phi/core/distributed/nccl_comm_context.cc @@ -147,8 +147,8 @@ void NCCLCommContext::Send(const phi::DenseTensor& in_tensor, peer, nccl_comm_, stream)); - VLOG(3) << "rank " << GetRank() << " send " << phi::product(in_tensor.dims()) - << " to " << peer; + VLOG(3) << "rank " << GetRank() << " send " + << common::product(in_tensor.dims()) << " to " << peer; } void NCCLCommContext::Recv(phi::DenseTensor* out_tensor, @@ -167,7 +167,7 @@ void NCCLCommContext::Recv(phi::DenseTensor* out_tensor, nccl_comm_, stream)); VLOG(3) << "rank " << GetRank() << " recv " - << phi::product(out_tensor->dims()) << " from " << peer; + << common::product(out_tensor->dims()) << " from " << peer; } void NCCLCommContext::AllReduce(phi::DenseTensor* out_tensor, diff --git a/paddle/phi/core/distributed/nccl_comm_context.h b/paddle/phi/core/distributed/nccl_comm_context.h index b9fdce02f4b5f0..609b5e0defe079 100644 --- a/paddle/phi/core/distributed/nccl_comm_context.h +++ b/paddle/phi/core/distributed/nccl_comm_context.h @@ -22,10 +22,10 @@ #include #endif +#include "paddle/common/macros.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_decls.h" #include "paddle/phi/core/distributed/comm_context.h" -#include "paddle/phi/core/macros.h" #if defined(PADDLE_WITH_RCCL) #include "paddle/phi/backends/dynload/rccl.h" diff --git a/paddle/phi/core/distributed/nccl_comm_task.h 
b/paddle/phi/core/distributed/nccl_comm_task.h index f9a8f3c2509220..fca9004cf0b2d4 100644 --- a/paddle/phi/core/distributed/nccl_comm_task.h +++ b/paddle/phi/core/distributed/nccl_comm_task.h @@ -13,11 +13,11 @@ // limitations under the License. #pragma once +#include "paddle/common/macros.h" #include "paddle/phi/backends/gpu/gpu_decls.h" #include "paddle/phi/core/distributed/comm_context.h" #include "paddle/phi/core/distributed/comm_task.h" #include "paddle/phi/core/distributed/utils.h" -#include "paddle/phi/core/macros.h" #if defined(PADDLE_WITH_RCCL) #include "paddle/phi/backends/dynload/rccl.h" diff --git a/paddle/phi/core/distributed/nccl_tools.cc b/paddle/phi/core/distributed/nccl_tools.cc index e419cfca905fa5..a5388796d1f45b 100644 --- a/paddle/phi/core/distributed/nccl_tools.cc +++ b/paddle/phi/core/distributed/nccl_tools.cc @@ -16,8 +16,8 @@ #include +#include "paddle/common/errors.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #if NCCL_VERSION_CODE >= 21300 #define ENABLE_NCCL_GET_LAST_ERROR diff --git a/paddle/phi/core/distributed/store/CMakeLists.txt b/paddle/phi/core/distributed/store/CMakeLists.txt index 3b62a1367eea9e..c22b793e000f95 100644 --- a/paddle/phi/core/distributed/store/CMakeLists.txt +++ b/paddle/phi/core/distributed/store/CMakeLists.txt @@ -11,5 +11,5 @@ if(NOT WIN32) cc_test( test_c_tcp_store SRCS test_tcp_store.cc - DEPS phi) + DEPS phi common) endif() diff --git a/paddle/phi/core/distributed/xccl_comm_context.cc b/paddle/phi/core/distributed/xccl_comm_context.cc index 5c82e7baf0e82f..ba7e24ab06b9e1 100644 --- a/paddle/phi/core/distributed/xccl_comm_context.cc +++ b/paddle/phi/core/distributed/xccl_comm_context.cc @@ -122,8 +122,8 @@ void XCCLCommContext::Send(const phi::DenseTensor& in_tensor, peer, xccl_comm_, stream); - VLOG(3) << "rank " << GetRank() << " send " << phi::product(in_tensor.dims()) - << " to " << peer; + VLOG(3) << "rank " << GetRank() << " send " + << common::product(in_tensor.dims()) << " to " << peer; } void XCCLCommContext::Recv(phi::DenseTensor* out_tensor, @@ -140,7 +140,7 @@ void XCCLCommContext::Recv(phi::DenseTensor* out_tensor, xccl_comm_, stream); VLOG(3) << "rank " << GetRank() << " recv " - << phi::product(out_tensor->dims()) << " from " << peer; + << common::product(out_tensor->dims()) << " from " << peer; } void XCCLCommContext::AllReduce(phi::DenseTensor* out_tensor, diff --git a/paddle/phi/core/distributed/xccl_comm_context.h b/paddle/phi/core/distributed/xccl_comm_context.h index 86f8dfc76a1eb3..0c253eb925bb4d 100644 --- a/paddle/phi/core/distributed/xccl_comm_context.h +++ b/paddle/phi/core/distributed/xccl_comm_context.h @@ -13,8 +13,8 @@ // limitations under the License. #pragma once +#include "paddle/common/macros.h" #include "paddle/phi/core/distributed/comm_context.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/backends/device_manager.h" diff --git a/paddle/phi/core/enforce.h b/paddle/phi/core/enforce.h index aa68dd802c0b4c..61e502951f24ee 100644 --- a/paddle/phi/core/enforce.h +++ b/paddle/phi/core/enforce.h @@ -11,19 +11,7 @@ limitations under the License. 
*/ #pragma once -#ifdef __GNUC__ -#include // for __cxa_demangle -#endif // __GNUC__ - -#if !defined(_WIN32) -#include // dladdr -#include // sleep, usleep -#else // _WIN32 -#ifndef NOMINMAX -#define NOMINMAX // msvc max/min macro conflict with std::min/max -#endif -#include // GetModuleFileName, Sleep -#endif +#include "paddle/common/enforce.h" #ifdef PADDLE_WITH_CUDA #include @@ -51,18 +39,11 @@ limitations under the License. */ #include #include #include -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" #if !defined(_WIN32) && !defined(PADDLE_WITH_MUSL) #include #endif -#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h -#include "paddle/phi/core/errors.h" - -#include "paddle/utils/string/printf.h" -#include "paddle/utils/string/to_string.h" -#include "paddle/utils/test_macros.h" - #ifdef PADDLE_WITH_CUDA #include "paddle/phi/backends/dynload/cublas.h" #include "paddle/phi/backends/dynload/cudnn.h" @@ -70,7 +51,6 @@ limitations under the License. */ #include "paddle/phi/backends/dynload/cusolver.h" #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) #include - #include "paddle/phi/backends/dynload/nccl.h" #endif // __APPLE__ #endif // PADDLE_WITH_CUDA @@ -82,7 +62,6 @@ limitations under the License. */ #include "paddle/phi/backends/dynload/rocblas.h" #if !defined(__APPLE__) && defined(PADDLE_WITH_RCCL) #include // NOLINT - #include "paddle/phi/backends/dynload/rccl.h" #endif // __APPLE__ #endif // PADDLE_WITH_HIP @@ -97,56 +76,9 @@ limitations under the License. */ #include "xpu/bkcl.h" #endif -#include "paddle/utils/variant.h" - -namespace phi { -class ErrorSummary; -} // namespace phi - -namespace phi { -namespace proto {} // namespace proto -} // namespace phi - namespace phi { namespace enforce { -/** HELPER MACROS AND FUNCTIONS **/ -#ifndef PADDLE_MAY_THROW -#define PADDLE_MAY_THROW noexcept(false) -#endif - -// Because most enforce conditions would evaluate to true, we can use -// __builtin_expect to instruct the C++ compiler to generate code that -// always forces branch prediction of true. -// This generates faster binary code. __builtin_expect is since C++11. -// For more details, please check https://stackoverflow.com/a/43870188/724872. -#if !defined(_WIN32) -#define UNLIKELY(condition) __builtin_expect(static_cast(condition), 0) -#else -// there is no equivalent intrinsics in msvc. -#define UNLIKELY(condition) (condition) -#endif - -#if !defined(_WIN32) -#define LIKELY(condition) __builtin_expect(static_cast(condition), 1) -#else -// there is no equivalent intrinsics in msvc. 
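The LIKELY/UNLIKELY block deleted above is now expected to come from paddle/common/enforce.h; as the deleted comment notes, it is purely a branch-prediction hint built on __builtin_expect, with a plain pass-through on MSVC. Its effect in use, sketched (the cast's template argument was lost in extraction above and is assumed to be bool):

#if !defined(_WIN32)
#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0)
#else
#define UNLIKELY(condition) (condition)  // MSVC: no equivalent intrinsic
#endif

int CheckedDiv(int a, int b) {
  if (UNLIKELY(b == 0)) {  // enforce-style checks almost never fire
    return 0;
  }
  return a / b;
}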
-#define LIKELY(condition) (condition) -#endif - -#if defined _WIN32 && defined PADDLE_ON_INFERENCE && defined PADDLE_NO_PYTHON -#define HANDLE_THE_ERROR try { -#define END_HANDLE_THE_ERROR \ - } \ - catch (const std::exception& e) { \ - std::cout << e.what() << std::endl; \ - throw; \ - } -#else -#define HANDLE_THE_ERROR -#define END_HANDLE_THE_ERROR -#endif - #ifdef __GNUC__ inline std::string demangle(std::string name) { int status = -4; // some arbitrary value to eliminate the compiler warning @@ -318,7 +250,7 @@ struct EnforceNotMet : public std::exception { simple_err_str_ = SimplifyErrorTypeFormat(err_str_); } - EnforceNotMet(const phi::ErrorSummary& error, const char* file, int line) + EnforceNotMet(const common::ErrorSummary& error, const char* file, int line) : code_(error.code()), err_str_(GetTraceBackString(error.to_string(), file, line)) { simple_err_str_ = SimplifyErrorTypeFormat(err_str_); @@ -332,7 +264,7 @@ struct EnforceNotMet : public std::exception { } } - phi::ErrorCode code() const { return code_; } + common::ErrorCode code() const { return code_; } const std::string& error_str() const { return err_str_; } @@ -350,7 +282,7 @@ struct EnforceNotMet : public std::exception { private: // Used to determine the final type of exception thrown - phi::ErrorCode code_ = phi::ErrorCode::LEGACY; + common::ErrorCode code_ = common::ErrorCode::LEGACY; // Complete error message // e.g. InvalidArgumentError: *** std::string err_str_; diff --git a/paddle/phi/core/errors.h b/paddle/phi/core/errors.h deleted file mode 100644 index 1dd5cbcaaf6c74..00000000000000 --- a/paddle/phi/core/errors.h +++ /dev/null @@ -1,147 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include -#include -#include - -#include "paddle/utils/string/printf.h" -#include "paddle/utils/test_macros.h" - -namespace phi { -enum ErrorCode { - // Legacy error. - // Error type string: "Error" - LEGACY = 0, - - // Client specified an invalid argument. - // Error type string: "InvalidArgumentError" - INVALID_ARGUMENT = 1, - - // Some requested entity (e.g., file or directory) was not found. - // Error type string: "NotFoundError" - NOT_FOUND = 2, - - // Operation tried to iterate past the valid input range. E.g., seeking or - // reading past end of file. - // Error type string: "OutOfRangeError" - OUT_OF_RANGE = 3, - - // Some entity that we attempted to create (e.g., file or directory) - // already exists. - // Error type string: "AlreadyExistsError" - ALREADY_EXISTS = 4, - - // Some resource has been exhausted, perhaps a per-user quota, or - // perhaps the entire file system is out of space. - // Error type string: "ResourceExhaustedError" - RESOURCE_EXHAUSTED = 5, - - // Operation was rejected because the system is not in a state - // required for the operation's execution. 
- // Error type string: "PreconditionNotMetError" - PRECONDITION_NOT_MET = 6, - - // The caller does not have permission to execute the specified - // operation. - // Error type string: "PermissionDeniedError" - PERMISSION_DENIED = 7, - - // Deadline expired before operation could complete. - // Error type string: "ExecutionTimeout" - EXECUTION_TIMEOUT = 8, - - // Operation is not implemented or not supported/enabled in this service. - // Error type string: "UnimplementedError" - UNIMPLEMENTED = 9, - - // The service is currently unavailable. This is a most likely a - // transient condition and may be corrected by retrying with - // a backoff. - // Error type string: "UnavailableError" - UNAVAILABLE = 10, - - // Fatal errors. Means some invariant expected by the underlying - // system has been broken. If you see one of these errors, - // something is very broken. - // Error type string: "FatalError" - FATAL = 11, - - // Third-party library error. - // Error type string: "ExternalError" - EXTERNAL = 12, -}; - -class ErrorSummary { - public: - // Note(chenweihang): Final deprecated constructor - // This constructor is used to be compatible with - // current existing untyped PADDLE_ENFORCE_* - // PADDLE_ENFORCE - // Note(chenweihang): Windows openblas need this - // constructor for compiling PADDLE_ENFORCE in *.cu, - // this is a bug cause we can't remove this - // constructor now. - template - explicit ErrorSummary(Args... args) { - code_ = phi::ErrorCode::LEGACY; - msg_ = paddle::string::Sprintf(args...); - } - - // Note(chenweihang): Only recommended constructor - // No longer supports PADDLE_ENFORCE without type or without error message - explicit ErrorSummary(ErrorCode code, std::string msg) - : code_(code), msg_(msg) {} - - ErrorCode code() const { return code_; } - - const std::string& error_message() const { return msg_; } - - TEST_API std::string to_string() const; - - private: - ErrorCode code_; - std::string msg_; -}; - -namespace errors { - -#define REGISTER_ERROR(FUNC, CONST, ...) \ - template \ - ::phi::ErrorSummary FUNC(Args... args) { \ - return ::phi::ErrorSummary(::phi::CONST, \ - ::paddle::string::Sprintf(args...)); \ - } - -REGISTER_ERROR(InvalidArgument, ErrorCode::INVALID_ARGUMENT) -REGISTER_ERROR(NotFound, ErrorCode::NOT_FOUND) -REGISTER_ERROR(OutOfRange, ErrorCode::OUT_OF_RANGE) -REGISTER_ERROR(AlreadyExists, ErrorCode::ALREADY_EXISTS) -REGISTER_ERROR(ResourceExhausted, ErrorCode::RESOURCE_EXHAUSTED) -REGISTER_ERROR(PreconditionNotMet, ErrorCode::PRECONDITION_NOT_MET) -REGISTER_ERROR(PermissionDenied, ErrorCode::PERMISSION_DENIED) -REGISTER_ERROR(ExecutionTimeout, ErrorCode::EXECUTION_TIMEOUT) -REGISTER_ERROR(Unimplemented, ErrorCode::UNIMPLEMENTED) -REGISTER_ERROR(Unavailable, ErrorCode::UNAVAILABLE) -REGISTER_ERROR(Fatal, ErrorCode::FATAL) -REGISTER_ERROR(External, ErrorCode::EXTERNAL) - -#undef REGISTER_ERROR - -} // namespace errors -} // namespace phi diff --git a/paddle/phi/core/extended_tensor.cc b/paddle/phi/core/extended_tensor.cc index 31d0fb25c88c1d..03609316b840cd 100644 --- a/paddle/phi/core/extended_tensor.cc +++ b/paddle/phi/core/extended_tensor.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
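Each REGISTER_ERROR line in the deleted errors.h stamps out a variadic factory function (the "template <typename... Args>" heads were lost in extraction above). One expansion, approximately, with a typical call site; this mirrors the macro body rather than being a standalone program:

// Approximate expansion of
// REGISTER_ERROR(InvalidArgument, ErrorCode::INVALID_ARGUMENT):
template <typename... Args>
::phi::ErrorSummary InvalidArgument(Args... args) {
  return ::phi::ErrorSummary(::phi::ErrorCode::INVALID_ARGUMENT,
                             ::paddle::string::Sprintf(args...));
}

// Typical call site inside a kernel or InferMeta function:
//   PADDLE_ENFORCE_EQ(
//       rank, 2,
//       phi::errors::InvalidArgument("expected a 2-D tensor, got rank %d.",
//                                    rank));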
*/ #include "paddle/phi/core/extended_tensor.h" +#include "paddle/phi/core/enforce.h" namespace phi { diff --git a/paddle/phi/core/flags.h b/paddle/phi/core/flags.h index 7c905ec6c64914..9a5ff40596e7d8 100644 --- a/paddle/phi/core/flags.h +++ b/paddle/phi/core/flags.h @@ -20,7 +20,7 @@ #include #include -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" #include "paddle/utils/flags.h" #include "paddle/utils/test_macros.h" #include "paddle/utils/variant.h" diff --git a/paddle/phi/core/infermeta_utils.h b/paddle/phi/core/infermeta_utils.h index bc6ef528d3ba93..06036b2c138940 100644 --- a/paddle/phi/core/infermeta_utils.h +++ b/paddle/phi/core/infermeta_utils.h @@ -19,11 +19,11 @@ limitations under the License. */ #include #include +#include "paddle/common/macros.h" #include "paddle/phi/common/int_array.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/attribute.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/core/meta_tensor.h" #include "paddle/phi/core/type_defs.h" #include "paddle/utils/any.h" diff --git a/paddle/phi/core/kernel_factory.cc b/paddle/phi/core/kernel_factory.cc index 69c7900def16ba..a5c5a3994a81b1 100644 --- a/paddle/phi/core/kernel_factory.cc +++ b/paddle/phi/core/kernel_factory.cc @@ -538,7 +538,7 @@ std::string KernelSelectionErrorMessage(const std::string& kernel_name, backend_set.insert( paddle::experimental::BackendToString(kernel_key.backend())); all_kernel_key[paddle::experimental::BackendToString(kernel_key.backend()) + - ", " + phi::DataLayoutToString(kernel_key.layout())] + ", " + common::DataLayoutToString(kernel_key.layout())] .push_back(DataTypeToString(kernel_key.dtype())); } // 1. If target_key not supports target backend, output "Selected wrong diff --git a/paddle/phi/core/kernel_factory.h b/paddle/phi/core/kernel_factory.h index 9e3c67fa9ad35b..c30d883ee6c462 100644 --- a/paddle/phi/core/kernel_factory.h +++ b/paddle/phi/core/kernel_factory.h @@ -18,9 +18,9 @@ #include #include #include +#include "paddle/common/layout.h" #include "paddle/phi/common/backend.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/compat/get_kerneltype_forvar_utils.h" #include "paddle/phi/core/type_defs.h" diff --git a/paddle/phi/core/macros.h b/paddle/phi/core/macros.h deleted file mode 100644 index 6245d94c97cb10..00000000000000 --- a/paddle/phi/core/macros.h +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -namespace phi { - -// Disable the copy and assignment operator for a class. 
- -#define DISABLE_COPY_AND_ASSIGN(classname) \ - private: \ - classname(const classname&) = delete; \ - classname(classname&&) = delete; \ - classname& operator=(const classname&) = delete; \ - classname& operator=(classname&&) = delete - -#define PD_STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg) \ - _PD_STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg) - -#define _PD_STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg) \ - struct __test_global_namespace_##uniq_name##__ {}; \ - static_assert(std::is_same<::__test_global_namespace_##uniq_name##__, \ - __test_global_namespace_##uniq_name##__>::value, \ - msg) - -#ifdef __COUNTER__ -#define PD_ID __COUNTER__ -#else -#define PD_ID __LINE__ -#endif - -#if defined(_WIN32) -#define UNUSED -#define __builtin_expect(EXP, C) (EXP) -#else -#define UNUSED __attribute__((unused)) -#endif - -#define PD_CONCATENATE(arg1, arg2) PD_CONCATENATE1(arg1, arg2) -#define PD_CONCATENATE1(arg1, arg2) PD_CONCATENATE2(arg1, arg2) -#define PD_CONCATENATE2(arg1, arg2) arg1##arg2 -#define PD_EXPAND(x) x - -#if defined(__NVCC__) || defined(__HIPCC__) -#define PADDLE_RESTRICT __restrict__ -#else -#define PADDLE_RESTRICT -#endif - -#ifndef PADDLE_WITH_MUSL -#if defined(__FLT_MAX__) -#define FLT_MAX __FLT_MAX__ -#endif // __FLT_MAX__ -#endif // PADDLE_WITH_MUSL - -} // namespace phi diff --git a/paddle/phi/core/meta_tensor.h b/paddle/phi/core/meta_tensor.h index 6990f95a4b68ac..b28081c8d4ef77 100644 --- a/paddle/phi/core/meta_tensor.h +++ b/paddle/phi/core/meta_tensor.h @@ -14,10 +14,10 @@ limitations under the License. */ #pragma once +#include "paddle/common/ddim.h" +#include "paddle/common/layout.h" +#include "paddle/common/macros.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/core/tensor_base.h" #include "paddle/phi/core/tensor_meta.h" diff --git a/paddle/phi/core/mixed_vector.h b/paddle/phi/core/mixed_vector.h index d25a646608d3d2..251aa28232b8e8 100644 --- a/paddle/phi/core/mixed_vector.h +++ b/paddle/phi/core/mixed_vector.h @@ -22,10 +22,10 @@ limitations under the License. */ #include #include "glog/logging.h" +#include "paddle/common/errors.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/allocator.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/utils/none.h" #include "paddle/utils/optional.h" diff --git a/paddle/phi/core/scope_guard.h b/paddle/phi/core/scope_guard.h index 1c73133761dd67..5ad0cdb7e3a280 100644 --- a/paddle/phi/core/scope_guard.h +++ b/paddle/phi/core/scope_guard.h @@ -17,7 +17,7 @@ #include #include -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" namespace phi { diff --git a/paddle/phi/core/selected_rows_impl.h b/paddle/phi/core/selected_rows_impl.h index a29f66b99420ab..3647583e5038fd 100644 --- a/paddle/phi/core/selected_rows_impl.h +++ b/paddle/phi/core/selected_rows_impl.h @@ -21,8 +21,8 @@ limitations under the License. 
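A brief usage sketch for DISABLE_COPY_AND_ASSIGN from the relocated macros header; the class name is hypothetical. The macro deletes all four copy/move members, so the type can only be reached through its accessor:

#include "paddle/common/macros.h"  // new home of DISABLE_COPY_AND_ASSIGN

class KernelRegistry {  // hypothetical singleton
 public:
  static KernelRegistry& Instance() {
    static KernelRegistry instance;
    return instance;
  }

 private:
  KernelRegistry() = default;
  DISABLE_COPY_AND_ASSIGN(KernelRegistry);
};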
*/ #include #include +#include "paddle/common/ddim.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/utils/rw_lock.h" @@ -142,9 +142,9 @@ class SelectedRowsImpl { * @brief Get complete Dims before */ phi::DDim GetCompleteDims() const { - std::vector dims = vectorize(value_->dims()); + std::vector dims = common::vectorize(value_->dims()); dims[0] = height_; - return phi::make_ddim(dims); + return common::make_ddim(dims); } /// \brief Returns the number of elements contained in tensor. diff --git a/paddle/phi/core/sparse_coo_tensor.cc b/paddle/phi/core/sparse_coo_tensor.cc index f8517fb0ff007e..95501527f51fd0 100644 --- a/paddle/phi/core/sparse_coo_tensor.cc +++ b/paddle/phi/core/sparse_coo_tensor.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/core/sparse_coo_tensor.h" +#include "paddle/phi/core/enforce.h" namespace phi { @@ -112,7 +113,7 @@ void SparseCooTensor::Resize(const DDim& dense_dims, phi::errors::InvalidArgument( "the sparse_dim must be less than or equal dense_dims.")); - DDim indices_dims = phi::make_ddim({sparse_dim, non_zero_num}); + DDim indices_dims = common::make_ddim({sparse_dim, non_zero_num}); auto dense_dim = dense_dims.size() - sparse_dim; DDim values_dims; if (dense_dim) { @@ -121,9 +122,9 @@ void SparseCooTensor::Resize(const DDim& dense_dims, memcpy(&dense_dim_vec[1], dense_dims.Get() + sparse_dim, dense_dim * sizeof(dense_dims[0])); - values_dims = phi::make_ddim(dense_dim_vec); + values_dims = common::make_ddim(dense_dim_vec); } else { - values_dims = phi::make_ddim({non_zero_num}); + values_dims = common::make_ddim({non_zero_num}); } this->non_zero_indices_.Resize(indices_dims); diff --git a/paddle/phi/core/sparse_csr_tensor.cc b/paddle/phi/core/sparse_csr_tensor.cc index 0dc0807a36baf1..525f38cd8263db 100644 --- a/paddle/phi/core/sparse_csr_tensor.cc +++ b/paddle/phi/core/sparse_csr_tensor.cc @@ -111,10 +111,10 @@ void SparseCsrTensor::Resize(const DDim& dense_dims, crows_size = dense_dims[0] * (dense_dims[1] + 1); } - DDim crows_dims = phi::make_ddim({crows_size}); + DDim crows_dims = common::make_ddim({crows_size}); this->non_zero_crows_.Resize(crows_dims); - DDim col_dims = phi::make_ddim({non_zero_num}); + DDim col_dims = common::make_ddim({non_zero_num}); this->non_zero_cols_.Resize(col_dims); this->non_zero_elements_.Resize(col_dims); } diff --git a/paddle/phi/core/storage_properties.h b/paddle/phi/core/storage_properties.h index ac64875452bf8f..421f853872cb64 100644 --- a/paddle/phi/core/storage_properties.h +++ b/paddle/phi/core/storage_properties.h @@ -16,7 +16,7 @@ limitations under the License. */ #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/utils/type_registry.h" #ifdef PADDLE_WITH_DNNL diff --git a/paddle/phi/core/tensor_array.cc b/paddle/phi/core/tensor_array.cc index 1b5a1189483411..a1bcb23c4704b9 100644 --- a/paddle/phi/core/tensor_array.cc +++ b/paddle/phi/core/tensor_array.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
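The GetCompleteDims() hunk above shows the post-move idiom: lower a DDim to std::vector<int64_t> with common::vectorize, edit it, and rebuild with common::make_ddim. As a standalone sketch (function name illustrative; the phi::DDim spelling follows the diff itself):

#include <cstdint>
#include <vector>

#include "paddle/common/ddim.h"

phi::DDim WithLeadingDim(const phi::DDim& dims, int64_t height) {
  std::vector<int64_t> vec = common::vectorize(dims);
  vec[0] = height;                // e.g. restore the full row count
  return common::make_ddim(vec);  // {7, 128} with height 100 -> {100, 128}
}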
*/ #include "paddle/phi/core/tensor_array.h" +#include "paddle/phi/core/enforce.h" namespace phi { diff --git a/paddle/phi/core/tensor_base.h b/paddle/phi/core/tensor_base.h index 069382720e19de..99318f86cf42b6 100644 --- a/paddle/phi/core/tensor_base.h +++ b/paddle/phi/core/tensor_base.h @@ -14,12 +14,12 @@ limitations under the License. */ #pragma once +#include "paddle/common/ddim.h" +#include "paddle/common/layout.h" #include "paddle/phi/common/backend.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/allocator.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/utils/type_registry.h" namespace phi { diff --git a/paddle/phi/core/tensor_meta.cc b/paddle/phi/core/tensor_meta.cc index aa0fa712ad5af8..d44ab61a59cd98 100644 --- a/paddle/phi/core/tensor_meta.cc +++ b/paddle/phi/core/tensor_meta.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/core/tensor_meta.h" -#include "paddle/pir/core/enforce.h" +#include "paddle/phi/core/enforce.h" namespace phi { diff --git a/paddle/phi/core/tensor_meta.h b/paddle/phi/core/tensor_meta.h index 176ef60cda7cda..77f2e2bebb4ec3 100644 --- a/paddle/phi/core/tensor_meta.h +++ b/paddle/phi/core/tensor_meta.h @@ -16,10 +16,10 @@ limitations under the License. */ #include +#include "paddle/common/ddim.h" +#include "paddle/common/layout.h" #include "paddle/phi/common/backend.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" #include "paddle/utils/any.h" #include "paddle/utils/optional.h" #include "paddle/utils/test_macros.h" @@ -42,6 +42,7 @@ namespace phi { * 0 2 5 7 10 12 15 20 */ using LoD = std::vector>; +using DDim = phi::DDim; /// \brief The meta data of dense tensor. Take the structure type /// and use all default operations. diff --git a/paddle/phi/core/tensor_utils.cc b/paddle/phi/core/tensor_utils.cc index 26e675326593cb..17fdef1b9cfbdd 100644 --- a/paddle/phi/core/tensor_utils.cc +++ b/paddle/phi/core/tensor_utils.cc @@ -915,7 +915,7 @@ phi::DenseTensor ReshapeToMatrix(const phi::DenseTensor& src, } phi::DenseTensor res; res.ShareDataWith(src); - res.Resize(phi::flatten_to_2d(src.dims(), num_col_dims)); + res.Resize(common::flatten_to_2d(src.dims(), num_col_dims)); return res; } diff --git a/paddle/phi/core/threadpool.h b/paddle/phi/core/threadpool.h index b45991f9a7f825..318ec38d3c8c58 100644 --- a/paddle/phi/core/threadpool.h +++ b/paddle/phi/core/threadpool.h @@ -24,8 +24,8 @@ #include #include +#include "paddle/common/macros.h" // for DISABLE_COPY_AND_ASSIGN #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/macros.h" // for DISABLE_COPY_AND_ASSIGN namespace phi { diff --git a/paddle/phi/core/utils/array.h b/paddle/phi/core/utils/array.h deleted file mode 100644 index 44290b73737fb7..00000000000000 --- a/paddle/phi/core/utils/array.h +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/utils/unroll_array_ops.h" - -namespace phi { - -template -class Array { - public: - static constexpr size_t kSize = N; - - HOSTDEVICE inline Array() {} - - template - HOSTDEVICE inline explicit Array(const T &val, Args... args) { - static_assert(N == sizeof...(Args) + 1, "Invalid argument"); - UnrollVarArgsAssign::Run(data_, val, args...); - } - - HOSTDEVICE inline void Fill(const T &val) { - UnrollFillConstant::Run(data_, val); - } - - HOSTDEVICE inline const T *Get() const { return data_; } - - HOSTDEVICE inline T *GetMutable() { return data_; } - - HOSTDEVICE inline T &operator[](size_t i) { return *advance(data_, i); } - - // Writing "return data_[i]" would cause compilation warning/error: - // "array subscript is above array bound" in Python 35 CI. - // It seems that it is a false warning of GCC if we do not check the bounds - // of array index. But for better performance, we do not check in operator[] - // like what is in STL. If users want to check the bounds, use at() instead - HOSTDEVICE inline const T &operator[](size_t i) const { - return *advance(data_, i); - } - - HOSTDEVICE inline T &at(size_t i) { -#if !defined(__CUDA_ARCH__) && !defined(__HIPCC__) - PADDLE_ENFORCE_LT( - i, N, phi::errors::OutOfRange("Array index out of bounds.")); -#endif - return (*this)[i]; - } - - HOSTDEVICE inline const T &at(size_t i) const { -#if !defined(__CUDA_ARCH__) && !defined(__HIPCC__) - PADDLE_ENFORCE_LT( - i, N, phi::errors::OutOfRange("Array index out of bounds.")); -#endif - return (*this)[i]; - } - - HOSTDEVICE constexpr size_t size() const { return N; } - - HOSTDEVICE inline bool operator==(const Array &other) const { - return UnrollCompare::Run(data_, other.data_); - } - - HOSTDEVICE inline bool operator!=(const Array &other) const { - return !(*this == other); - } - - private: - template - HOSTDEVICE static inline U *advance(U *ptr, size_t i) { - return ptr + i; - } - - T data_[N] = {}; -}; - -template -class Array { - public: - static constexpr size_t kSize = 0; - - HOSTDEVICE inline Array() {} - - HOSTDEVICE inline void Fill(const T &val) {} - - HOSTDEVICE inline constexpr T *Get() const { return nullptr; } - - // Add constexpr to GetMutable() cause warning in MAC - HOSTDEVICE inline T *GetMutable() { return nullptr; } - - HOSTDEVICE inline T &operator[](size_t) { -#if defined(__HIPCC__) || defined(__CUDA_ARCH__) - // HIP and CUDA will have compile error, if use "obj()" - // function declared in block scope cannot have 'static' storage class - static T obj{}; - return obj; -#else - PADDLE_THROW(phi::errors::Unavailable("Array has no element.")); -#endif - } - - HOSTDEVICE inline const T &operator[](size_t) const { -#if defined(__HIPCC__) || defined(__CUDA_ARCH__) - // HIP and CUDA will have compile error, if use "obj()" - // function declared in block scope cannot have 'static' storage class - static const T obj{}; - return obj; -#else - PADDLE_THROW(phi::errors::Unavailable("Array has no element.")); -#endif - } - - HOSTDEVICE inline T &at(size_t i) { return (*this)[i]; } - 
- HOSTDEVICE inline const T &at(size_t i) const { return (*this)[i]; } - - HOSTDEVICE constexpr size_t size() const { return 0; } - - HOSTDEVICE constexpr bool operator==(const Array &other) const { - return true; - } - - HOSTDEVICE constexpr bool operator!=(const Array &other) const { - return false; - } -}; - -} // namespace phi diff --git a/paddle/phi/core/utils/dim.h b/paddle/phi/core/utils/dim.h deleted file mode 100644 index 7cc023f8cc7d16..00000000000000 --- a/paddle/phi/core/utils/dim.h +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include -#include - -#include "paddle/phi/core/hostdevice.h" -#include "paddle/phi/core/utils/array.h" - -namespace phi { - -// Statically sized, statically indexed dimension -template -class Dim : public Array { - public: - static_assert(D >= 0, "D must be not less than 0"); - - static constexpr int kRank = D; - using BaseClass = Array; - - inline Dim(int64_t head, const Dim& tail) { - (*this)[0] = head; - new (this->GetMutable() + 1) Dim(tail); - } - - template - HOSTDEVICE explicit Dim(int64_t head, Args... args) - : BaseClass(head, args...) {} - - /** Construct a Dim with each dimension set to the given index */ - HOSTDEVICE explicit Dim(int64_t idx) { this->Fill(idx); } - - HOSTDEVICE Dim() = default; - - HOST std::string to_string() const; -}; - -// Product of a Dim -template -HOSTDEVICE inline int64_t product(const Dim& a) { - return UnrollProduct::Run(a.Get()); -} - -/** - * Helper function to create a Dim - * - * \param idxes The type of Dim constructed depends on the number of params - * - */ - -template -HOSTDEVICE inline Dim make_dim(Args... idxes) { - return Dim(idxes...); -} - -// Allows us to output a Dim -template -inline std::ostream& operator<<(std::ostream& os, const Dim& d) { - if (D > 0) { - os << d[0]; - for (int i = 1; i < D; ++i) { - os << ", " << d[i]; - } - } else { - os << ""; - } - - return os; -} - -inline std::ostream& operator<<(std::ostream& os, const Dim<0>& d UNUSED) { - return os; -} - -template -HOST std::string Dim::to_string() const { - std::stringstream stream; - stream << *this; - return stream.str(); -} - -template -inline void static_dim_assign(const T1* in, T2* out) { - UnrollAssign::Run(in, out); -} - -} // namespace phi - -namespace paddle { -namespace framework { -template -using Dim = phi::Dim; - -} // namespace framework -} // namespace paddle diff --git a/paddle/phi/core/utils/unroll_array_ops.h b/paddle/phi/core/utils/unroll_array_ops.h deleted file mode 100644 index ea9d6273e4a4ef..00000000000000 --- a/paddle/phi/core/utils/unroll_array_ops.h +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
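Putting the two deleted headers together, a usage sketch of the pre-move phi::Dim surface, reconstructed from the declarations above (whose template heads were stripped in extraction); it assumes the pre-move headers are still on the include path:

#include "paddle/phi/core/utils/dim.h"

phi::Dim<3> d = phi::make_dim(2, 3, 4);     // statically sized {2, 3, 4}
int64_t numel = phi::product(d);            // 24, computed via UnrollProduct
bool same = (d == phi::make_dim(2, 3, 4));  // element-wise UnrollCompare
// operator<< prints "2, 3, 4"; a Dim<0> prints nothing at all.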
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include -#include "paddle/phi/core/hostdevice.h" -#include "paddle/phi/core/macros.h" - -namespace phi { -namespace detail { -template -struct UnrollFillConstant { - template - HOSTDEVICE inline static void Run(T *data, T val) { - data[kStart] = val; - UnrollFillConstant::Run(data, val); - } -}; - -template -struct UnrollFillConstant { - template - HOSTDEVICE inline static void Run(T *data UNUSED, T val UNUSED) {} -}; - -template -struct UnrollAssign { - template - HOSTDEVICE inline static void Run(const Tin *d1, Tout *d2) { - d2[kStart] = static_cast(d1[kStart]); - UnrollAssign::Run(d1, d2); - } -}; - -template -struct UnrollAssign { - template - HOSTDEVICE inline static void Run(const Tin *d1 UNUSED, Tout *d2 UNUSED) {} -}; - -template -struct UnrollVarArgsAssignImpl { - template - HOSTDEVICE inline static void Run(T *d, T val, Args... args) { - static_assert(sizeof...(args) + 1 == kEnd - kStart, "Wrong argument"); - d[kStart] = val; - UnrollVarArgsAssignImpl::Run( - d, args...); - } -}; - -template -struct UnrollVarArgsAssignImpl { - HOSTDEVICE inline static void Run(T *d) {} -}; - -template -struct UnrollVarArgsAssign { - template - HOSTDEVICE inline static void Run(T *d, Args... args) { - UnrollVarArgsAssignImpl::Run( - d, args...); - } -}; - -template -struct UnrollCompare { - template - HOSTDEVICE inline static bool Run(const T *d1, const T *d2) { - return d1[kStart] == d2[kStart] && - UnrollCompare::Run(d1, d2); - } -}; - -template -struct UnrollCompare { - template - HOSTDEVICE inline constexpr static bool Run(const T *d1 UNUSED, - const T *d2 UNUSED) { - return true; - } -}; - -template -struct UnrollProduct { - template - HOSTDEVICE inline static T Run(const T *d) { - return d[kStart] * - UnrollProduct::Run(d); - } -}; - -template -struct UnrollProduct { - template - HOSTDEVICE inline constexpr static T Run(const T *d UNUSED) { - return 1; - } -}; -} // namespace detail - -template -using UnrollFillConstant = detail::UnrollFillConstant<0, N, N == 0>; - -template -using UnrollAssign = detail::UnrollAssign<0, N, N == 0>; - -template -using UnrollVarArgsAssign = detail::UnrollVarArgsAssign; - -template -using UnrollCompare = detail::UnrollCompare<0, N, N == 0>; - -template -using UnrollProduct = detail::UnrollProduct<0, N, N == 0>; - -} // namespace phi diff --git a/paddle/phi/core/visit_type.h b/paddle/phi/core/visit_type.h index 047ba79bc15998..28f575295b47e1 100644 --- a/paddle/phi/core/visit_type.h +++ b/paddle/phi/core/visit_type.h @@ -14,7 +14,7 @@ limitations under the License. 
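All of the Unroll* helpers above share one scheme: a trailing bool template parameter flips to true at kEnd and selects a base-case specialization, so the "loop" is fully expanded at compile time. A self-contained equivalent of UnrollProduct (template heads reconstructed, since the originals were stripped in extraction):

#include <cstddef>
#include <cstdint>

namespace detail {
template <std::size_t kStart, std::size_t kEnd, bool kStop>
struct UnrollProduct {
  template <typename T>
  static T Run(const T* d) {
    // Multiply d[kStart] and recurse; kStop flips to true at kEnd.
    return d[kStart] *
           UnrollProduct<kStart + 1, kEnd, kStart + 1 == kEnd>::Run(d);
  }
};

template <std::size_t kStart, std::size_t kEnd>
struct UnrollProduct<kStart, kEnd, true> {
  template <typename T>
  static T Run(const T*) { return 1; }  // base case: empty product
};
}  // namespace detail

template <std::size_t N>
using UnrollProduct = detail::UnrollProduct<0, N, N == 0>;

// UnrollProduct<3>::Run(dims) expands to dims[0] * dims[1] * dims[2] * 1.
int64_t Product3(const int64_t* dims) { return UnrollProduct<3>::Run(dims); }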
*/ #pragma once -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/common/data_type.h" namespace phi { diff --git a/paddle/phi/infermeta/backward.cc b/paddle/phi/infermeta/backward.cc index a3eb7ce8c906b3..606ca86fad99ed 100644 --- a/paddle/phi/infermeta/backward.cc +++ b/paddle/phi/infermeta/backward.cc @@ -26,9 +26,9 @@ void AffineGridGradInferMeta(const MetaTensor& output_grad, if (input_grad) { auto output_dims = output_grad.dims(); if (output_dims.size() == 4) { - input_grad->set_dims(phi::make_ddim({output_dims[0], 2, 3})); + input_grad->set_dims(common::make_ddim({output_dims[0], 2, 3})); } else { - input_grad->set_dims(phi::make_ddim({output_dims[0], 3, 4})); + input_grad->set_dims(common::make_ddim({output_dims[0], 3, 4})); } } } @@ -588,7 +588,7 @@ void KernelWithXShapeInferMeta(const MetaTensor& xshape, const MetaTensor& out, MetaTensor* dx) { auto xshape_dims = xshape.dims(); - auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size()); + auto x_dims = common::slice_ddim(xshape_dims, 1, xshape_dims.size()); dx->set_dims(x_dims); dx->set_dtype(out.dtype()); dx->share_lod(xshape); @@ -716,17 +716,17 @@ void MemoryEfficientAttentionGradInferMeta(const MetaTensor& query, std::vector value_grad_dims( {value_batch_size, value_seq_length, value_num_head, value_head_size}); - query_grad->set_dims(phi::make_ddim(query_grad_dims)); + query_grad->set_dims(common::make_ddim(query_grad_dims)); query_grad->share_lod(query); query_grad->set_dtype(query.dtype()); query_grad->set_layout(query.layout()); - key_grad->set_dims(phi::make_ddim(key_grad_dims)); + key_grad->set_dims(common::make_ddim(key_grad_dims)); key_grad->share_lod(key); key_grad->set_dtype(key.dtype()); key_grad->set_layout(key.layout()); - value_grad->set_dims(phi::make_ddim(value_grad_dims)); + value_grad->set_dims(common::make_ddim(value_grad_dims)); value_grad->share_lod(value); value_grad->set_dtype(value.dtype()); value_grad->set_layout(value.layout()); @@ -740,7 +740,7 @@ void MemoryEfficientAttentionGradInferMeta(const MetaTensor& query, std::vector bias_grad_dims( {bias_batch_size, bias_seq_length, bias_num_head, bias_head_size}); - bias_grad->set_dims(phi::make_ddim(bias_grad_dims)); + bias_grad->set_dims(common::make_ddim(bias_grad_dims)); bias_grad->share_lod(bias); bias_grad->set_dtype(bias.dtype()); bias_grad->set_layout(bias.layout()); @@ -818,8 +818,8 @@ void NllLossGradInferMeta(const MetaTensor& x, const auto& x_dims = x.dims(); const auto& label_dims = label.dims(); const auto& dout_dims = out_grad.dims(); - bool contain_unknown_dim = - phi::contain_unknown_dim(x_dims) || phi::contain_unknown_dim(dout_dims); + bool contain_unknown_dim = common::contain_unknown_dim(x_dims) || + common::contain_unknown_dim(dout_dims); bool check = config.is_runtime || !contain_unknown_dim; if (check) { @@ -1061,12 +1061,12 @@ void StackGradInferMeta(const MetaTensor& out_grad, x_grad.size(), static_cast(dy_dim[axis]))); - auto vec = phi::vectorize(dy_dim); + auto vec = common::vectorize(dy_dim); vec.erase(vec.begin() + axis); for (auto& grad : x_grad) { if (grad) { - grad->set_dims(phi::make_ddim(vec)); + grad->set_dims(common::make_ddim(vec)); grad->set_dtype(out_grad.dtype()); } } @@ -1153,9 +1153,9 @@ void UnStackGradInferMeta(const std::vector& out_grad, rank)); if (axis < 0) axis += (rank + 1); - auto vec = phi::vectorize(input_dims[0]); + auto vec = common::vectorize(input_dims[0]); vec.insert(vec.begin() + axis, static_cast(input_dims.size())); - 
x_grad->set_dims(phi::make_ddim(vec)); + x_grad->set_dims(common::make_ddim(vec)); x_grad->set_dtype(out_grad[0]->dtype()); } diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc index 53b3b00286a583..b41ec59d259741 100644 --- a/paddle/phi/infermeta/binary.cc +++ b/paddle/phi/infermeta/binary.cc @@ -18,11 +18,11 @@ limitations under the License. */ #include #include "glog/logging.h" +#include "paddle/common/ddim.h" +#include "paddle/common/layout.h" #include "paddle/phi/api/lib/data_type_set.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/type_traits.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/infermeta/unary.h" @@ -76,12 +76,12 @@ static void BinarySameInputDimsCheck(const MetaTensor& x, // Used in MatrixRankTolInferMeta static DDim CheckAndGetOutputDim(const DDim& dim_x) { - auto x_vec = phi::vectorize(dim_x); + auto x_vec = common::vectorize(dim_x); if (x_vec.size() == 2) { - return phi::make_ddim({}); + return common::make_ddim({}); } x_vec.erase(x_vec.end() - 2, x_vec.end()); - return phi::make_ddim(x_vec); + return common::make_ddim(x_vec); } } // namespace detail @@ -91,7 +91,7 @@ void AllValueCompareInferMeta(const MetaTensor& x, MetaTensor* out, MetaConfig config) { detail::BinarySameInputDimsCheck(x, y, config); - out->set_dims(phi::make_ddim({})); + out->set_dims(common::make_ddim({})); out->set_dtype(DataType::BOOL); } @@ -135,7 +135,7 @@ void KLDivInferMeta(const MetaTensor& x, if ("none" == reduction) { out->set_dims(dim_x); } else { - out->set_dims(phi::make_ddim({})); + out->set_dims(common::make_ddim({})); } out->set_dtype(x.dtype()); } @@ -212,7 +212,7 @@ void BCELossInferMeta(const MetaTensor& input, bool check = true; if ((!config.is_runtime) && - (phi::product(input_dims) <= 0 || phi::product(label_dims) <= 0)) { + (common::product(input_dims) <= 0 || common::product(label_dims) <= 0)) { check = false; } @@ -267,7 +267,7 @@ void BincountInferMeta(const MetaTensor& x, weights_dim, input_dim)); } - out->set_dims(phi::make_ddim({-1})); + out->set_dims(common::make_ddim({-1})); if (weights) { out->set_dtype(weights.dtype()); } else { @@ -278,8 +278,8 @@ void BincountInferMeta(const MetaTensor& x, } void BmmInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) { - std::vector x_dims = phi::vectorize(x.dims()); - std::vector y_dims = phi::vectorize(y.dims()); + std::vector x_dims = common::vectorize(x.dims()); + std::vector y_dims = common::vectorize(y.dims()); std::size_t x_ndims = x_dims.size(); std::size_t y_ndims = y_dims.size(); @@ -318,7 +318,7 @@ void BmmInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) { "Y's batch size [%s]")); dim_out.push_back(x_dims[1]); dim_out.push_back(y_dims[2]); - out->set_dims(phi::make_ddim(dim_out)); + out->set_dims(common::make_ddim(dim_out)); out->share_lod(x); out->set_dtype(x.dtype()); out->set_layout(x.layout()); @@ -358,8 +358,8 @@ void CholeskySolveInferMeta(const MetaTensor& x, x_dims[x_dims_n - 2], y_dims[y_dims_n - 2])); - std::vector x_dims_vec = phi::vectorize(x_dims); - std::vector y_dims_vec = phi::vectorize(y_dims); + std::vector x_dims_vec = common::vectorize(x_dims); + std::vector y_dims_vec = common::vectorize(y_dims); std::vector x_dims_vec_cut(x_dims_vec.begin(), x_dims_vec.end() - 2); std::vector y_dims_vec_cut(y_dims_vec.begin(), y_dims_vec.end() - 2); @@ -372,7 +372,7 @@ void 
CholeskySolveInferMeta(const MetaTensor& x, {x_dims_vec[x_dims_n - 2], x_dims_vec[x_dims_n - 1]}); // dim of 'out' is the same with 'X' after broadcast - out->set_dims(phi::make_ddim(x_broadcast_dims)); + out->set_dims(common::make_ddim(x_broadcast_dims)); out->set_dtype(x.dtype()); out->set_layout(x.layout()); out->share_lod(x); @@ -401,7 +401,7 @@ void CompareRawInferMeta(const MetaTensor& x, max_dim, axis); - out->set_dims(make_ddim(out_dims_array)); + out->set_dims(common::make_ddim(out_dims_array)); out->share_lod(x); } if (!out->is_same_tensor(x)) { @@ -426,15 +426,15 @@ void CompareAllInferMeta(const MetaTensor& x, errors::InvalidArgument( "The size of dim_y should not be greater than dim_x's.")); out->share_lod(x); - out->set_dims(make_ddim({})); + out->set_dims(common::make_ddim({})); } void ComplexInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) { if (x.dims() == y.dims()) { - auto sizes = vectorize(x.dims()); - out->set_dims(phi::make_ddim(sizes)); + auto sizes = common::vectorize(x.dims()); + out->set_dims(common::make_ddim(sizes)); out->set_dtype(dtype::ToComplex(x.dtype())); // NOTE(chenfeiyu): lod & broadcasting is intrinsically contradictory // so tensors with lod are not supported here @@ -455,7 +455,7 @@ void ComplexInferMeta(const MetaTensor& x, out_dims_array.data(), max_dim, axis); - out->set_dims(phi::make_ddim(out_dims_array)); + out->set_dims(common::make_ddim(out_dims_array)); out->set_dtype(dtype::ToComplex(x.dtype())); } } @@ -540,7 +540,7 @@ void ConvInferMeta(const MetaTensor& input, in_dims.size(), in_dims, strides.size(), - phi::make_ddim(strides), + common::make_ddim(strides), in_sub_stride_size)); const auto input_channels = @@ -583,14 +583,15 @@ void ConvInferMeta(const MetaTensor& input, DDim in_data_dims; if (channel_last) { - in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); + in_data_dims = common::slice_ddim(in_dims, 1, in_dims.size() - 1); } else { - in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size()); + in_data_dims = common::slice_ddim(in_dims, 2, in_dims.size()); } - DDim filter_data_dims = phi::slice_ddim(filter_dims, 2, filter_dims.size()); + DDim filter_data_dims = + common::slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = phi::vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); phi::UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); @@ -616,7 +617,7 @@ void ConvInferMeta(const MetaTensor& input, output_shape.push_back(filter_dims[0]); } - out->set_dims(make_ddim(output_shape)); + out->set_dims(common::make_ddim(output_shape)); out->set_dtype(input.dtype()); } @@ -662,7 +663,7 @@ void ConvTransposeInferMeta(const MetaTensor& x, const DataLayout data_layout = config.is_run_mkldnn_kernel ? 
DataLayout::kNCHW - : phi::StringToDataLayout(data_format); + : common::StringToDataLayout(data_format); PADDLE_ENFORCE_EQ( x_dims.size() == 4 || x_dims.size() == 5, @@ -750,7 +751,7 @@ void ConvTransposeInferMeta(const MetaTensor& x, x_data_dims = slice_ddim(x_dims, 1, x_dims.size() - 1); } DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings_, &dilations_, padding_algorithm, x_data_dims, strides, ksize); @@ -775,7 +776,7 @@ void ConvTransposeInferMeta(const MetaTensor& x, "output_size of Op(ConvTransposeOp) should not be " "less than the infered output size. But received output_size = " "[%s], whose dim %d is less than the infered output size [%s]", - make_ddim(output_size).to_str(), + common::make_ddim(output_size).to_str(), i, infer_shape)); PADDLE_ENFORCE_LT( @@ -786,7 +787,7 @@ void ConvTransposeInferMeta(const MetaTensor& x, "than infered size + stride. But received output_size = [%s], " "whose dim %d is not less than the infered output size (%d) + " "stride (%d) = %d", - make_ddim(output_size).to_str(), + common::make_ddim(output_size).to_str(), i, infer_shape, strides[i], @@ -802,7 +803,7 @@ void ConvTransposeInferMeta(const MetaTensor& x, "output_padding of Op(ConvTransposeOp) should not be " "less than the 0. But received output_padding = " "[%s], whose dim %d is less than 0", - make_ddim(output_padding).to_str(), + common::make_ddim(output_padding).to_str(), i)); PADDLE_ENFORCE_LT( output_padding[i], @@ -813,7 +814,7 @@ void ConvTransposeInferMeta(const MetaTensor& x, "[%s], " "whose dim %d is not less than either stride (%d) or " "dilation (%d)", - make_ddim(output_size).to_str(), + common::make_ddim(output_size).to_str(), i, strides[i], dilations_[i])); @@ -827,7 +828,7 @@ void ConvTransposeInferMeta(const MetaTensor& x, output_shape.push_back(filter_dims[1] * groups); } - out->set_dims(make_ddim(output_shape)); + out->set_dims(common::make_ddim(output_shape)); out->set_dtype(x.dtype()); } @@ -1009,19 +1010,19 @@ void DistInferMeta(const MetaTensor& x, auto x_dims = x.dims(); auto y_dims = y.dims(); - PADDLE_ENFORCE_NE(phi::product(x_dims), + PADDLE_ENFORCE_NE(common::product(x_dims), 0, phi::errors::InvalidArgument( "The Input(X) has not been initialized properly. The " "shape of Input(X) = [%s].", x_dims)); - PADDLE_ENFORCE_NE(phi::product(y_dims), + PADDLE_ENFORCE_NE(common::product(y_dims), 0, phi::errors::InvalidArgument( "The Input(Y) has not been initialized properly. The " "shape of Input(Y) = [%s].", y_dims)); - out->set_dims(phi::make_ddim({})); + out->set_dims(common::make_ddim({})); out->set_dtype(x.dtype()); } @@ -1127,7 +1128,7 @@ void DropoutNdInferMeta(const MetaTensor& x, "equal to 0 and less than the dimensions of x. 
But " "received axis is {%s}, the dimension size of x is %d.", i, - phi::make_ddim(axis), + common::make_ddim(axis), x_dims.size())); } @@ -1143,7 +1144,7 @@ void DropoutNdInferMeta(const MetaTensor& x, mask_dims[t] = x_dims[static_cast(t)]; }); - mask->set_dims(make_ddim(mask_dims)); + mask->set_dims(common::make_ddim(mask_dims)); mask->set_dtype(DataType::UINT8); } } @@ -1183,9 +1184,9 @@ void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) { "with input tensor Y: %s", x_dims.to_str(), y_dims.to_str())); - std::vector x_dims_vec = phi::vectorize(x_dims); + std::vector x_dims_vec = common::vectorize(x_dims); std::vector x_dims_vec_cut(x_dims_vec.begin(), x_dims_vec.end() - 1); - x_dims = phi::make_ddim(x_dims_vec_cut); + x_dims = common::make_ddim(x_dims_vec_cut); out->set_dims(x_dims); out->set_dtype(x.dtype()); out->set_layout(x.layout()); @@ -1239,13 +1240,13 @@ void ElementwiseRawInferMeta(const MetaTensor& x, if (should_rotate) { // Pick bigger shape and rotate this one bool x_over_y = (x_dims.size() > y_dims.size()); - auto vdims = - x_over_y ? phi::vectorize(x_dims) : phi::vectorize(y_dims); + auto vdims = x_over_y ? common::vectorize(x_dims) + : common::vectorize(y_dims); std::rotate(vdims.begin() + 1, vdims.begin() + 2, vdims.end()); if (x_over_y) { - x_dims = phi::make_ddim(vdims); + x_dims = common::make_ddim(vdims); } else { - y_dims = phi::make_ddim(vdims); + y_dims = common::make_ddim(vdims); } } #endif @@ -1263,7 +1264,7 @@ void ElementwiseRawInferMeta(const MetaTensor& x, out_dims_array.end()); } #endif - auto out_dims = phi::make_ddim(out_dims_array); + auto out_dims = common::make_ddim(out_dims_array); out->set_dims(out_dims); } else { out->set_dims(x.dims()); @@ -1298,9 +1299,9 @@ void EmbeddingInferMeta(const MetaTensor& x, table_dims.size(), table_dims)); - auto output_dims = phi::vectorize(ids_dims); + auto output_dims = common::vectorize(ids_dims); output_dims.push_back(table_dims[1]); - out->set_dims(phi::make_ddim(output_dims)); + out->set_dims(common::make_ddim(output_dims)); out->set_dtype(weight.dtype()); out->share_lod(x); } @@ -1324,9 +1325,9 @@ void CEmbeddingInferMeta(const MetaTensor& weight, table_dims.size(), table_dims)); - auto output_dims = phi::vectorize(ids_dims); + auto output_dims = common::vectorize(ids_dims); output_dims.push_back(table_dims[1]); - out->set_dims(phi::make_ddim(output_dims)); + out->set_dims(common::make_ddim(output_dims)); out->set_dtype(weight.dtype()); out->share_lod(x); @@ -1364,7 +1365,7 @@ void ExpandAsInferMeta(const MetaTensor& x, "to %d. 
But received: rank %u.", MAX_RANK_SUPPORTED, target_shape.size())); - out->set_dims(phi::make_ddim(target_shape)); + out->set_dims(common::make_ddim(target_shape)); out->set_dtype(x.dtype()); #undef MAX_RANK_SUPPORTED } @@ -1408,7 +1409,7 @@ static std::vector GetInputShape(phi::DDim dim, if (is_input_fused) { dim = dim.reshape(shape).transpose(axis); } - return phi::vectorize(dim); + return common::vectorize(dim); } void FusedMatmulInferMeta(const MetaTensor& x, @@ -1498,7 +1499,7 @@ void FusedMatmulInferMeta(const MetaTensor& x, new_dims.push_back(1); } - auto ddim_out = phi::make_ddim(new_dims); + auto ddim_out = common::make_ddim(new_dims); std::vector shape = fused_reshape_Out; const std::vector& axis = fused_transpose_Out; @@ -1584,7 +1585,7 @@ void GatherInferMeta(const MetaTensor& x, for (int i = 1; i < input_dim.size(); ++i) { out_dim_vec.emplace_back(input_dim[i]); } - auto output_dims = phi::make_ddim(out_dim_vec); + auto output_dims = common::make_ddim(out_dim_vec); out->set_dims(output_dims); out->set_dtype(x.dtype()); out->share_lod(x); @@ -1596,7 +1597,7 @@ void GatherInferMeta(const MetaTensor& x, for (int i = axis_v + 1; i < input_dim.size(); i++) { out_dim_vec.push_back(input_dim[i]); // NOLINT } - auto output_dims = phi::make_ddim(out_dim_vec); + auto output_dims = common::make_ddim(out_dim_vec); out->set_dims(output_dims); out->set_dtype(x.dtype()); out->share_lod(x); @@ -1621,7 +1622,7 @@ void GatherInferMeta(const MetaTensor& x, for (int i = axis_v + 1; i < input_dim.size(); i++) { out_dim_vec.push_back(input_dim[i]); // NOLINT } - auto output_dims = phi::make_ddim(out_dim_vec); + auto output_dims = common::make_ddim(out_dim_vec); out->set_dims(output_dims); out->set_dtype(x.dtype()); out->share_lod(x); @@ -1659,7 +1660,7 @@ void GatherNdInferMeta(const MetaTensor& x, result_dims.emplace_back(x_dims[i]); } - out->set_dims(phi::make_ddim(result_dims)); + out->set_dims(common::make_ddim(result_dims)); out->share_lod(x); out->set_dtype(x.dtype()); } @@ -1760,8 +1761,8 @@ void HuberLossInferMeta(const MetaTensor& input, input_dims.size(), label_dims.size())); - bool contain_unknown_dim = phi::contain_unknown_dim(input_dims) || - phi::contain_unknown_dim(label_dims); + bool contain_unknown_dim = common::contain_unknown_dim(input_dims) || + common::contain_unknown_dim(label_dims); if (config.is_runtime || !contain_unknown_dim) { PADDLE_ENFORCE_EQ( input_dims, @@ -1847,12 +1848,12 @@ void IndexSelectInferMeta(const MetaTensor& x, true, phi::errors::InvalidArgument("The length of Input(Index) can't be 0.")); - auto output_dim = phi::vectorize(input_dim); + auto output_dim = common::vectorize(input_dim); if (dim < 0) { dim += input_dim.size(); } output_dim[dim] = index_dim[0]; - output->set_dims(phi::make_ddim(output_dim)); + output->set_dims(common::make_ddim(output_dim)); output->set_dtype(x.dtype()); output->set_layout(x.layout()); output->share_lod(x); @@ -1874,12 +1875,12 @@ void IndexSelectStridedInferMeta(const MetaTensor& x, input_dim.size() - 1, dim)); - auto output_dim = phi::vectorize(input_dim); + auto output_dim = common::vectorize(input_dim); if (dim < 0) { dim += input_dim.size(); } output_dim.erase(output_dim.begin() + dim); - output->set_dims(phi::make_ddim(output_dim)); + output->set_dims(common::make_ddim(output_dim)); output->set_dtype(x.dtype()); output->set_layout(x.layout()); output->share_lod(x); @@ -1968,7 +1969,7 @@ void KronInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) { int64_t dim_yi = (i < rank - rank_y) ? 
1 : dim_y.at(i - (rank - rank_y)); dim_out.push_back(dim_xi == -1 || dim_yi == -1 ? -1 : dim_xi * dim_yi); } - out->set_dims(phi::make_ddim(dim_out)); + out->set_dims(common::make_ddim(dim_out)); out->set_dtype(x.dtype()); } @@ -1981,7 +1982,7 @@ void LogLossInferMeta(const MetaTensor& input, auto label_dims = label.dims(); if (config.is_runtime || - (phi::product(pred_dims) > 0 && phi::product(label_dims) > 0)) { + (common::product(pred_dims) > 0 && common::product(label_dims) > 0)) { PADDLE_ENFORCE_EQ( pred_dims, label_dims, @@ -2140,8 +2141,8 @@ void MatmulInferMeta(const MetaTensor& x, bool trans_x, bool trans_y, MetaTensor* out) { - std::vector dims_x = phi::vectorize(x.dims()); - std::vector dims_y = phi::vectorize(y.dims()); + std::vector dims_x = common::vectorize(x.dims()); + std::vector dims_y = common::vectorize(y.dims()); auto ndims_x = dims_x.size(); auto ndims_y = dims_y.size(); PADDLE_ENFORCE_GT(ndims_x, @@ -2198,7 +2199,7 @@ void MatmulInferMeta(const MetaTensor& x, new_dims.push_back(N); // NOLINT } - auto ddim_out = phi::make_ddim(new_dims); + auto ddim_out = common::make_ddim(new_dims); out->set_dims(ddim_out); if (x.dtype() == phi::DataType::INT8) { @@ -2221,7 +2222,7 @@ void MatmulWithFlattenInferMeta(const MetaTensor& x, << " x_num_col_dims=" << x_num_col_dims << " y_num_col_dims=" << y_num_col_dims; - PADDLE_ENFORCE_NE(phi::product(y_dims), + PADDLE_ENFORCE_NE(common::product(y_dims), 0, phi::errors::PreconditionNotMet( "The Input variable Y has not " @@ -2249,8 +2250,8 @@ void MatmulWithFlattenInferMeta(const MetaTensor& x, y_dims, y_num_col_dims)); - auto x_mat_dims = phi::flatten_to_2d(x_dims, x_num_col_dims); - auto y_mat_dims = phi::flatten_to_2d(y_dims, y_num_col_dims); + auto x_mat_dims = common::flatten_to_2d(x_dims, x_num_col_dims); + auto y_mat_dims = common::flatten_to_2d(y_dims, y_num_col_dims); PADDLE_ENFORCE_EQ( x_mat_dims[1], @@ -2279,7 +2280,7 @@ void MatmulWithFlattenInferMeta(const MetaTensor& x, output_dims.push_back(y_dims[i]); } - out->set_dims(phi::make_ddim(output_dims)); + out->set_dims(common::make_ddim(output_dims)); if (x.dtype() == phi::DataType::INT8) { out->set_dtype(phi::DataType::INT32); } else { @@ -2394,7 +2395,7 @@ void MatrixRankTolInferMeta(const MetaTensor& x, out_dims_array.data(), max_dim, axis); - out->set_dims(phi::make_ddim(out_dims_array)); + out->set_dims(common::make_ddim(out_dims_array)); } out->share_lod(x); } @@ -2421,7 +2422,7 @@ void MvInferMeta(const MetaTensor& x, const MetaTensor& vec, MetaTensor* out) { dim_x, dim_vec)); - auto dim_out = phi::make_ddim({dim_x[0]}); + auto dim_out = common::make_ddim({dim_x[0]}); out->set_dims(dim_out); out->set_dtype(x.dtype()); @@ -2437,7 +2438,7 @@ void PReluInferMeta(const MetaTensor& x, MetaConfig config) { auto x_dim = x.dims(); if (mode == "all") { - PADDLE_ENFORCE_EQ(phi::product(alpha.dims()), + PADDLE_ENFORCE_EQ(common::product(alpha.dims()), 1, phi::errors::InvalidArgument( "For mode 'all', size of weight Alpha must be one. 
" @@ -2625,8 +2626,8 @@ void PriorBoxInferMeta(const MetaTensor& input, out->set_dtype(input.dtype()); var->set_dtype(input.dtype()); - out->set_dims(phi::make_ddim(dim_vec)); - var->set_dims(phi::make_ddim(dim_vec)); + out->set_dims(common::make_ddim(dim_vec)); + var->set_dims(common::make_ddim(dim_vec)); } void RepeatInterleaveWithTensorIndexInferMeta(const MetaTensor& x, @@ -2634,7 +2635,7 @@ void RepeatInterleaveWithTensorIndexInferMeta(const MetaTensor& x, int dim, MetaTensor* out) { const auto& input_dim = x.dims(); - auto output_dim = phi::vectorize(input_dim); + auto output_dim = common::vectorize(input_dim); PADDLE_ENFORCE_EQ( dim < input_dim.size() && dim >= (0 - input_dim.size()), true, @@ -2671,7 +2672,7 @@ void RepeatInterleaveWithTensorIndexInferMeta(const MetaTensor& x, } output_dim[dim] = -1; - out->set_dims(phi::make_ddim(output_dim)); + out->set_dims(common::make_ddim(output_dim)); out->share_lod(x); out->set_dtype(x.dtype()); } @@ -2740,7 +2741,7 @@ void SequenceMaskInferMeta(const MetaTensor& x, int maxlen, int out_dtype, MetaTensor* y) { - auto dim = phi::vectorize(x.dims()); + auto dim = common::vectorize(x.dims()); if (max_len_tensor) { dim.push_back(-1); @@ -2748,7 +2749,7 @@ void SequenceMaskInferMeta(const MetaTensor& x, dim.push_back(maxlen > 0 ? maxlen : -1); } - y->set_dims(phi::make_ddim(dim)); + y->set_dims(common::make_ddim(dim)); auto out_phi_dtype = phi::TransToPhiDataType(out_dtype); y->set_dtype(out_phi_dtype); } @@ -2858,8 +2859,8 @@ void TriangularSolveInferMeta(const MetaTensor& x, x_dims[x_dims_n - 2], x_dims[x_dims_n - 1])); - std::vector x_dims_vec = phi::vectorize(x_dims); - std::vector y_dims_vec = phi::vectorize(y_dims); + std::vector x_dims_vec = common::vectorize(x_dims); + std::vector y_dims_vec = common::vectorize(y_dims); std::vector x_dims_vec_cut(x_dims_vec.begin(), x_dims_vec.end() - 2); std::vector y_dims_vec_cut(y_dims_vec.begin(), y_dims_vec.end() - 2); @@ -2872,7 +2873,7 @@ void TriangularSolveInferMeta(const MetaTensor& x, {y_dims_vec[y_dims_n - 2], y_dims_vec[y_dims_n - 1]}); // dim of 'out' is the same with 'Y' after broadcast - out->set_dims(phi::make_ddim(y_broadcast_dims)); + out->set_dims(common::make_ddim(y_broadcast_dims)); out->set_dtype(y.dtype()); out->set_layout(y.layout()); out->share_lod(y); @@ -2893,9 +2894,9 @@ void TopPSamplingInferMeta(const MetaTensor& x, "But received x_dims[0] = %d and ps_dims[0] = %d.", x_dims[0], ps_dims[0])); - ids->set_dims(phi::make_ddim({x_dims[0], 1})); + ids->set_dims(common::make_ddim({x_dims[0], 1})); ids->set_dtype(DataType::INT64); - out->set_dims(phi::make_ddim({x_dims[0], 1})); + out->set_dims(common::make_ddim({x_dims[0], 1})); out->set_dtype(x.dtype()); } @@ -2962,24 +2963,24 @@ void LstsqInferMeta(const MetaTensor& x, m, y_dims[y_rank - 2])); - rank->set_dims(phi::make_ddim(batch_dims_vec)); + rank->set_dims(common::make_ddim(batch_dims_vec)); if (m > n) { batch_dims_vec.emplace_back(nrhs); - residuals->set_dims(phi::make_ddim(batch_dims_vec)); + residuals->set_dims(common::make_ddim(batch_dims_vec)); batch_dims_vec.pop_back(); } else { - residuals->set_dims(phi::make_ddim({0})); + residuals->set_dims(common::make_ddim({0})); } residuals->set_dtype(y.dtype()); batch_dims_vec.emplace_back(std::min(m, n)); - singular_values->set_dims(phi::make_ddim(batch_dims_vec)); + singular_values->set_dims(common::make_ddim(batch_dims_vec)); singular_values->set_dtype(y.dtype()); batch_dims_vec[x_rank - 2] = n; batch_dims_vec.emplace_back(nrhs); - 
solution->set_dims(phi::make_ddim(batch_dims_vec)); + solution->set_dims(common::make_ddim(batch_dims_vec)); solution->set_dtype(y.dtype()); } @@ -3088,11 +3089,11 @@ void YoloBoxInferMeta(const MetaTensor& x, box_num = -1; } std::vector dim_boxes({dim_x[0], box_num, 4}); - boxes->set_dims(phi::make_ddim(dim_boxes)); + boxes->set_dims(common::make_ddim(dim_boxes)); boxes->set_dtype(x.dtype()); std::vector dim_scores({dim_x[0], box_num, class_num}); - scores->set_dims(phi::make_ddim(dim_scores)); + scores->set_dims(common::make_ddim(dim_scores)); } void ValueCompareInferMeta(const MetaTensor& x, @@ -3109,8 +3110,8 @@ void SolveInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) { auto x_dims = x.dims(); auto y_dims = y.dims(); - std::vector x_dims_vec = phi::vectorize(x.dims()); - std::vector y_dims_vec = phi::vectorize(y.dims()); + std::vector x_dims_vec = common::vectorize(x.dims()); + std::vector y_dims_vec = common::vectorize(y.dims()); auto x_dims_n = x_dims_vec.size(); auto y_dims_n = y_dims_vec.size(); @@ -3184,7 +3185,7 @@ void SolveInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) { new_dims.push_back(1); } - auto out_dims = phi::make_ddim(new_dims); + auto out_dims = common::make_ddim(new_dims); out->set_dims(out_dims); out->set_dtype(x.dtype()); @@ -3234,7 +3235,7 @@ void UnpoolInferMeta(const MetaTensor& x, } } if (out != nullptr) { - out->set_dims(phi::make_ddim(output_shape)); + out->set_dims(common::make_ddim(output_shape)); out->set_dtype(x.dtype()); } } @@ -3275,7 +3276,7 @@ void Unpool3dInferMeta(const MetaTensor& x, } } if (out != nullptr) { - out->set_dims(phi::make_ddim(output_shape)); + out->set_dims(common::make_ddim(output_shape)); out->set_dtype(x.dtype()); } } @@ -3305,7 +3306,7 @@ void WeightDequantizeInferMeta(const MetaTensor& x, x.dims()[0])); int n = x.dims()[1]; int k = x.dims()[0]; - out->set_dims(phi::make_ddim({n, k})); + out->set_dims(common::make_ddim({n, k})); out->set_dtype(out_dtype); } diff --git a/paddle/phi/infermeta/fusion.cc b/paddle/phi/infermeta/fusion.cc index 0bda38a08d651b..37bb925067f67a 100644 --- a/paddle/phi/infermeta/fusion.cc +++ b/paddle/phi/infermeta/fusion.cc @@ -15,7 +15,7 @@ limitations under the License. 
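For the ConvInferMeta and ConvTransposeInferMeta hunks further up, the per-spatial-dimension arithmetic (once UpdatePaddingAndDilation has normalized the paddings) follows the standard pair of formulas; the helper names are illustrative:

#include <cstdint>

// Forward convolution: output extent for one spatial dim.
int64_t ConvOutputSize(int64_t in, int64_t ksize, int64_t dilation,
                       int64_t pad_l, int64_t pad_r, int64_t stride) {
  if (in < 0) return -1;  // unknown extent stays unknown
  const int64_t dkernel = dilation * (ksize - 1) + 1;
  return (in + pad_l + pad_r - dkernel) / stride + 1;
}

// Transposed convolution: the inferred minimum output extent. An explicit
// output_size must lie in [infer, infer + stride), which is exactly what
// the PADDLE_ENFORCE checks in ConvTransposeInferMeta verify.
int64_t ConvTransposeOutputSize(int64_t in, int64_t ksize, int64_t dilation,
                                int64_t pad_sum, int64_t stride) {
  return (in - 1) * stride - pad_sum + dilation * (ksize - 1) + 1;
}

// e.g. ConvOutputSize(224, 3, 1, 1, 1, 2) == 112
//      ConvTransposeOutputSize(112, 3, 1, 2, 2) == 223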
*/ #include "paddle/phi/infermeta/fusion.h" #include #include -#include "paddle/phi/common/layout.h" +#include "paddle/common/layout.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/core/meta_tensor.h" @@ -65,7 +65,7 @@ static phi::DDim BroadCastInferShape(const DDim x_dims, max_dim, axis); - return phi::make_ddim(out_dims_array); + return common::make_ddim(out_dims_array); } return x_dims; } @@ -89,7 +89,7 @@ void AddActXPUInferMeta(const MetaTensor& x, out->set_dtype(x.dtype()); out->set_layout(x.layout()); out->share_lod(x); - out_max->set_dims(phi::make_ddim({6})); + out_max->set_dims(common::make_ddim({6})); out_max->set_dtype(x.dtype()); out_max->set_layout(x.layout()); } @@ -206,7 +206,7 @@ void Conv1dXPUInferMeta(const MetaTensor& x, out->set_dims(DDim(out_shape.data(), static_cast(out_shape.size()))); out->set_dtype(x.dtype()); out->set_layout(x.layout()); - out_max->set_dims(phi::make_ddim({6})); + out_max->set_dims(common::make_ddim({6})); } void Conv2dXPUInferMeta(const MetaTensor& x, @@ -270,7 +270,7 @@ void Conv2dXPUInferMeta(const MetaTensor& x, in_dims.size(), in_dims, strides.size(), - phi::make_ddim(strides), + common::make_ddim(strides), in_sub_stride_size)); for (int i = 0; i < dilation_size; ++i) { @@ -313,9 +313,10 @@ void Conv2dXPUInferMeta(const MetaTensor& x, // update paddings and dilations accoring to padding_algorithm std::vector paddings_vec = paddings; std::vector dilations_vec = dilations; - DDim in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size()); - DDim filter_data_dims = phi::slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = phi::vectorize(filter_data_dims); + DDim in_data_dims = common::slice_ddim(in_dims, 2, in_dims.size()); + DDim filter_data_dims = + common::slice_ddim(filter_dims, 2, filter_dims.size()); + std::vector ksize = common::vectorize(filter_data_dims); phi::UpdatePaddingAndDilation(&paddings_vec, &dilations_vec, padding_algorithm, @@ -334,7 +335,7 @@ void Conv2dXPUInferMeta(const MetaTensor& x, } // set output and output max dims out->set_dims(DDim(out_shape.data(), static_cast(out_shape.size()))); - out_max->set_dims(phi::make_ddim({6})); + out_max->set_dims(common::make_ddim({6})); out->set_dtype(out_dtype); } @@ -358,7 +359,7 @@ void EmbeddingWithEltwiseAddXPUInferMeta( auto id_dims = ids[0]->dims(); auto table_dims = tables[0]->dims(); - out->set_dims(phi::make_ddim({id_dims[0], id_dims[1], table_dims[1]})); + out->set_dims(common::make_ddim({id_dims[0], id_dims[1], table_dims[1]})); out->set_dtype(tables[0]->dtype()); out->set_layout(ids[0]->layout()); } @@ -387,7 +388,7 @@ void FcXPUInferMeta(const MetaTensor& x, out->set_dims(DDim(out_shape.data(), static_cast(out_shape.size()))); out->set_dtype(out_dtype); out->set_layout(x.layout()); - out_max->set_dims(phi::make_ddim({6})); + out_max->set_dims(common::make_ddim({6})); out_max->set_dtype(x.dtype()); out_max->set_layout(x.layout()); } @@ -841,7 +842,7 @@ void FusedFeedForwardInferMeta(const MetaTensor& x, if (x_dim.size() > 1) { return x_dim; } - return make_ddim({1, x_dim[0]}); + return common::make_ddim({1, x_dim[0]}); }; auto mat_dim_x = @@ -868,7 +869,8 @@ void FusedFeedForwardInferMeta(const MetaTensor& x, dropout2_mask->set_dims(dim_x); } - auto mean_dim = phi::make_ddim({mat_dim_x.batch_size_ * mat_dim_x.height_}); + auto mean_dim = + common::make_ddim({mat_dim_x.batch_size_ * mat_dim_x.height_}); if (pre_layer_norm) { ln1_out->set_dims(dim_x); ln1_mean->set_dims(mean_dim); @@ -1237,7 +1239,8 
@@ void FusedGemmEpilogueInferMeta(const MetaTensor& x, bias_dims, y_dims)); - auto x_mat_dims = phi::flatten_to_2d(x_dims, trans_x ? 1 : x_dims.size() - 1); + auto x_mat_dims = + common::flatten_to_2d(x_dims, trans_x ? 1 : x_dims.size() - 1); int K_from_x = static_cast(trans_x ? x_mat_dims[0] : x_mat_dims[1]); int K_from_y = static_cast(trans_y ? y_dims[1] : y_dims[0]); @@ -1264,11 +1267,11 @@ void FusedGemmEpilogueInferMeta(const MetaTensor& x, } else { out_dims.push_back(y_dims[1]); } - out->set_dims(phi::make_ddim(out_dims)); + out->set_dims(common::make_ddim(out_dims)); out->set_dtype(x.dtype()); if (reserve_space) { - reserve_space->set_dims(phi::make_ddim(out_dims)); + reserve_space->set_dims(common::make_ddim(out_dims)); reserve_space->set_dtype(x.dtype()); if (activation == "none") { PADDLE_THROW(phi::errors::InvalidArgument( @@ -1337,8 +1340,8 @@ void FusedGemmEpilogueGradInferMeta(const MetaTensor& x, dout_dims.size(), x_dims.size())); - auto dout_mat_dims = phi::flatten_to_2d(dout_dims, dout_dims.size() - 1); - auto x_mat_dims = phi::flatten_to_2d(x_dims, x_dims.size() - 1); + auto dout_mat_dims = common::flatten_to_2d(dout_dims, dout_dims.size() - 1); + auto x_mat_dims = common::flatten_to_2d(x_dims, x_dims.size() - 1); PADDLE_ENFORCE_EQ( dout_mat_dims[1], @@ -1373,7 +1376,7 @@ void FusedGemmEpilogueGradInferMeta(const MetaTensor& x, if (bias_grad) { int64_t dbias_dim = trans_y ? y_dims[0] : y_dims[1]; - bias_grad->set_dims(phi::make_ddim({dbias_dim})); + bias_grad->set_dims(common::make_ddim({dbias_dim})); bias_grad->set_dtype(y.dtype()); } } @@ -1608,7 +1611,7 @@ void YoloBoxXPUInferMeta(const MetaTensor& x, left_slice_out_dims_vector.data(), 1, true); - auto left_slice_out_dims = phi::make_ddim(left_slice_out_dims_vector); + auto left_slice_out_dims = common::make_ddim(left_slice_out_dims_vector); auto grid_dims = grid.dims(); auto left_add_out_dims = BroadCastInferShape(left_slice_out_dims, grid_dims, -1); @@ -1630,7 +1633,7 @@ void YoloBoxXPUInferMeta(const MetaTensor& x, mid_slice_out_dims_vector.data(), 1, true); - auto mid_slice_out_dims = phi::make_ddim(mid_slice_out_dims_vector); + auto mid_slice_out_dims = common::make_ddim(mid_slice_out_dims_vector); auto anchor_grid_dims = anchor_grid.dims(); auto mid_mul_out_dims = BroadCastInferShape(mid_slice_out_dims, anchor_grid_dims, -1); @@ -1648,7 +1651,7 @@ void YoloBoxXPUInferMeta(const MetaTensor& x, right_slice_out_dims_vector.data(), 1, true); - auto right_slice_out_dims = phi::make_ddim(right_slice_out_dims_vector); + auto right_slice_out_dims = common::make_ddim(right_slice_out_dims_vector); // compute concat out_dims std::vector in_dims; in_dims.reserve(3); @@ -1661,7 +1664,7 @@ void YoloBoxXPUInferMeta(const MetaTensor& x, out->set_dims(out_dim); out->set_dtype(x.dtype()); out->set_layout(x.layout()); - out_max->set_dims(phi::make_ddim({6})); + out_max->set_dims(common::make_ddim({6})); out_max->set_dtype(x.dtype()); out_max->set_layout(x.layout()); } @@ -1767,7 +1770,7 @@ void ConvTransposeXPUInferMeta(const MetaTensor& x, x_data_dims = slice_ddim(x_dims, 1, x_dims.size() - 1); } DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings_, &dilations_, padding_algorithm, x_data_dims, strides, ksize); @@ -1795,9 +1798,9 @@ void ConvTransposeXPUInferMeta(const MetaTensor& x, output_shape.push_back(filter_dims[1] * groups); } - 
out->set_dims(make_ddim(output_shape)); + out->set_dims(common::make_ddim(output_shape)); out->set_dtype(x.dtype()); - out_max->set_dims(phi::make_ddim({6})); + out_max->set_dims(common::make_ddim({6})); } void Conv2dTransposeXPUInferMeta(const MetaTensor& x, @@ -1876,7 +1879,7 @@ void BNActXPUInferMeta(const MetaTensor& x, x_dims)); } - const DataLayout data_layout_str = phi::StringToDataLayout(data_layout); + const DataLayout data_layout_str = common::StringToDataLayout(data_layout); PADDLE_ENFORCE_GE( x_dims.size(), @@ -1924,7 +1927,7 @@ void BNActXPUInferMeta(const MetaTensor& x, bool check = true; if ((!config.is_runtime) && - (phi::product(scale_dim) <= 0 || phi::product(bias_dim) <= 0)) { + (common::product(scale_dim) <= 0 || common::product(bias_dim) <= 0)) { check = false; } @@ -2072,9 +2075,10 @@ void FusedScaleBiasReluConvBnInferMeta(const MetaTensor& x, std::vector paddings_vec = paddings; std::vector dilations_vec = dilations; // get "HW" from "NHWC" - DDim in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); - DDim filter_data_dims = phi::slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = phi::vectorize(filter_data_dims); + DDim in_data_dims = common::slice_ddim(in_dims, 1, in_dims.size() - 1); + DDim filter_data_dims = + common::slice_ddim(filter_dims, 2, filter_dims.size()); + std::vector ksize = common::vectorize(filter_data_dims); phi::UpdatePaddingAndDilation(&paddings_vec, &dilations_vec, padding_algorithm, @@ -2093,7 +2097,7 @@ void FusedScaleBiasReluConvBnInferMeta(const MetaTensor& x, } out_shape.push_back(filter_dims[0]); // make shape for other outputs - auto c_dims = phi::make_ddim({filter_dims[0]}); + auto c_dims = common::make_ddim({filter_dims[0]}); // set output and output max dims out->set_dims(DDim(out_shape.data(), static_cast(out_shape.size()))); out_running_mean->set_dims(c_dims); @@ -2225,7 +2229,7 @@ void FusedEmbeddingEltWiseLayerNormInferMeta( hidden)); } - auto dim_output = phi::make_ddim({batch, seq_len, hidden}); + auto dim_output = common::make_ddim({batch, seq_len, hidden}); out->set_dims(dim_output); out->share_lod(*ids[0]); out->set_dtype((*embs[0]).dtype()); @@ -2289,7 +2293,7 @@ void FusionTransposeFlattenConcatInferMeta( if (out_dims[concat_axis] < 0) { out_dims[concat_axis] = -1; } - out->set_dims(phi::make_ddim(out_dims)); + out->set_dims(common::make_ddim(out_dims)); out->set_dtype((*x[0]).dtype()); } @@ -2368,7 +2372,7 @@ void FusedFCElementwiseLayerNormInferMeta(const MetaTensor& x, x_dims.size(), x_dims)); - auto x_mat_dims = phi::flatten_to_2d(x_dims, x_num_col_dims); + auto x_mat_dims = common::flatten_to_2d(x_dims, x_num_col_dims); PADDLE_ENFORCE_EQ( x_mat_dims[1], w_dims[0], @@ -2389,13 +2393,13 @@ void FusedFCElementwiseLayerNormInferMeta(const MetaTensor& x, fc_out_dims.push_back(w_dims[1]); DDim y_dims = y.dims(); - PADDLE_ENFORCE_EQ(phi::make_ddim(fc_out_dims), + PADDLE_ENFORCE_EQ(common::make_ddim(fc_out_dims), y_dims, phi::errors::InvalidArgument( "The output's shape of fc is expected to be equal to " "that of input Y. 
But received output's shape of fc " "is %s, input Y's shape is %s.", - phi::make_ddim(fc_out_dims), + common::make_ddim(fc_out_dims), y_dims)); PADDLE_ENFORCE_LT( @@ -2410,7 +2414,7 @@ void FusedFCElementwiseLayerNormInferMeta(const MetaTensor& x, y_dims.size(), y_dims)); - auto y_mat_dim = phi::flatten_to_2d(y_dims, begin_norm_axis); + auto y_mat_dim = common::flatten_to_2d(y_dims, begin_norm_axis); int64_t dim_0 = y_mat_dim[0]; int64_t dim_1 = y_mat_dim[1]; if (scale) { @@ -2512,7 +2516,7 @@ void Conv2dFusionInferMeta(const MetaTensor& input, data_format, channel_last, config); - output->set_dims(phi::make_ddim(out_shape)); + output->set_dims(common::make_ddim(out_shape)); output->set_dtype(input.dtype()); if (data_format == "NHWC") { output->set_layout(phi::DataLayout::NHWC); @@ -2533,17 +2537,17 @@ void Conv2dFusionInferMeta(const MetaTensor& input, "Attr(split_channels) = %u, the content = [%s].", outputs.size(), split_channels.size(), - phi::make_ddim(split_channels))); + common::make_ddim(split_channels))); int split_channels_sum = 0; std::vector output_shapes(split_channels.size()); for (size_t i = 0; i < split_channels.size(); ++i) { split_channels_sum += split_channels[i]; if (channel_last) { - output_shapes[i] = phi::make_ddim( + output_shapes[i] = common::make_ddim( {out_shape[0], out_shape[1], out_shape[2], split_channels[i]}); } else { - output_shapes[i] = phi::make_ddim( + output_shapes[i] = common::make_ddim( {out_shape[0], split_channels[i], out_shape[2], out_shape[3]}); } } @@ -2650,13 +2654,13 @@ void FusionRepeatedFCReluInferMeta(const MetaTensor& x, i, w_dims[i].size())); PADDLE_ENFORCE_EQ( - phi::product(b_dims[i]), + common::product(b_dims[i]), w_dims[i][1], phi::errors::InvalidArgument( "The length of Bias must be equal with w_dims[1], but received " "product(b_dims[%d]) = %d, w_dims[%d][1] = %d.", i, - phi::product(b_dims[i]), + common::product(b_dims[i]), i, w_dims[i][1])); } @@ -2739,7 +2743,7 @@ void FusionGRUInferMeta(const MetaTensor& x, DDim x_dims = x.dims(); auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1) - ? phi::flatten_to_2d(x_dims, 1) + ? common::flatten_to_2d(x_dims, 1) : x_dims; PADDLE_ENFORCE_EQ( x_mat_dims.size(), @@ -3113,7 +3117,7 @@ void FCInferMeta(const MetaTensor& input, phi::funcs::FCOutputSize( in_dims, w_dims, output_dims, in_num_col_dims, padding_weights); - out->set_dims(phi::make_ddim(output_dims)); + out->set_dims(common::make_ddim(output_dims)); out->share_lod(input); out->set_dtype(input.dtype()); } diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 22dfe4b059ed31..8a7eaf82bdc585 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -18,9 +18,9 @@ limitations under the License. 
*/ #include "glog/logging.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/device_memory_aligment.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/core/meta_tensor.h" @@ -57,7 +57,7 @@ void AdadeltaInferMeta(const MetaTensor& param, MetaTensor* master_param_out) { auto lr_dims = learning_rate.dims(); PADDLE_ENFORCE_EQ( - phi::product(lr_dims), + common::product(lr_dims), 1, phi::errors::InvalidArgument("LearningRate should have one element")); auto param_dims = param.dims(); @@ -115,7 +115,7 @@ void AdagradInferMeta(const MetaTensor& param, MetaTensor* master_param_out) { auto lr_dims = learning_rate.dims(); PADDLE_ENFORCE_EQ( - phi::product(lr_dims), + common::product(lr_dims), 1, phi::errors::InvalidArgument("LearningRate should have one element")); auto param_dims = param.dims(); @@ -171,7 +171,7 @@ void AdamInferMeta(const MetaTensor& param, MetaTensor* master_param_outs) { auto lr_dims = learning_rate.dims(); PADDLE_ENFORCE_EQ( - phi::product(lr_dims), + common::product(lr_dims), 1, errors::InvalidArgument( "The number of LearningRate shall be 1, but received %d. Maybe " @@ -179,23 +179,23 @@ void AdamInferMeta(const MetaTensor& param, "been initialized. You may need to confirm " "if you put exe.run(startup_program) " "after optimizer.minimize function.", - phi::product(lr_dims))); + common::product(lr_dims))); auto beta1_pow_dims = beta1_pow.dims(); VLOG(3) << "dims of Beta1Pow : [" << beta1_pow_dims << "]"; - PADDLE_ENFORCE_GE(phi::product(beta1_pow_dims), + PADDLE_ENFORCE_GE(common::product(beta1_pow_dims), 1, errors::InvalidArgument( "The size of Beta1 power accumulator should be greater " "than 0, but received %d.", - phi::product(beta1_pow_dims))); + common::product(beta1_pow_dims))); auto beta2_pow_dims = beta2_pow.dims(); VLOG(3) << "dims of Beta2Pow : [" << beta2_pow_dims << "]"; - PADDLE_ENFORCE_GE(phi::product(beta2_pow_dims), + PADDLE_ENFORCE_GE(common::product(beta2_pow_dims), 1, errors::InvalidArgument( "The size of Beta2 power accumulator should be greater " "than 0, but received %d.", - phi::product(beta2_pow_dims))); + common::product(beta2_pow_dims))); auto param_dims = param.dims(); PADDLE_ENFORCE_EQ( @@ -395,7 +395,7 @@ void AddNInferMeta(const std::vector& x, continue; } // for zero-sized tensor - if (phi::product(x_dim) == 0) { + if (common::product(x_dim) == 0) { continue; } // for 0D tensor @@ -403,7 +403,7 @@ void AddNInferMeta(const std::vector& x, continue; } is_all_0d_tensor = false; - if (phi::product(in_dim) == 0) { + if (common::product(in_dim) == 0) { in_dim = x_dim; } else { if (config.is_runtime) { @@ -451,7 +451,7 @@ void AddNInferMeta(const std::vector& x, } } if (is_all_0d_tensor) { - out->set_dims(make_ddim({})); + out->set_dims(common::make_ddim({})); } else { out->set_dims(in_dim); } @@ -477,7 +477,7 @@ void AddNTensorArrayInferMeta(const std::vector& x, if (has_tensor_array) { if (out->is_tensor_array()) { - out->set_dims(make_ddim({max_length})); + out->set_dims(common::make_ddim({max_length})); } } else { AddNInferMeta(x, out, config); @@ -508,14 +508,14 @@ void AucInferMeta(const MetaTensor& input, predict_dims)); auto predict_width = predict_dims[1]; PADDLE_ENFORCE_NE( - phi::product(predict_dims), + common::product(predict_dims), 0, phi::errors::InvalidArgument( "The Input(Predict) has not been initialized properly. 
The " "shape of Input(Predict) = [%s], the shape can not involes 0.", predict_dims)); PADDLE_ENFORCE_NE( - phi::product(label_dims), + common::product(label_dims), 0, phi::errors::InvalidArgument( "The Input(Label) has not been initialized properly. The " @@ -550,7 +550,7 @@ void AucInferMeta(const MetaTensor& input, 0, phi::errors::InvalidArgument("slide_steps must be natural number")); - auc->set_dims(phi::make_ddim({})); + auc->set_dims(common::make_ddim({})); auc->set_dtype(DataType::INT64); if (slide_steps) { @@ -658,7 +658,7 @@ void BatchNormInferMeta(const MetaTensor& x, x_dims)); } - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); PADDLE_ENFORCE_GE( x_dims.size(), @@ -709,8 +709,8 @@ void BatchNormInferMeta(const MetaTensor& x, bool check = true; if (!scale || !bias || - ((!config.is_runtime) && - (phi::product(scale.dims()) <= 0 || phi::product(bias.dims()) <= 0))) { + ((!config.is_runtime) && (common::product(scale.dims()) <= 0 || + common::product(bias.dims()) <= 0))) { check = false; } @@ -887,7 +887,7 @@ void BroadcastTensorsInferMeta(const std::vector& x, // 3. Set Output Dim for (size_t i = 0; i < out.size(); i++) { - out[i]->set_dims(phi::make_ddim(target_dims)); + out[i]->set_dims(common::make_ddim(target_dims)); out[i]->share_lod(*(x[i])); out[i]->set_dtype(x[i]->dtype()); } @@ -936,7 +936,7 @@ void CoalesceTensorInferMeta(const std::vector& input, int64_t numel = 0; for (auto item : input) { const auto& dim = item->dims(); - auto size = phi::product(dim); + auto size = common::product(dim); auto len = use_align ? phi::Alignment(static_cast(size) * size_of_dtype, phi::GPUPlace(), @@ -946,9 +946,9 @@ void CoalesceTensorInferMeta(const std::vector& input, numel += len; } if (fused_output) { - fused_output->set_dims(phi::make_ddim({numel})); + fused_output->set_dims(common::make_ddim({numel})); fused_output->set_dtype(dtype); - VLOG(4) << "fused_output size:" << phi::make_ddim({numel}); + VLOG(4) << "fused_output size:" << common::make_ddim({numel}); } #else return; @@ -968,7 +968,7 @@ void CoalesceTensorInferMeta(const std::vector& input, for (auto item : input) { const auto& dim = item->dims(); - auto size = phi::product(dim); + auto size = common::product(dim); auto len = use_align ? 
alignment(static_cast(size) * size_of_dtype, align_size) / @@ -977,9 +977,9 @@ void CoalesceTensorInferMeta(const std::vector& input, numel += static_cast(len); } if (fused_output) { - fused_output->set_dims(phi::make_ddim({numel})); + fused_output->set_dims(common::make_ddim({numel})); fused_output->set_dtype(dtype); - VLOG(4) << "fused_output size:" << phi::make_ddim({numel}); + VLOG(4) << "fused_output size:" << common::make_ddim({numel}); } } } @@ -995,11 +995,11 @@ void CheckMemoryContinueInferMeta(const std::vector& input, int64_t numel = 0; for (auto item : input) { const auto& dim = item->dims(); - auto size = phi::product(dim); + auto size = common::product(dim); auto len = size * phi::SizeOf(item->dtype()); numel += static_cast(len); } - output->set_dims(phi::make_ddim({numel})); + output->set_dims(common::make_ddim({numel})); output->set_dtype(phi::DataType::INT8); } @@ -1014,7 +1014,7 @@ void ConcatInferMeta(const std::vector& x, "than 0.")); if (axis_scalar.FromTensor()) { auto out_dims = - phi::make_ddim(std::vector(x.at(0)->dims().size(), -1)); + common::make_ddim(std::vector(x.at(0)->dims().size(), -1)); out->set_dims(out_dims); out->set_dtype(x.at(0)->dtype()); out->set_layout(x.at(0)->layout()); @@ -1140,7 +1140,7 @@ void DecayedAdagradInferMeta(const MetaTensor& param, MetaTensor* param_out, MetaTensor* moment_out) { auto lr_dims = learning_rate.dims(); - PADDLE_ENFORCE_NE(phi::product(lr_dims), + PADDLE_ENFORCE_NE(common::product(lr_dims), 0, phi::errors::InvalidArgument( "Maybe the Input variable LearningRate has not " @@ -1148,7 +1148,7 @@ void DecayedAdagradInferMeta(const MetaTensor& param, "if you put exe.run(startup_program) " "after optimizer.minimize function.")); PADDLE_ENFORCE_EQ( - phi::product(lr_dims), + common::product(lr_dims), 1, phi::errors::InvalidArgument("LearningRate should have one element")); auto param_dims = param.dims(); @@ -1376,7 +1376,7 @@ void DeformableConvInferMeta(const MetaTensor& x, } } - out->set_dims(phi::make_ddim(output_shape)); + out->set_dims(common::make_ddim(output_shape)); out->set_dtype(x.dtype()); } @@ -1400,19 +1400,19 @@ void DGCMomentumInferMeta(const MetaTensor& param, MetaTensor* grad_out) { auto lr_dims = learning_rate.dims(); - PADDLE_ENFORCE_NE(phi::product(lr_dims), + PADDLE_ENFORCE_NE(common::product(lr_dims), 0, phi::errors::InvalidArgument( "Maybe the Input variable LearningRate has not " "been initialized. You may need to confirm " "if you put exe.run(startup_program) " "after optimizer.minimize function.")); - PADDLE_ENFORCE_EQ(phi::product(lr_dims), + PADDLE_ENFORCE_EQ(common::product(lr_dims), 1, phi::errors::InvalidArgument( "Learning_rate should be a scalar. 
But Received " "LearningRate's dim [%s]", - phi::product(lr_dims))); + common::product(lr_dims))); auto param_dims = param.dims(); auto grad_dims = grad.dims(); @@ -1524,7 +1524,7 @@ void EditDistanceInferMeta(const MetaTensor& hyps, out->set_dims(refs.dims()); out->set_dtype(DataType::FLOAT32); - sequencenum->set_dims(phi::make_ddim({1})); + sequencenum->set_dims(common::make_ddim({1})); sequencenum->set_dtype(DataType::FLOAT32); } @@ -1598,9 +1598,9 @@ void FusedBiasActInferMeta(const MetaTensor& x, phi::errors::InvalidArgument( "The seconde dimension of x must be even, but receive %d", dim)); dim /= 2; - out->set_dims(phi::make_ddim({token_num, dim})); + out->set_dims(common::make_ddim({token_num, dim})); } else if (act_method == "gelu" || act_method == "relu") { - out->set_dims(phi::make_ddim({token_num, dim})); + out->set_dims(common::make_ddim({token_num, dim})); } else { PADDLE_THROW( errors::InvalidArgument("act_method must be geglu, swiglu or gelu, " @@ -1709,7 +1709,7 @@ void FusedLayerNormInferMeta(const MetaTensor& x, MetaTensor* residual_out, MetaTensor* mean, MetaTensor* variance) { - std::vector x_dims_vec = phi::vectorize(x.dims()); + std::vector x_dims_vec = common::vectorize(x.dims()); auto x_dims_size = x_dims_vec.size(); size_t normalized_dims = 1; @@ -1734,7 +1734,7 @@ void FusedLayerNormInferMeta(const MetaTensor& x, norm_weight.dims()[0])); } - auto out_dims = phi::make_ddim(x_dims_vec); + auto out_dims = common::make_ddim(x_dims_vec); out->set_dims(out_dims); if (residual_out && !norm_weight && !norm_bias) { @@ -1752,11 +1752,11 @@ void FusedLayerNormInferMeta(const MetaTensor& x, residual_out->set_dtype(x.dtype()); residual_out->set_layout(x.layout()); - mean->set_dims(phi::make_ddim({rows})); + mean->set_dims(common::make_ddim({rows})); mean->set_dtype(DataType::FLOAT32); mean->set_layout(x.layout()); - variance->set_dims(phi::make_ddim({rows})); + variance->set_dims(common::make_ddim({rows})); variance->set_dtype(DataType::FLOAT32); variance->set_layout(x.layout()); } @@ -1900,8 +1900,8 @@ void GenerateProposalsV2InferMeta(const MetaTensor& scores, MetaTensor* rpn_rois, MetaTensor* rpn_roi_probs, MetaTensor* rpn_rois_num) { - rpn_rois->set_dims(phi::make_ddim({-1, 4})); - rpn_roi_probs->set_dims(phi::make_ddim({-1, 1})); + rpn_rois->set_dims(common::make_ddim({-1, 4})); + rpn_roi_probs->set_dims(common::make_ddim({-1, 1})); } void GraphReindexInferMeta(const MetaTensor& x, @@ -2025,7 +2025,7 @@ void HSigmoidLossInferMeta(const MetaTensor& x, label_dims)); std::vector output_shape({input_dims, 1}); - out->set_dims(phi::make_ddim(output_shape)); + out->set_dims(common::make_ddim(output_shape)); out->share_lod(x); out->set_dtype(x.dtype()); } @@ -2053,7 +2053,7 @@ static void Interpolate1DInferShapeCheck( "Interpolation method can only be \"linear\" when" "Input(X) dimension is 3, but got method = %s .", interp_method)); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); for (int i = 0; i < dim_x.size(); ++i) { PADDLE_ENFORCE_NE( dim_x[i], @@ -2184,7 +2184,7 @@ static void Interpolate2DInferShapeCheck( "Interpolation method can only be \"bilinear\" or \"nearest\" when " "Input(X) dimension is 4, but got method = %s.", interp_method)); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); for (int i = 0; i < dim_x.size(); ++i) { PADDLE_ENFORCE_NE( @@ -2337,7 +2337,7 @@ 
static void Interpolate3DInferShapeCheck( "\"nearest\" when Input(X) " "dimension is 5, but got method = %s .", interp_method)); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); for (int i = 0; i < dim_x.size(); ++i) { PADDLE_ENFORCE_NE( @@ -2583,7 +2583,7 @@ void LambInferMeta(const MetaTensor& param, MetaTensor* master_param_outs) { auto lr_dims = learning_rate.dims(); PADDLE_ENFORCE_NE( - phi::product(lr_dims), + common::product(lr_dims), 0, phi::errors::InvalidArgument( "The number of LearningRate shall not be 0, but received %d. Maybe " @@ -2591,27 +2591,27 @@ void LambInferMeta(const MetaTensor& param, "been initialized. You may need to confirm " "if you put exe.run(startup_program) " "after optimizer.minimize function.", - phi::product(lr_dims))); + common::product(lr_dims))); PADDLE_ENFORCE_EQ( - phi::product(lr_dims), + common::product(lr_dims), 1, phi::errors::InvalidArgument( "Learning rate should have 1 dimension, but received %d.", - phi::product(lr_dims))); + common::product(lr_dims))); auto beta1_pow_dims = beta1_pow.dims(); - PADDLE_ENFORCE_GE(phi::product(beta1_pow_dims), + PADDLE_ENFORCE_GE(common::product(beta1_pow_dims), 1, phi::errors::InvalidArgument( "The size of Beta1 power accumulator should be " "greater than 0, but received %d.", - phi::product(beta1_pow_dims))); + common::product(beta1_pow_dims))); auto beta2_pow_dims = beta2_pow.dims(); - PADDLE_ENFORCE_GE(phi::product(beta2_pow_dims), + PADDLE_ENFORCE_GE(common::product(beta2_pow_dims), 1, phi::errors::InvalidArgument( "The size of Beta2 power accumulator should be " "greater than 0, but received %d.", - phi::product(beta2_pow_dims))); + common::product(beta2_pow_dims))); auto param_dims = param.dims(); PADDLE_ENFORCE_EQ( @@ -2726,12 +2726,12 @@ void LarsMomentumInferMeta( grad_dim.size())); for (auto& lr_dim : lr_dims) { - PADDLE_ENFORCE_EQ(phi::product(lr_dim), + PADDLE_ENFORCE_EQ(common::product(lr_dim), 1, phi::errors::InvalidArgument( "Learning_rate should be a scalar. 
But Received " "LearningRate's dim [%s]", - phi::product(lr_dim))); + common::product(lr_dim))); } for (size_t i = 0; i < param_dim.size(); ++i) { @@ -2826,33 +2826,33 @@ void LogspaceInferMeta(const MetaTensor& start, MetaTensor* out) { auto s_dims = start.dims(); PADDLE_ENFORCE_EQ( - phi::product(s_dims), + common::product(s_dims), 1, phi::errors::InvalidArgument("The size of Input(Start) must be 1," "but received input size is %s.", - phi::product(s_dims))); + common::product(s_dims))); auto e_dims = stop.dims(); PADDLE_ENFORCE_EQ( - phi::product(e_dims), + common::product(e_dims), true, phi::errors::InvalidArgument("The size of Input(Stop) must be 1," "but received input size is %s.", - phi::product(e_dims))); + common::product(e_dims))); auto num_dims = number.dims(); PADDLE_ENFORCE_EQ( - phi::product(num_dims), + common::product(num_dims), true, phi::errors::InvalidArgument("The size of Input(Num) must be 1," "but received input size is %s.", - phi::product(num_dims))); + common::product(num_dims))); auto b_dims = base.dims(); - PADDLE_ENFORCE_EQ(phi::product(b_dims), + PADDLE_ENFORCE_EQ(common::product(b_dims), true, phi::errors::InvalidArgument( "The size of Input(Base) must be 1," - "but received input size is phi::product(b_dims).", - phi::product(b_dims))); - out->set_dims(phi::make_ddim({-1})); + "but received input size is common::product(b_dims).", + common::product(b_dims))); + out->set_dims(common::make_ddim({-1})); out->set_dtype(dtype); } @@ -2970,15 +2970,15 @@ void MemoryEfficientAttentionInferMeta(const MetaTensor& query, std::vector logsumexp_dims({query_num_head, query_batch_size}); std::vector seed_and_offset_dims({2}); - output->set_dims(phi::make_ddim(out_dims)); + output->set_dims(common::make_ddim(out_dims)); output->share_lod(query); output->set_dtype(query.dtype()); output->set_layout(query.layout()); - logsumexp->set_dims(phi::make_ddim(logsumexp_dims)); + logsumexp->set_dims(common::make_ddim(logsumexp_dims)); logsumexp->set_dtype(phi::DataType::FLOAT32); - seed_and_offset->set_dims(phi::make_ddim(seed_and_offset_dims)); + seed_and_offset->set_dims(common::make_ddim(seed_and_offset_dims)); seed_and_offset->set_dtype(phi::DataType::INT64); } @@ -3060,7 +3060,7 @@ void VariableLengthMemoryEfficientAttentionInferMeta( std::vector out_dims( {query_batch_size, query_num_head, query_seq_length, value_head_size}); - out->set_dims(phi::make_ddim(out_dims)); + out->set_dims(common::make_ddim(out_dims)); out->set_dtype(query.dtype()); out->set_layout(query.layout()); } @@ -3078,7 +3078,7 @@ void MeshgridInferMeta(const std::vector& inputs, out_shape[i] = static_cast(inputs[i]->dims()[0]); } } - auto out_dims = phi::make_ddim(std::vector(out_shape)); + auto out_dims = common::make_ddim(std::vector(out_shape)); for (auto& output : outputs) { output->set_dims(out_dims); output->set_dtype(inputs[0]->dtype()); @@ -3110,18 +3110,18 @@ void MomentumInferMeta(const MetaTensor& param, auto lr_dims = learning_rate.dims(); PADDLE_ENFORCE_NE( - phi::product(lr_dims), + common::product(lr_dims), 0, errors::InvalidArgument("Maybe the Input variable LearningRate has not " "been initialized. You may need to confirm " "if you put exe.run(startup_program) " "after optimizer.minimize function.")); PADDLE_ENFORCE_EQ( - phi::product(lr_dims), + common::product(lr_dims), 1, errors::InvalidArgument("Learning_rate should be a scalar. 
But Received " "LearningRate's dim [%s]", - phi::product(lr_dims))); + common::product(lr_dims))); auto param_dim = param.dims(); param_out->set_dims(param_dim); @@ -3163,7 +3163,7 @@ void MultiDotInferMeta(const std::vector& x, // If the first tensor is 1D of size n view it as a row vector (1, n) if (first_dim.size() == 1) { - first_dim = phi::make_ddim({1, static_cast(first_dim[0])}); + first_dim = common::make_ddim({1, static_cast(first_dim[0])}); is_vector = true; } @@ -3177,11 +3177,12 @@ void MultiDotInferMeta(const std::vector& x, // If the last tensor is 1D of size n view it as a column vector (n, 1) if (last_dim.size() == 1) { - last_dim = phi::make_ddim({static_cast(last_dim[0]), 1}); - out_dim = is_vector ? phi::make_ddim({}) : phi::make_ddim({first_dim[0]}); + last_dim = common::make_ddim({static_cast(last_dim[0]), 1}); + out_dim = + is_vector ? common::make_ddim({}) : common::make_ddim({first_dim[0]}); } else { - out_dim = is_vector ? phi::make_ddim({last_dim[1]}) - : phi::make_ddim({first_dim[0], last_dim[1]}); + out_dim = is_vector ? common::make_ddim({last_dim[1]}) + : common::make_ddim({first_dim[0], last_dim[1]}); } auto width = first_dim.at(1); @@ -3351,7 +3352,7 @@ void RmsNormInferMeta(const MetaTensor& x, const float quant_min_bound, MetaTensor* out, MetaTensor* residual_out) { - std::vector x_dims_vec = phi::vectorize(x.dims()); + std::vector x_dims_vec = common::vectorize(x.dims()); auto x_dims_size = x_dims_vec.size(); size_t normalized_dims = 1; @@ -3369,7 +3370,7 @@ void RmsNormInferMeta(const MetaTensor& x, normalized_dims, norm_weight.dims()[0])); - auto out_dims = phi::make_ddim(x_dims_vec); + auto out_dims = common::make_ddim(x_dims_vec); out->set_dims(out_dims); if (quant_scale <= 0.0f) { @@ -3429,12 +3430,12 @@ void RmspropInferMeta(const MetaTensor& param, mean_square.dims())); auto lr_dim = learning_rate.dims(); - PADDLE_ENFORCE_EQ(phi::product(lr_dim), + PADDLE_ENFORCE_EQ(common::product(lr_dim), 1, phi::errors::InvalidArgument( "Learning Rate of RmspropOp should be a scalar. But " "received LearningRate's dim [%s]", - phi::product(lr_dim))); + common::product(lr_dim))); if (master_param.initialized()) { PADDLE_ENFORCE_EQ(param_dim, @@ -3564,12 +3565,12 @@ void SgdInferMeta(const MetaTensor& param, "Output(ParamOut) of SGDOp should not be null.")); auto lr_dims = learning_rate.dims(); - PADDLE_ENFORCE_EQ(phi::product(lr_dims), + PADDLE_ENFORCE_EQ(common::product(lr_dims), 1, phi::errors::InvalidArgument( "Learning rate should have 1 element. But received " "LearningRate dims [%s]", - phi::product(lr_dims))); + common::product(lr_dims))); param_out->set_dims(param.dims()); param_out->set_dtype(param.dtype()); @@ -3605,14 +3606,14 @@ void SigmoidCrossEntropyWithLogitsInferMeta(const MetaTensor& x, bool check = true; if ((!config.is_runtime) && - (phi::product(x_dims) <= 0 || phi::product(labels_dims) <= 0)) { + (common::product(x_dims) <= 0 || common::product(labels_dims) <= 0)) { check = false; } if (check) { PADDLE_ENFORCE_EQ( - phi::slice_ddim(x_dims, 0, rank), - phi::slice_ddim(labels_dims, 0, rank), + common::slice_ddim(x_dims, 0, rank), + common::slice_ddim(labels_dims, 0, rank), phi::errors::InvalidArgument( "Input(X) and Input(Label) shall have the same shape " "except the last dimension. 
But received: the shape of " @@ -3623,8 +3624,8 @@ void SigmoidCrossEntropyWithLogitsInferMeta(const MetaTensor& x, if (pos_weight) { auto weight_dims = pos_weight.dims(); PADDLE_ENFORCE_EQ( - phi::slice_ddim(weight_dims, 0, rank), - phi::slice_ddim(labels_dims, 0, rank), + common::slice_ddim(weight_dims, 0, rank), + common::slice_ddim(labels_dims, 0, rank), phi::errors::InvalidArgument( "Input(pos_weight) and Input(Label) shall have the same shape " "But received: the shape of Input(PosWeight) is [%s], " @@ -3705,8 +3706,8 @@ void SendUERecvInferMeta(const MetaTensor& x, // Infer out's shape according to x and e(need broadcasting condition) out->set_dtype(x.dtype()); - auto x_dims1 = phi::vectorize(x_dims); - auto y_dims1 = phi::vectorize(y_dims); + auto x_dims1 = common::vectorize(x_dims); + auto y_dims1 = common::vectorize(y_dims); std::vector x_dims2(x_dims1.begin() + 1, x_dims1.end()); std::vector y_dims2(y_dims1.begin() + 1, y_dims1.end()); @@ -3716,15 +3717,15 @@ void SendUERecvInferMeta(const MetaTensor& x, std::vector y_dims_array(max_dim); std::vector out_dims_array(max_dim); // Only need to broadcast dimensions other than the 0th dimension. - phi::funcs::GetBroadcastDimsArrays(phi::make_ddim(x_dims2), - phi::make_ddim(y_dims2), + phi::funcs::GetBroadcastDimsArrays(common::make_ddim(x_dims2), + common::make_ddim(y_dims2), x_dims_array.data(), y_dims_array.data(), out_dims_array.data(), max_dim, axis); out_dims_array.insert(out_dims_array.begin(), -1); - out->set_dims(phi::make_ddim(out_dims_array)); + out->set_dims(common::make_ddim(out_dims_array)); } void SendUVInferMeta(const MetaTensor& x, @@ -3776,8 +3777,8 @@ void SendUVInferMeta(const MetaTensor& x, out->set_dtype(x.dtype()); auto x_dims = x.dims(); auto y_dims = y.dims(); - auto x_dims1 = phi::vectorize(x_dims); - auto y_dims1 = phi::vectorize(y_dims); + auto x_dims1 = common::vectorize(x_dims); + auto y_dims1 = common::vectorize(y_dims); std::vector x_dims2(x_dims1.begin() + 1, x_dims1.end()); std::vector y_dims2(y_dims1.begin() + 1, y_dims1.end()); int max_dim = static_cast(std::max(x_dims2.size(), y_dims2.size())); @@ -3786,15 +3787,15 @@ void SendUVInferMeta(const MetaTensor& x, std::vector y_dims_array(max_dim); std::vector out_dims_array(max_dim); // Only need to broadcast dimensions other than the 0th dimension. 
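// Sketch of the tail-dimension broadcast idiom used by SendUERecvInferMeta
// and SendUVInferMeta around this point, spelled out with explicit template
// arguments. phi::funcs::GetBroadcastDimsArrays keeps its phi home; only the
// DDim wrappers move to common::. Assumptions: axis = -1 stands in for the
// axis computed upstream in the real functions, and the element types
// (int64_t shapes, int broadcast arrays) follow the usual phi conventions.
void TailBroadcastSketch(const phi::DDim& x_dims,
                         const phi::DDim& y_dims,
                         phi::MetaTensor* out) {
  auto x_dims1 = common::vectorize(x_dims);  // full shape as vector<int64_t>
  auto y_dims1 = common::vectorize(y_dims);
  // Drop the 0th (node/index) dimension; only the tail dims broadcast.
  std::vector<int64_t> x_dims2(x_dims1.begin() + 1, x_dims1.end());
  std::vector<int64_t> y_dims2(y_dims1.begin() + 1, y_dims1.end());
  int max_dim = static_cast<int>(std::max(x_dims2.size(), y_dims2.size()));
  const int axis = -1;  // assumption: the real callers pass a computed axis
  std::vector<int> x_dims_array(max_dim);
  std::vector<int> y_dims_array(max_dim);
  std::vector<int> out_dims_array(max_dim);
  phi::funcs::GetBroadcastDimsArrays(common::make_ddim(x_dims2),
                                     common::make_ddim(y_dims2),
                                     x_dims_array.data(),
                                     y_dims_array.data(),
                                     out_dims_array.data(),
                                     max_dim,
                                     axis);
  // Re-prepend a leading dim that is only known at runtime.
  out_dims_array.insert(out_dims_array.begin(), -1);
  out->set_dims(common::make_ddim(out_dims_array));
}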
- phi::funcs::GetBroadcastDimsArrays(phi::make_ddim(x_dims2), - phi::make_ddim(y_dims2), + phi::funcs::GetBroadcastDimsArrays(common::make_ddim(x_dims2), + common::make_ddim(y_dims2), x_dims_array.data(), y_dims_array.data(), out_dims_array.data(), max_dim, axis); out_dims_array.insert(out_dims_array.begin(), src_index_dims[0]); // NOLINT - out->set_dims(phi::make_ddim(out_dims_array)); + out->set_dims(common::make_ddim(out_dims_array)); } void StackInferMeta(const std::vector& x, @@ -3830,9 +3831,9 @@ void StackInferMeta(const std::vector& x, rank, axis)); if (axis < 0) axis += (rank + 1); - auto vec = phi::vectorize(out_dim); + auto vec = common::vectorize(out_dim); vec.insert(vec.begin() + axis, input_dims.size()); // NOLINT - out->set_dims(phi::make_ddim(vec)); + out->set_dims(common::make_ddim(vec)); out->set_dtype(x.at(0)->dtype()); out->share_lod(*x.at(0)); } @@ -3923,7 +3924,7 @@ void WarpctcInferMeta(const MetaTensor& logits, sequence_width = static_cast(logits_dims[2]); } else { sequence_width = - static_cast(phi::product(logits_dims) / logits_dims[0]); + static_cast(common::product(logits_dims) / logits_dims[0]); } PADDLE_ENFORCE_GE( @@ -4182,15 +4183,15 @@ void YoloLossInferMeta(const MetaTensor& x, } std::vector dim_out({dim_x[0]}); - loss->set_dims(phi::make_ddim(dim_out)); + loss->set_dims(common::make_ddim(dim_out)); loss->set_dtype(x.dtype()); std::vector dim_obj_mask({dim_x[0], mask_num, dim_x[2], dim_x[3]}); - objectness_mask->set_dims(phi::make_ddim(dim_obj_mask)); + objectness_mask->set_dims(common::make_ddim(dim_obj_mask)); objectness_mask->set_dtype(x.dtype()); std::vector dim_gt_match_mask({dim_gtbox[0], dim_gtbox[1]}); - gt_match_mask->set_dims(phi::make_ddim(dim_gt_match_mask)); + gt_match_mask->set_dims(common::make_ddim(dim_gt_match_mask)); gt_match_mask->set_dtype(x.dtype()); } @@ -4546,7 +4547,7 @@ void MaskedMultiheadAttentionInferMeta(const MetaTensor& x, void FullWithTensorInferMeta(const MetaTensor& shape, DataType dtype, MetaTensor* out) { - out->set_dims(make_ddim(std::vector(shape.numel(), -1))); + out->set_dims(common::make_ddim(std::vector(shape.numel(), -1))); out->set_dtype(dtype); } diff --git a/paddle/phi/infermeta/nullary.cc b/paddle/phi/infermeta/nullary.cc index d32f160fd7f927..3f8686753e890b 100644 --- a/paddle/phi/infermeta/nullary.cc +++ b/paddle/phi/infermeta/nullary.cc @@ -27,7 +27,7 @@ void ArangeInferMeta(const Scalar& start, double step_value = step.to(); int numel = static_cast(std::ceil((end_value - start_value) / step_value)); - out->set_dims(phi::make_ddim(std::vector(1, numel))); + out->set_dims(common::make_ddim(std::vector(1, numel))); } else { out->set_dims({-1}); } @@ -37,7 +37,7 @@ void ArangeInferMeta(const Scalar& start, void AssignValueInferMeta(const std::vector& shape, DataType dtype, MetaTensor* out) { - out->set_dims(phi::make_ddim(shape)); + out->set_dims(common::make_ddim(shape)); out->set_dtype(dtype); } @@ -57,7 +57,7 @@ void CreateInferMeta(const IntArray& shape, DataType dtype, MetaTensor* out) { "than 0. 
But received: shape[%u] = %d; shape = [%s].", i, data[i], - phi::make_ddim(data))); + common::make_ddim(data))); } } CreateInferMetaBase(shape.GetData(), dtype, DataLayout::NCHW, out); @@ -74,7 +74,7 @@ void CreateInferMetaBase(const std::vector& shape, DataType dtype, DataLayout layout, MetaTensor* out) { - auto out_dims = phi::make_ddim(shape); + auto out_dims = common::make_ddim(shape); out->set_dims(out_dims); out->set_dtype(dtype); out->set_layout(layout); @@ -84,7 +84,7 @@ void DataInferMeta(const std::string& name, const phi::IntArray& shape, phi::DataType data_type, MetaTensor* out) { - auto out_dims = phi::make_ddim(shape.GetData()); + auto out_dims = common::make_ddim(shape.GetData()); out->set_dims(out_dims); out->set_dtype(data_type); } @@ -117,21 +117,21 @@ void GaussianInferMeta(const IntArray& shape, int seed, DataType dtype, MetaTensor* out) { - auto out_dims = phi::make_ddim(shape.GetData()); + auto out_dims = common::make_ddim(shape.GetData()); out->set_dims(out_dims); out->set_dtype(dtype); out->set_layout(DataLayout::NCHW); } void RandpermInferMeta(int n, DataType dtype, MetaTensor* out) { - out->set_dims(phi::make_ddim({n})); + out->set_dims(common::make_ddim({n})); out->set_dtype(dtype); } void UniformRandomInferMeta(const IntArray& shape, DataType dtype, MetaTensor* out) { - auto out_dims = phi::make_ddim(shape.GetData()); + auto out_dims = common::make_ddim(shape.GetData()); out->set_dims(out_dims); out->set_dtype(dtype); out->set_layout(DataLayout::NCHW); @@ -156,7 +156,7 @@ void RandintInferMeta( for (auto dim : shape_vector) { tensor_shape.push_back(static_cast(dim)); } - out->set_dims(make_ddim(tensor_shape)); + out->set_dims(common::make_ddim(tensor_shape)); out->set_dtype(dtype); } @@ -238,13 +238,13 @@ void RecvV2InferMeta(const int ring_id, i, out_shape[i])); } - out->set_dims(phi::make_ddim(out_shape)); + out->set_dims(common::make_ddim(out_shape)); } out->set_dtype(dtype); } void SeedInferMeta(int seed, MetaTensor* out) { - out->set_dims(phi::make_ddim({1})); + out->set_dims(common::make_ddim({1})); out->set_dtype(DataType::INT32); } @@ -254,7 +254,7 @@ void TruncatedGaussianRandomInferMeta(const std::vector& shape, int seed, DataType dtype, MetaTensor* out) { - auto out_dims = phi::make_ddim(shape); + auto out_dims = common::make_ddim(shape); out->set_dims(out_dims); out->set_dtype(dtype); out->set_layout(DataLayout::NCHW); @@ -279,7 +279,7 @@ void TrilIndicesInferMeta( tril_size += diff_row * cols; } std::vector tmp = {2, tril_size}; - auto out_dims = phi::make_ddim(tmp); + auto out_dims = common::make_ddim(tmp); out->set_dims(out_dims); out->set_dtype(dtype); } @@ -308,7 +308,7 @@ void TriuIndicesInferMeta( tril_size += diff_row * col; } std::vector tmp = {2, row * col - tril_size}; - auto out_dims = phi::make_ddim(tmp); + auto out_dims = common::make_ddim(tmp); out->set_dims(out_dims); out->set_dtype(dtype); } diff --git a/paddle/phi/infermeta/nullary.h b/paddle/phi/infermeta/nullary.h index c1c0501593ea45..1447649eb1ebb7 100644 --- a/paddle/phi/infermeta/nullary.h +++ b/paddle/phi/infermeta/nullary.h @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include "paddle/phi/common/int_array.h" #include "paddle/phi/common/scalar.h" +#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/meta_tensor.h" namespace phi { diff --git a/paddle/phi/infermeta/sparse/binary.cc b/paddle/phi/infermeta/sparse/binary.cc index 6e9a5775e24634..2ed540c0e0c4db 100644 --- a/paddle/phi/infermeta/sparse/binary.cc +++ b/paddle/phi/infermeta/sparse/binary.cc @@ -91,7 +91,7 @@ void Conv3dInferMeta(const MetaTensor& x, int rank = is2D ? 4 : 5; std::vector out_dims_vec(rank, 1); - DDim out_dims = make_ddim(out_dims_vec); + DDim out_dims = common::make_ddim(out_dims_vec); std::vector kernel_sizes(kernel_dims.size()); for (int i = 0; i < kernel_dims.size(); i++) { @@ -164,7 +164,7 @@ void SparseCooTensorInferMeta(const MetaTensor& values, const MetaTensor& indices, const std::vector& shape, MetaTensor* out) { - out->set_dims(phi::make_ddim(shape)); + out->set_dims(common::make_ddim(shape)); out->set_dtype(values.dtype()); out->set_layout(values.layout()); } diff --git a/paddle/phi/infermeta/spmd_rules/concat.cc b/paddle/phi/infermeta/spmd_rules/concat.cc index 5311b287a734ed..15844a426245fe 100644 --- a/paddle/phi/infermeta/spmd_rules/concat.cc +++ b/paddle/phi/infermeta/spmd_rules/concat.cc @@ -62,7 +62,7 @@ SpmdInfo ConcatInferSpmd(const std::vector& x, int axis) { x.end(), std::back_inserter(tensor_shapes), [](const DistMetaTensor& meta) { - return phi::vectorize(meta.dims()); + return common::vectorize(meta.dims()); }); bool all_empty = std::all_of(tensor_shapes.begin(), tensor_shapes.end(), IsEmpty); @@ -125,7 +125,7 @@ SpmdInfo ConcatGradInferSpmdDynamic(const std::vector& x, x.end(), std::back_inserter(tensor_shapes), [](const DistMetaTensor& meta) { - return phi::vectorize(meta.dims()); + return common::vectorize(meta.dims()); }); bool all_empty = std::all_of(tensor_shapes.begin(), tensor_shapes.end(), IsEmpty); @@ -148,7 +148,7 @@ SpmdInfo ConcatGradInferSpmdDynamic(const std::vector& x, return meta.dist_attr(); }); input_attrs.push_back(output_grad.dist_attr()); - tensor_shapes.push_back(phi::vectorize(output_grad.dims())); + tensor_shapes.push_back(common::vectorize(output_grad.dims())); std::string all_aixs; std::string align_axis; std::tie(all_aixs, align_axis) = FillConcatNotation(ndim, dim); diff --git a/paddle/phi/infermeta/spmd_rules/default_data_parallel.cc b/paddle/phi/infermeta/spmd_rules/default_data_parallel.cc index 7a3639147f1ee6..6b390e7eda1a8d 100644 --- a/paddle/phi/infermeta/spmd_rules/default_data_parallel.cc +++ b/paddle/phi/infermeta/spmd_rules/default_data_parallel.cc @@ -81,7 +81,7 @@ SpmdInfo DefaultDataParallelInferSpmd( VLOG(4) << "DefaultDataParallelSpmd InferForward:"; for (int64_t i = 0; i < ninputs; i++) { VLOG(4) << "Input" << std::to_string(i) << " shape: [" - << str_join(phi::vectorize(ins[i]->dims())) << "] " + << str_join(common::vectorize(ins[i]->dims())) << "] " << "src_dims_mapping: [" << str_join(ins[i]->dist_attr().dims_mapping()) << "] " << "dst_dims_mapping: [" @@ -90,7 +90,7 @@ SpmdInfo DefaultDataParallelInferSpmd( for (int64_t i = 0; i < noutputs; i++) { VLOG(4) << "Output" << std::to_string(i) << " shape: [" - << str_join(phi::vectorize(outs[i]->dims())) << "] " + << str_join(common::vectorize(outs[i]->dims())) << "] " << "dst_dims_mapping: [" << str_join(output_dist_attrs[i].dims_mapping()) << "]"; } @@ -144,7 +144,7 @@ SpmdInfo DefaultDataParallelInferSpmdReverse( VLOG(4) << "DefaultDataParallelSpmd InferBackward:"; for (int64_t i = 0; i < noutputs; i++) { VLOG(4) << "Output" << std::to_string(i) << 
" shape: [" - << str_join(phi::vectorize(outs[i]->dims())) << "] " + << str_join(common::vectorize(outs[i]->dims())) << "] " << "src_dims_mapping: [" << str_join(outs[i]->dist_attr().dims_mapping()) << "] " << "dst_dims_mapping: [" @@ -153,7 +153,7 @@ SpmdInfo DefaultDataParallelInferSpmdReverse( for (int64_t i = 0; i < ninputs; i++) { VLOG(4) << "Input" << std::to_string(i) << " shape: [" - << str_join(phi::vectorize(ins[i]->dims())) << "] " + << str_join(common::vectorize(ins[i]->dims())) << "] " << "dst_dims_mapping: [" << str_join(dst_input_dist_attrs[i].dims_mapping()) << "]"; } diff --git a/paddle/phi/infermeta/spmd_rules/elementwise.cc b/paddle/phi/infermeta/spmd_rules/elementwise.cc index e0d4c248b7760c..a4b6826f35cbf9 100644 --- a/paddle/phi/infermeta/spmd_rules/elementwise.cc +++ b/paddle/phi/infermeta/spmd_rules/elementwise.cc @@ -81,7 +81,7 @@ void GetBinaryNotations(const std::vector& x_shape, SpmdInfo ElementwiseUnaryInferSpmd(const DistMetaTensor& x) { // Step0: Verify Input Args Based on Elementwise Logic - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); TensorDistAttr x_dist_attr_src = x.dist_attr(); std::vector x_dims_mapping = x_dist_attr_src.dims_mapping(); @@ -127,9 +127,9 @@ SpmdInfo ElementwiseUnaryInferSpmd(const DistMetaTensor& x) { SpmdInfo ElementwiseUnaryInferSpmdReverse(const DistMetaTensor& x, const DistMetaTensor& out) { // Step0: Verify Input Args Based on Elementwise Logic - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); - auto out_shape = phi::vectorize(out.dims()); + auto out_shape = common::vectorize(out.dims()); int out_ndim = out_shape.size(); TensorDistAttr out_dist_attr = out.dist_attr(); std::vector out_dims_mapping = out_dist_attr.dims_mapping(); @@ -181,9 +181,9 @@ SpmdInfo ElementwiseUnaryInferSpmdReverse(const DistMetaTensor& x, SpmdInfo ElementwiseBinaryInferSpmd(const DistMetaTensor& x, const DistMetaTensor& y) { // Step0: Verify Input Args Based on Elementwise Logic - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); - auto y_shape = phi::vectorize(y.dims()); + auto y_shape = common::vectorize(y.dims()); int y_ndim = y_shape.size(); TensorDistAttr x_dist_attr_src = x.dist_attr(); TensorDistAttr y_dist_attr_src = y.dist_attr(); @@ -251,11 +251,11 @@ SpmdInfo ElementwiseBinaryInferSpmdReverse(const DistMetaTensor& x, const DistMetaTensor& y, const DistMetaTensor& out) { // Step0: Verify Input Args Based on Elementwise Logic - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); - auto y_shape = phi::vectorize(y.dims()); + auto y_shape = common::vectorize(y.dims()); int y_ndim = y_shape.size(); - auto out_shape = phi::vectorize(out.dims()); + auto out_shape = common::vectorize(out.dims()); int out_ndim = out_shape.size(); int max_ndim = std::max(x_ndim, y_ndim); TensorDistAttr out_dist_attr = out.dist_attr(); diff --git a/paddle/phi/infermeta/spmd_rules/embedding.cc b/paddle/phi/infermeta/spmd_rules/embedding.cc index 99aec54cd54742..873f8065f222a1 100644 --- a/paddle/phi/infermeta/spmd_rules/embedding.cc +++ b/paddle/phi/infermeta/spmd_rules/embedding.cc @@ -33,8 +33,8 @@ SpmdInfo EmbeddingInferSpmd(const DistMetaTensor& x, int padding_idx, bool sparse) { // Step0: Verify input args based on embedding logic - auto x_shape = phi::vectorize(x.dims()); - auto weight_shape = 
phi::vectorize(weight.dims()); + auto x_shape = common::vectorize(x.dims()); + auto weight_shape = common::vectorize(weight.dims()); int x_ndim = static_cast(x_shape.size()); int weight_ndim = static_cast(weight_shape.size()); auto x_dist_attr_src = x.dist_attr(); @@ -159,9 +159,9 @@ SpmdInfo EmbeddingInferSpmdReverse(const DistMetaTensor& x, bool sparse) { // Step0: Verify input args based on embedding logic // InferBackward is called after InferForward, so we skip some checks. - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = static_cast(x_shape.size()); - auto out_shape = phi::vectorize(out.dims()); + auto out_shape = common::vectorize(out.dims()); int out_ndim = static_cast(out_shape.size()); PADDLE_ENFORCE_EQ(x_ndim, diff --git a/paddle/phi/infermeta/spmd_rules/flash_attention.cc b/paddle/phi/infermeta/spmd_rules/flash_attention.cc index c12f6665237721..5f6e05eb256eeb 100644 --- a/paddle/phi/infermeta/spmd_rules/flash_attention.cc +++ b/paddle/phi/infermeta/spmd_rules/flash_attention.cc @@ -60,7 +60,7 @@ SpmdInfo FlashAttInferSpmd(const DistMetaTensor& q, const std::string& rng_name) { // q // [batch_size, seq_len_q, num_heads, head_dim] - auto q_shape = phi::vectorize(q.dims()); + auto q_shape = common::vectorize(q.dims()); int q_ndim = q_shape.size(); auto q_dist_attr = q.dist_attr(); int q_dims_mapping_size = q_dist_attr.dims_mapping().size(); @@ -85,7 +85,7 @@ SpmdInfo FlashAttInferSpmd(const DistMetaTensor& q, // k // [batch_size, seq_len_kv, num_heads, head_dim] - auto k_shape = phi::vectorize(k.dims()); + auto k_shape = common::vectorize(k.dims()); int k_ndim = k_shape.size(); auto k_dist_attr = k.dist_attr(); int k_dims_mapping_size = k_dist_attr.dims_mapping().size(); @@ -134,7 +134,7 @@ SpmdInfo FlashAttInferSpmd(const DistMetaTensor& q, // v // [batch_size, seq_len_kv, num_heads, head_dim] - auto v_shape = phi::vectorize(v.dims()); + auto v_shape = common::vectorize(v.dims()); int v_ndim = v_shape.size(); auto v_dist_attr = v.dist_attr(); int v_dims_mapping_size = v_dist_attr.dims_mapping().size(); @@ -183,9 +183,9 @@ SpmdInfo FlashAttInferSpmd(const DistMetaTensor& q, // fixed_seed_offset // TODO(liuzhenhai): process fixed_seed_offset and attn_mask auto fixed_seed_offset_dist_attr = fixed_seed_offset.dist_attr(); - auto fixed_seed_offset_shape = phi::vectorize(fixed_seed_offset.dims()); + auto fixed_seed_offset_shape = common::vectorize(fixed_seed_offset.dims()); // attn_mask - auto attn_mask_shape = phi::vectorize(attn_mask.dims()); + auto attn_mask_shape = common::vectorize(attn_mask.dims()); int mask_ndim = attn_mask_shape.size(); auto attn_mask_dist_attr = attn_mask.dist_attr(); int mask_dims_mapping_size = attn_mask_dist_attr.dims_mapping().size(); @@ -289,7 +289,7 @@ SpmdInfo FlashAttGradInferSpmd(const DistMetaTensor& q, bool causal) { // q // [batch_size, seq_len_q, num_heads, head_dim] - auto q_shape = phi::vectorize(q.dims()); + auto q_shape = common::vectorize(q.dims()); int q_ndim = q_shape.size(); auto q_dist_attr = q.dist_attr(); int q_dims_mapping_size = q_dist_attr.dims_mapping().size(); @@ -314,7 +314,7 @@ SpmdInfo FlashAttGradInferSpmd(const DistMetaTensor& q, // k // [batch_size, seq_len_kv, num_heads, head_dim] - auto k_shape = phi::vectorize(k.dims()); + auto k_shape = common::vectorize(k.dims()); int k_ndim = k_shape.size(); auto k_dist_attr = k.dist_attr(); int k_dims_mapping_size = k_dist_attr.dims_mapping().size(); @@ -363,7 +363,7 @@ SpmdInfo FlashAttGradInferSpmd(const DistMetaTensor& q, // v // 
[batch_size, seq_len_kv, num_heads, head_dim] - auto v_shape = phi::vectorize(v.dims()); + auto v_shape = common::vectorize(v.dims()); int v_ndim = v_shape.size(); auto v_dist_attr = v.dist_attr(); int v_dims_mapping_size = v_dist_attr.dims_mapping().size(); @@ -411,10 +411,10 @@ SpmdInfo FlashAttGradInferSpmd(const DistMetaTensor& q, // fixed_seed_offset auto seed_offset_dist_attr = seed_offset.dist_attr(); - auto seed_offset_shape = phi::vectorize(seed_offset.dims()); + auto seed_offset_shape = common::vectorize(seed_offset.dims()); // attn_mask - auto attn_mask_shape = phi::vectorize(attn_mask.dims()); + auto attn_mask_shape = common::vectorize(attn_mask.dims()); int mask_ndim = attn_mask_shape.size(); auto attn_mask_dist_attr = attn_mask.dist_attr(); int mask_dims_mapping_size = attn_mask_dist_attr.dims_mapping().size(); @@ -428,13 +428,13 @@ SpmdInfo FlashAttGradInferSpmd(const DistMetaTensor& q, mask_dims_mapping_size)); } - auto out_shape = phi::vectorize(out.dims()); + auto out_shape = common::vectorize(out.dims()); auto out_dist_attr = out.dist_attr(); - auto softmax_lse_shape = phi::vectorize(softmax_lse.dims()); + auto softmax_lse_shape = common::vectorize(softmax_lse.dims()); auto softmax_lse_dist_attr = softmax_lse.dist_attr(); - auto out_grad_shape = phi::vectorize(out_grad.dims()); + auto out_grad_shape = common::vectorize(out_grad.dims()); auto out_grad_dist_attr = out_grad.dist_attr(); std::string alphabet = "abcdefghijklmnopqrstuvwxyz"; diff --git a/paddle/phi/infermeta/spmd_rules/flatten.cc b/paddle/phi/infermeta/spmd_rules/flatten.cc index ec0917b840785d..bd1b88cf07b06c 100644 --- a/paddle/phi/infermeta/spmd_rules/flatten.cc +++ b/paddle/phi/infermeta/spmd_rules/flatten.cc @@ -93,7 +93,7 @@ SpmdInfo FlattenInferSpmd(const DistMetaTensor& x, int start_axis, int stop_axis) { // Step0: Verify input args based on flatten logic - auto src_shape = phi::vectorize(x.dims()); + auto src_shape = common::vectorize(x.dims()); int x_ndim = static_cast(src_shape.size()); auto x_dist_attr_src = x.dist_attr(); std::vector x_dims_mapping = x_dist_attr_src.dims_mapping(); @@ -145,9 +145,9 @@ SpmdInfo FlattenInferSpmdReverse(const DistMetaTensor& x, int start_axis, int stop_axis) { // Step0: Verify input args based on flatten logic - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); auto x_ndim = x_shape.size(); - auto out_shape = phi::vectorize(out.dims()); + auto out_shape = common::vectorize(out.dims()); int out_ndim = out_shape.size(); auto out_dist_attr_src = out.dist_attr(); std::vector out_dims_mapping = out_dist_attr_src.dims_mapping(); diff --git a/paddle/phi/infermeta/spmd_rules/layer_norm.cc b/paddle/phi/infermeta/spmd_rules/layer_norm.cc index 9faf0f240d3d20..7bd9482f4aa615 100644 --- a/paddle/phi/infermeta/spmd_rules/layer_norm.cc +++ b/paddle/phi/infermeta/spmd_rules/layer_norm.cc @@ -32,9 +32,9 @@ SpmdInfo LayerNormInferSpmd(const DistMetaTensor& x, float epsilon, int begin_norm_axis) { // Step0: verify input args based on layer_norm logic - auto x_shape = phi::vectorize(x.dims()); - auto scale_shape = phi::vectorize(scale.dims()); - auto bias_shape = phi::vectorize(bias.dims()); + auto x_shape = common::vectorize(x.dims()); + auto scale_shape = common::vectorize(scale.dims()); + auto bias_shape = common::vectorize(bias.dims()); int x_ndim = x_shape.size(); int scale_ndim = scale_shape.size(); int bias_ndim = bias_shape.size(); @@ -158,10 +158,10 @@ SpmdInfo LayerNormInferSpmdReverse(const DistMetaTensor& x, float epsilon, int 
begin_norm_axis) { // Step0: Verify input args based on layer_norm logic - auto x_shape = phi::vectorize(x.dims()); - auto out_shape = phi::vectorize(out.dims()); - auto mean_shape = phi::vectorize(mean.dims()); - auto variance_shape = phi::vectorize(variance.dims()); + auto x_shape = common::vectorize(x.dims()); + auto out_shape = common::vectorize(out.dims()); + auto mean_shape = common::vectorize(mean.dims()); + auto variance_shape = common::vectorize(variance.dims()); int x_ndim = x_shape.size(); int out_ndim = out_shape.size(); int mean_ndim = mean_shape.size(); @@ -303,7 +303,7 @@ SpmdInfo LayerNormGradInferSpmd(const DistMetaTensor& x, float epsilon, int begin_norm_axis) { auto get_shape = [](const auto& meta) { - return phi::vectorize(meta.dims()); + return common::vectorize(meta.dims()); }; // 1、check tensors shapes auto x_shape = get_shape(x); diff --git a/paddle/phi/infermeta/spmd_rules/matmul.cc b/paddle/phi/infermeta/spmd_rules/matmul.cc index 277073fb798b16..5cd895401dc96e 100644 --- a/paddle/phi/infermeta/spmd_rules/matmul.cc +++ b/paddle/phi/infermeta/spmd_rules/matmul.cc @@ -119,8 +119,8 @@ SpmdInfo MatmulInferSpmd(const DistMetaTensor& x, bool trans_x, bool trans_y) { // Step0: verify input args based on matmul logic - auto x_shape = phi::vectorize(x.dims()); - auto y_shape = phi::vectorize(y.dims()); + auto x_shape = common::vectorize(x.dims()); + auto y_shape = common::vectorize(y.dims()); int x_ndim = x_shape.size(); int y_ndim = y_shape.size(); auto x_dist_attr_src = x.dist_attr(); @@ -226,11 +226,11 @@ SpmdInfo MatmulInferSpmdReverse(const DistMetaTensor& x, const DistMetaTensor& out, bool trans_x, bool trans_y) { - auto out_shape = phi::vectorize(out.dims()); + auto out_shape = common::vectorize(out.dims()); int out_ndim = out_shape.size(); - auto x_shape = phi::vectorize(x.dims()); - auto y_shape = phi::vectorize(y.dims()); + auto x_shape = common::vectorize(x.dims()); + auto y_shape = common::vectorize(y.dims()); int x_ndim = x_shape.size(); int y_ndim = y_shape.size(); int max_ndim = std::max(x_ndim, y_ndim); diff --git a/paddle/phi/infermeta/spmd_rules/numel.cc b/paddle/phi/infermeta/spmd_rules/numel.cc index 013639cbb0df2b..ca0678b7731635 100644 --- a/paddle/phi/infermeta/spmd_rules/numel.cc +++ b/paddle/phi/infermeta/spmd_rules/numel.cc @@ -25,7 +25,7 @@ using phi::distributed::auto_parallel::str_join; SpmdInfo NumelInferSpmd(const DistMetaTensor& x) { std::string alphabet = "abcdefghijklmnopqrstuvwxyz"; - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); auto x_dist_attr_src = x.dist_attr(); std::vector x_dims_mapping = x_dist_attr_src.dims_mapping(); diff --git a/paddle/phi/infermeta/spmd_rules/reduction.cc b/paddle/phi/infermeta/spmd_rules/reduction.cc index 3935459683e551..8c8721a238a984 100644 --- a/paddle/phi/infermeta/spmd_rules/reduction.cc +++ b/paddle/phi/infermeta/spmd_rules/reduction.cc @@ -70,7 +70,7 @@ SpmdInfo ReductionInferSpmdBase(const DistMetaTensor& x, bool keep_dim, int reduce_type) { // Step0: Verify input args based on reduction logic - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); auto x_dist_attr_src = x.dist_attr(); std::vector x_dims_mapping = x_dist_attr_src.dims_mapping(); @@ -170,8 +170,8 @@ SpmdInfo ReductionInferSpmdReverse(const DistMetaTensor& x, const std::vector& axis, bool keep_dim) { // Step0: Verify input args based on reduction logic - auto x_shape = phi::vectorize(x.dims()); - auto 
out_shape = phi::vectorize(out.dims()); + auto x_shape = common::vectorize(x.dims()); + auto out_shape = common::vectorize(out.dims()); int x_ndim = x_shape.size(); int out_ndim = out_shape.size(); auto out_dist_attr_src = out.dist_attr(); @@ -228,8 +228,8 @@ SpmdInfo ReductionGradInferSpmd(const DistMetaTensor& x, TensorDistAttr x_dist_attr = out_grad_dist_attr; TensorDistAttr x_grad_dist_attr = out_grad_dist_attr; - std::vector x_dim = phi::vectorize(x.dims()); - std::vector out_grad_dim = phi::vectorize(out_grad.dims()); + std::vector x_dim = common::vectorize(x.dims()); + std::vector out_grad_dim = common::vectorize(out_grad.dims()); if (x_dim.size() != out_grad_dim.size()) { auto dims_mapping = x_dist_attr.dims_mapping(); diff --git a/paddle/phi/infermeta/spmd_rules/replicated.cc b/paddle/phi/infermeta/spmd_rules/replicated.cc index d0c90f7b2d2a96..a6759a8ea3aa7e 100644 --- a/paddle/phi/infermeta/spmd_rules/replicated.cc +++ b/paddle/phi/infermeta/spmd_rules/replicated.cc @@ -72,7 +72,7 @@ SpmdInfo ReplicatedInferSpmd(const std::vector& ins, continue; } VLOG(4) << "Input" << std::to_string(i) << " shape: [" - << str_join(phi::vectorize(ins[i]->dims())) << "] " + << str_join(common::vectorize(ins[i]->dims())) << "] " << "src_dims_mapping: [" << str_join(ins[i]->dist_attr().dims_mapping()) << "] " << "dst_dims_mapping: [" @@ -81,7 +81,7 @@ SpmdInfo ReplicatedInferSpmd(const std::vector& ins, for (int64_t i = 0; i < noutputs; i++) { VLOG(4) << "Output" << std::to_string(i) << " shape: [" - << str_join(phi::vectorize(outs[i]->dims())) << "] " + << str_join(common::vectorize(outs[i]->dims())) << "] " << "dst_dims_mapping: [" << str_join(output_dist_attrs[i].dims_mapping()) << "]"; } @@ -122,7 +122,7 @@ SpmdInfo ReplicatedInferSpmdReverse( VLOG(4) << "ReplicatedSpmd InferBackward:"; for (int64_t i = 0; i < noutputs; i++) { VLOG(4) << "Output" << std::to_string(i) << " shape: [" - << str_join(phi::vectorize(outs[i]->dims())) << "] " + << str_join(common::vectorize(outs[i]->dims())) << "] " << "src_dims_mapping: [" << str_join(outs[i]->dist_attr().dims_mapping()) << "] " << "dst_dims_mapping: [" @@ -131,7 +131,7 @@ SpmdInfo ReplicatedInferSpmdReverse( for (int64_t i = 0; i < ninputs; i++) { VLOG(4) << "Input" << std::to_string(i) << " shape: [" - << str_join(phi::vectorize(ins[i]->dims())) << "] " + << str_join(common::vectorize(ins[i]->dims())) << "] " << "dst_dims_mapping: [" << str_join(dst_input_dist_attrs[i].dims_mapping()) << "]"; } diff --git a/paddle/phi/infermeta/spmd_rules/reshape.cc b/paddle/phi/infermeta/spmd_rules/reshape.cc index c1364dc97d876e..5f4f59f9995dd8 100644 --- a/paddle/phi/infermeta/spmd_rules/reshape.cc +++ b/paddle/phi/infermeta/spmd_rules/reshape.cc @@ -225,8 +225,8 @@ SpmdInfo ReshapeInferSpmdReverse(const DistMetaTensor& x, const DistMetaTensor& out, const std::vector& shape) { // Step0: Verify input args based on reshape logic - auto x_shape = phi::vectorize(x.dims()); - auto out_shape = phi::vectorize(out.dims()); + auto x_shape = common::vectorize(x.dims()); + auto out_shape = common::vectorize(out.dims()); int x_ndim = x_shape.size(); int out_ndim = out_shape.size(); auto out_dist_attr_src = out.dist_attr(); @@ -317,7 +317,7 @@ SpmdInfo ReshapeInferSpmdDynamic(const DistMetaTensor& x, SpmdInfo ReshapeGradInferSpmd(const DistMetaTensor& x_shape, const DistMetaTensor& out_grad) { - std::vector out_grad_shape = phi::vectorize(out_grad.dims()); + std::vector out_grad_shape = common::vectorize(out_grad.dims()); const auto& x_shape_dist_src = x_shape.dist_attr(); 
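// Every spmd_rules hunk in this patch makes the same substitution: the dense
// shape of a DistMetaTensor is flattened with common::vectorize (formerly
// phi::vectorize) before the rule builds its einsum-like notation from
// dims_mapping. A minimal sketch of that access pattern; the helper name
// ShapeAndRank is illustrative, while DistMetaTensor, dims(), dist_attr(),
// and dims_mapping() all appear in the surrounding hunks. (Assumed headers:
// <cstdint>, <utility>, <vector>.)
std::pair<std::vector<int64_t>, int> ShapeAndRank(
    const phi::distributed::DistMetaTensor& t) {
  std::vector<int64_t> shape = common::vectorize(t.dims());  // DDim -> vector
  int ndim = static_cast<int>(shape.size());  // rank drives the notation size
  return {shape, ndim};
}
// Usage inside a rule, mirroring e.g. the reduction hunks above:
//   auto [x_shape, x_ndim] = ShapeAndRank(x);
//   std::vector<int64_t> x_dims_mapping = x.dist_attr().dims_mapping();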
auto tmp = ReshapeInferSpmdDynamic(x_shape, out_grad_shape); // check no shard is needed diff --git a/paddle/phi/infermeta/spmd_rules/slice.cc b/paddle/phi/infermeta/spmd_rules/slice.cc index 73caa2e65aa45b..54e0233ac8e10f 100644 --- a/paddle/phi/infermeta/spmd_rules/slice.cc +++ b/paddle/phi/infermeta/spmd_rules/slice.cc @@ -29,7 +29,7 @@ using phi::distributed::auto_parallel::str_join; SpmdInfo SliceInferSpmdBase(const DistMetaTensor& input, const std::vector& axes) { // Step0: Verify input args based on slice logic - auto input_shape = phi::vectorize(input.dims()); + auto input_shape = common::vectorize(input.dims()); int input_ndim = input_shape.size(); auto input_dist_attr_src = input.dist_attr(); std::vector input_dims_mapping = input_dist_attr_src.dims_mapping(); @@ -109,12 +109,11 @@ SpmdInfo SliceInferSpmd(const DistMetaTensor& input, SpmdInfo SliceInferSpmdReverseBase(const DistMetaTensor& input, const DistMetaTensor& output, const std::vector& axes) { - // Step0: Verify input args based on slice logic - auto output_shape = phi::vectorize(output.dims()); + auto output_shape = common::vectorize(output.dims()); int out_ndim = output_shape.size(); auto out_dist_attr = output.dist_attr(); int out_dims_mapping_size = out_dist_attr.dims_mapping().size(); - auto input_shape = phi::vectorize(input.dims()); + auto input_shape = common::vectorize(input.dims()); int input_ndim = input_shape.size(); auto input_dist_attr = input.dist_attr(); std::vector input_dims_mapping = input_dist_attr.dims_mapping(); @@ -177,7 +176,7 @@ SpmdInfo SliceInferSpmdReverseBase(const DistMetaTensor& input, VLOG(4) << "SliceInferSpmdReverse:"; VLOG(4) << "Einsum Notation: " << input_axes << "-->" << out_axes; VLOG(4) << "Output" - << " shape: [" << str_join(phi::vectorize(output.dims())) << "] " + << " shape: [" << str_join(common::vectorize(output.dims())) << "] " << "axes: [" << str_join(axes) << "] " << "src_dims_mapping: [" << str_join(output.dist_attr().dims_mapping()) << "] " @@ -223,10 +222,10 @@ SpmdInfo SliceGradInferBase(const DistMetaTensor& input, auto out_dist_attr = out_grad.dist_attr(); input_dist_attr = UnShardTensorDims(input_dist_attr, axes); out_dist_attr = UnShardTensorDims(out_dist_attr, axes); - auto output_shape = phi::vectorize(out_grad.dims()); + auto output_shape = common::vectorize(out_grad.dims()); int out_ndim = output_shape.size(); int out_dims_mapping_size = out_dist_attr.dims_mapping().size(); - auto input_shape = phi::vectorize(input.dims()); + auto input_shape = common::vectorize(input.dims()); int input_ndim = input_shape.size(); std::vector input_dims_mapping = input_dist_attr.dims_mapping(); diff --git a/paddle/phi/infermeta/spmd_rules/softmax.cc b/paddle/phi/infermeta/spmd_rules/softmax.cc index 35f811ab99d2b7..1b3d9c5e56a946 100644 --- a/paddle/phi/infermeta/spmd_rules/softmax.cc +++ b/paddle/phi/infermeta/spmd_rules/softmax.cc @@ -30,7 +30,7 @@ using phi::distributed::auto_parallel::str_join; SpmdInfo SoftmaxInferSpmd(const DistMetaTensor& x, int axis) { // Step0: Verify input args based on softmax logic - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); auto x_dist_attr_src = x.dist_attr(); std::vector x_dims_mapping = x_dist_attr_src.dims_mapping(); @@ -98,8 +98,8 @@ SpmdInfo SoftmaxInferSpmdReverse(const DistMetaTensor& x, const DistMetaTensor& out, int axis) { // Step0: verify input args based on softmax logic - auto x_shape = phi::vectorize(x.dims()); - auto out_shape = phi::vectorize(out.dims()); + 
auto x_shape = common::vectorize(x.dims()); + auto out_shape = common::vectorize(out.dims()); int x_ndim = x_shape.size(); int out_ndim = out_shape.size(); auto out_dist_attr_src = out.dist_attr(); diff --git a/paddle/phi/infermeta/spmd_rules/split.cc b/paddle/phi/infermeta/spmd_rules/split.cc index 19c1ff96558710..895075a52f4578 100644 --- a/paddle/phi/infermeta/spmd_rules/split.cc +++ b/paddle/phi/infermeta/spmd_rules/split.cc @@ -28,7 +28,7 @@ using phi::distributed::auto_parallel::str_join; SpmdInfo SplitWithNumInferSpmd(const DistMetaTensor& x, int num, int axis) { // Step0: Verify input args based on split logic - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); auto x_dist_attr_src = x.dist_attr(); std::vector x_dims_mapping = x_dist_attr_src.dims_mapping(); @@ -105,8 +105,8 @@ SpmdInfo SplitWithNumInferSpmdReverse( int axis) { // Step0: Verify input args based on split logic int nouts = outs.size(); - int out_ndim = phi::vectorize(outs[0]->dims()).size(); - auto x_shape = phi::vectorize(x.dims()); + int out_ndim = common::vectorize(outs[0]->dims()).size(); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); auto x_dist_attr = x.dist_attr(); std::vector x_dims_mapping = x_dist_attr.dims_mapping(); @@ -125,7 +125,7 @@ SpmdInfo SplitWithNumInferSpmdReverse( x_ndim, out_ndim)); for (int i = 0; i < num; i++) { - auto shape = phi::vectorize(outs[i]->dims()); + auto shape = common::vectorize(outs[i]->dims()); int ndim = shape.size(); auto dist_attr = outs[i]->dist_attr(); int dims_mapping_size = dist_attr.dims_mapping().size(); @@ -187,7 +187,7 @@ SpmdInfo SplitWithNumInferSpmdReverse( VLOG(4) << "Einsum Notation: " << x_axes << "-->" << out_axes; for (int i = 0; i < nouts; i++) { VLOG(4) << "Output" << std::to_string(i) << " shape: [" - << str_join(phi::vectorize(outs[i]->dims())) << "] " + << str_join(common::vectorize(outs[i]->dims())) << "] " << "src_dims_mapping: [" << str_join(outs[i]->dist_attr().dims_mapping()) << "] " << "dst_dims_mapping: [" diff --git a/paddle/phi/infermeta/spmd_rules/squeeze.cc b/paddle/phi/infermeta/spmd_rules/squeeze.cc index 8080e6c3d24ac3..6f711e04d6e219 100644 --- a/paddle/phi/infermeta/spmd_rules/squeeze.cc +++ b/paddle/phi/infermeta/spmd_rules/squeeze.cc @@ -105,7 +105,7 @@ void MakeSqueezeDimTransReverseWithAxis( SpmdInfo SqueezeInferSpmd(const DistMetaTensor& x, const std::vector& axis) { // Step0: Verify input args based on squeeze logic - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); auto x_dist_attr_src = x.dist_attr(); std::vector x_dims_mapping = x_dist_attr_src.dims_mapping(); @@ -177,9 +177,9 @@ SpmdInfo SqueezeInferSpmdReverse(const DistMetaTensor& x, const DistMetaTensor& out, const std::vector& axis) { // Step0: Verify input args based on squeeze logic - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); - auto out_shape = phi::vectorize(out.dims()); + auto out_shape = common::vectorize(out.dims()); int out_ndim = out_shape.size(); auto out_dist_attr_src = out.dist_attr(); std::vector out_dims_mapping = out_dist_attr_src.dims_mapping(); diff --git a/paddle/phi/infermeta/spmd_rules/stack.cc b/paddle/phi/infermeta/spmd_rules/stack.cc index d6f667a7fbdfa9..5f3499a5b1ad7a 100644 --- a/paddle/phi/infermeta/spmd_rules/stack.cc +++ b/paddle/phi/infermeta/spmd_rules/stack.cc @@ -42,7 +42,7 @@ SpmdInfo StackInferSpmd(const 
std::vector& x, int axis) { x.end(), std::back_inserter(tensor_shapes), [](const DistMetaTensor& meta) { - return phi::vectorize(meta.dims()); + return common::vectorize(meta.dims()); }); bool all_empty = std::all_of(tensor_shapes.begin(), tensor_shapes.end(), IsEmpty); diff --git a/paddle/phi/infermeta/spmd_rules/transpose.cc b/paddle/phi/infermeta/spmd_rules/transpose.cc index 441ede3850d363..e4942f2e4718ef 100644 --- a/paddle/phi/infermeta/spmd_rules/transpose.cc +++ b/paddle/phi/infermeta/spmd_rules/transpose.cc @@ -50,7 +50,7 @@ void BuildEinsumNotation(const size_t x_ndim, SpmdInfo TransposeInferSpmd(const DistMetaTensor& x, const std::vector& perm) { // Step0: Verify input args based on transpose logic - std::vector x_shape = phi::vectorize(x.dims()); + std::vector x_shape = common::vectorize(x.dims()); size_t x_ndim = x_shape.size(); const TensorDistAttr& x_dist_attr_src = x.dist_attr(); std::vector x_dims_mapping = x_dist_attr_src.dims_mapping(); @@ -107,8 +107,8 @@ SpmdInfo TransposeInferSpmdReverse(const DistMetaTensor& x, const DistMetaTensor& out, const std::vector& perm) { // Step0: Verify input args based on transpose logic - const std::vector x_shape = phi::vectorize(x.dims()); - const std::vector out_shape = phi::vectorize(out.dims()); + const std::vector x_shape = common::vectorize(x.dims()); + const std::vector out_shape = common::vectorize(out.dims()); int x_ndim = x_shape.size(); int out_ndim = out_shape.size(); TensorDistAttr out_dist_attr_src = out.dist_attr(); @@ -171,7 +171,8 @@ SpmdInfo TransposeInferSpmdReverse(const DistMetaTensor& x, SpmdInfo TransposeGradInferSpmd(const DistMetaTensor& out_grad, const std::vector& perm) { - const std::vector out_grad_shape = phi::vectorize(out_grad.dims()); + const std::vector out_grad_shape = + common::vectorize(out_grad.dims()); size_t out_grad_ndim = out_grad_shape.size(); const std::vector out_grad_dims_mapping = out_grad.dist_attr().dims_mapping(); diff --git a/paddle/phi/infermeta/spmd_rules/triu.cc b/paddle/phi/infermeta/spmd_rules/triu.cc index 4414e3b14016f7..ed98889de4ea72 100644 --- a/paddle/phi/infermeta/spmd_rules/triu.cc +++ b/paddle/phi/infermeta/spmd_rules/triu.cc @@ -24,7 +24,7 @@ namespace distributed { using phi::distributed::auto_parallel::str_join; SpmdInfo TriuInferSpmdBase(const DistMetaTensor& x) { - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); auto x_dist_attr_src = x.dist_attr(); std::vector x_dims_mapping = x_dist_attr_src.dims_mapping(); @@ -71,7 +71,7 @@ SpmdInfo TriuInferSpmd(const DistMetaTensor& x, int diagonal) { SpmdInfo TriuInferSpmdReverseBase(const DistMetaTensor& x, const DistMetaTensor& out) { - auto out_shape = phi::vectorize(out.dims()); + auto out_shape = common::vectorize(out.dims()); int out_ndim = out_shape.size(); auto out_dist_attr_src = out.dist_attr(); std::vector out_dims_mapping = out_dist_attr_src.dims_mapping(); @@ -117,7 +117,7 @@ SpmdInfo TriuInferSpmdReverse(const DistMetaTensor& x, } SpmdInfo TriuGradInferSpmdBase(const DistMetaTensor& out_grad) { - auto out_shape = phi::vectorize(out_grad.dims()); + auto out_shape = common::vectorize(out_grad.dims()); int out_ndim = out_shape.size(); auto out_dist_attr_src = out_grad.dist_attr(); const std::vector& out_dims_mapping = diff --git a/paddle/phi/infermeta/spmd_rules/unsqueeze.cc b/paddle/phi/infermeta/spmd_rules/unsqueeze.cc index a5819f5adac39a..935140a0509bab 100644 --- a/paddle/phi/infermeta/spmd_rules/unsqueeze.cc +++ 
b/paddle/phi/infermeta/spmd_rules/unsqueeze.cc @@ -92,7 +92,7 @@ std::vector> MakeUnsqueezeDimTransReverse( SpmdInfo UnsqueezeInferSpmd(const DistMetaTensor& x, const std::vector& axis) { // Step0: Verify input args based on unsqueeze logic - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); auto x_dist_attr_src = x.dist_attr(); std::vector x_dims_mapping = x_dist_attr_src.dims_mapping(); @@ -161,9 +161,9 @@ SpmdInfo UnsqueezeInferSpmdReverse(const DistMetaTensor& x, const DistMetaTensor& out, const std::vector& axis) { // Step0: Verify input args based on unsqueeze logic - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); - auto out_shape = phi::vectorize(out.dims()); + auto out_shape = common::vectorize(out.dims()); int out_ndim = out_shape.size(); auto out_dist_attr_src = out.dist_attr(); std::vector out_dims_mapping = out_dist_attr_src.dims_mapping(); diff --git a/paddle/phi/infermeta/spmd_rules/where.cc b/paddle/phi/infermeta/spmd_rules/where.cc index d5cd639f8d5798..b176365bb2d7d0 100644 --- a/paddle/phi/infermeta/spmd_rules/where.cc +++ b/paddle/phi/infermeta/spmd_rules/where.cc @@ -26,7 +26,7 @@ using phi::distributed::auto_parallel::str_join; SpmdInfo WhereInferSpmd(const DistMetaTensor& condition, const DistMetaTensor& x, const DistMetaTensor& y) { - auto cond_shape = phi::vectorize(condition.dims()); + auto cond_shape = common::vectorize(condition.dims()); int cond_ndim = cond_shape.size(); auto cond_dist_attr_src = condition.dist_attr(); std::vector cond_dims_mapping = cond_dist_attr_src.dims_mapping(); @@ -41,7 +41,7 @@ SpmdInfo WhereInferSpmd(const DistMetaTensor& condition, std::string alphabet = "abcdefghijklmnopqrstuvwxyz"; std::string cond_axes = alphabet.substr(0, cond_ndim); - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); auto x_dist_attr_src = x.dist_attr(); std::vector x_dims_mapping = x_dist_attr_src.dims_mapping(); @@ -63,7 +63,7 @@ SpmdInfo WhereInferSpmd(const DistMetaTensor& condition, std::string x_axes = alphabet.substr(cond_ndim - x_ndim, x_ndim); - auto y_shape = phi::vectorize(y.dims()); + auto y_shape = common::vectorize(y.dims()); int y_ndim = y_shape.size(); auto y_dist_attr_src = y.dist_attr(); std::vector y_dims_mapping = y_dist_attr_src.dims_mapping(); @@ -134,7 +134,7 @@ SpmdInfo WhereInferSpmdReverse(const DistMetaTensor& condition, const DistMetaTensor& x, const DistMetaTensor& y, const DistMetaTensor& output) { - auto cond_shape = phi::vectorize(condition.dims()); + auto cond_shape = common::vectorize(condition.dims()); int cond_ndim = cond_shape.size(); auto cond_dist_attr_src = condition.dist_attr(); std::vector cond_dims_mapping = cond_dist_attr_src.dims_mapping(); @@ -149,7 +149,7 @@ SpmdInfo WhereInferSpmdReverse(const DistMetaTensor& condition, std::string alphabet = "abcdefghijklmnopqrstuvwxyz"; std::string cond_axes = alphabet.substr(0, cond_ndim); - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); auto x_dist_attr_src = x.dist_attr(); std::vector x_dims_mapping = x_dist_attr_src.dims_mapping(); @@ -170,7 +170,7 @@ SpmdInfo WhereInferSpmdReverse(const DistMetaTensor& condition, std::string x_axes = alphabet.substr(cond_ndim - x_ndim, x_ndim); - auto y_shape = phi::vectorize(y.dims()); + auto y_shape = common::vectorize(y.dims()); int y_ndim = y_shape.size(); auto 
y_dist_attr_src = y.dist_attr(); std::vector y_dims_mapping = y_dist_attr_src.dims_mapping(); @@ -191,7 +191,7 @@ SpmdInfo WhereInferSpmdReverse(const DistMetaTensor& condition, std::string y_axes = alphabet.substr(cond_ndim - y_ndim, y_ndim); - auto out_shape = phi::vectorize(output.dims()); + auto out_shape = common::vectorize(output.dims()); int out_ndim = out_shape.size(); auto out_dist_attr_src = output.dist_attr(); std::vector out_dims_mapping = out_dist_attr_src.dims_mapping(); @@ -248,7 +248,7 @@ SpmdInfo WhereGradInferSpmd(const DistMetaTensor& condition, const DistMetaTensor& x, const DistMetaTensor& y, const DistMetaTensor& out_grad) { - auto cond_shape = phi::vectorize(condition.dims()); + auto cond_shape = common::vectorize(condition.dims()); int cond_ndim = cond_shape.size(); auto cond_dist_attr_src = condition.dist_attr(); std::vector cond_dims_mapping = cond_dist_attr_src.dims_mapping(); @@ -263,7 +263,7 @@ SpmdInfo WhereGradInferSpmd(const DistMetaTensor& condition, std::string alphabet = "abcdefghijklmnopqrstuvwxyz"; std::string cond_axes = alphabet.substr(0, cond_ndim); - auto x_shape = phi::vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int x_ndim = x_shape.size(); auto x_dist_attr_src = x.dist_attr(); std::vector x_dims_mapping = x_dist_attr_src.dims_mapping(); @@ -284,7 +284,7 @@ SpmdInfo WhereGradInferSpmd(const DistMetaTensor& condition, std::string x_axes = alphabet.substr(cond_ndim - x_ndim, x_ndim); - auto y_shape = phi::vectorize(y.dims()); + auto y_shape = common::vectorize(y.dims()); int y_ndim = y_shape.size(); auto y_dist_attr_src = y.dist_attr(); std::vector y_dims_mapping = y_dist_attr_src.dims_mapping(); @@ -305,7 +305,7 @@ SpmdInfo WhereGradInferSpmd(const DistMetaTensor& condition, std::string y_axes = alphabet.substr(cond_ndim - y_ndim, y_ndim); - auto out_grad_shape = phi::vectorize(out_grad.dims()); + auto out_grad_shape = common::vectorize(out_grad.dims()); int out_grad_ndim = out_grad_shape.size(); auto out_grad_dist_attr_src = out_grad.dist_attr(); std::vector out_grad_dims_mapping = diff --git a/paddle/phi/infermeta/strings/nullary.cc b/paddle/phi/infermeta/strings/nullary.cc index c2428a2ff3ae9e..80f75c0e067217 100644 --- a/paddle/phi/infermeta/strings/nullary.cc +++ b/paddle/phi/infermeta/strings/nullary.cc @@ -17,7 +17,7 @@ namespace phi { namespace strings { void CreateInferMeta(const IntArray& shape, MetaTensor* out) { - const auto& out_dims = phi::make_ddim(shape.GetData()); + const auto& out_dims = common::make_ddim(shape.GetData()); out->set_dims(out_dims); out->set_dtype(DataType::PSTRING); out->set_layout(DataLayout::PSTRING_UNION); diff --git a/paddle/phi/infermeta/ternary.cc b/paddle/phi/infermeta/ternary.cc index a38e9ca6f9a14f..27fe7dc19ae4cc 100644 --- a/paddle/phi/infermeta/ternary.cc +++ b/paddle/phi/infermeta/ternary.cc @@ -16,8 +16,8 @@ limitations under the License. 
*/ #include "glog/logging.h" -#include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" +#include "paddle/common/layout.h" #include "paddle/phi/kernels/funcs/common_shape.h" #include "paddle/phi/kernels/impl/box_coder.h" @@ -66,9 +66,9 @@ void AccuracyInferMeta(const MetaTensor& out, label_dim[0])); } - accuracy->set_dims(phi::make_ddim({})); - correct->set_dims(phi::make_ddim({})); - total->set_dims(phi::make_ddim({})); + accuracy->set_dims(common::make_ddim({})); + correct->set_dims(common::make_ddim({})); + total->set_dims(common::make_ddim({})); accuracy->set_dtype(out.dtype()); correct->set_dtype(out.dtype()); total->set_dtype(out.dtype()); @@ -141,7 +141,7 @@ void AddmmInferMeta(const MetaTensor& input, output_dims.push_back(x_dims[0]); output_dims.push_back(y_dims[1]); - out->set_dims(make_ddim(output_dims)); + out->set_dims(common::make_ddim(output_dims)); out->share_lod(input); out->set_dtype(input.dtype()); } @@ -264,12 +264,12 @@ void DpsgdInferMeta(const MetaTensor& param, int size, MetaTensor* param_out) { auto lr_dims = learning_rate.dims(); - PADDLE_ENFORCE_EQ(phi::product(lr_dims), + PADDLE_ENFORCE_EQ(common::product(lr_dims), 1, phi::errors::InvalidArgument( "Learning rate should have 1 dimension. But Received " "LearningRate's dims [%s].", - phi::product(lr_dims))); + common::product(lr_dims))); auto param_dims = param.dims(); PADDLE_ENFORCE_EQ( param_dims, @@ -299,23 +299,23 @@ void ArangeTensorInferMeta(const MetaTensor& start, const MetaTensor& end, const MetaTensor& step, MetaTensor* out) { - PADDLE_ENFORCE_EQ(phi::product(start.dims()), + PADDLE_ENFORCE_EQ(common::product(start.dims()), 1, phi::errors::InvalidArgument( "The numel of Input(start) should be 1, but got %d", - phi::product(start.dims()))); + common::product(start.dims()))); - PADDLE_ENFORCE_EQ(phi::product(end.dims()), + PADDLE_ENFORCE_EQ(common::product(end.dims()), 1, phi::errors::InvalidArgument( "The numel of Input(end) should be 1, but got %d", - phi::product(end.dims()))); + common::product(end.dims()))); - PADDLE_ENFORCE_EQ(phi::product(step.dims()), + PADDLE_ENFORCE_EQ(common::product(step.dims()), 1, phi::errors::InvalidArgument( "The numel of Input(step) should be 1, but got %d", - phi::product(step.dims()))); + common::product(step.dims()))); out->set_dims({-1}); out->set_dtype(start.dtype()); @@ -334,7 +334,7 @@ void InstanceNormInferMeta(const MetaTensor& x, phi::errors::InvalidArgument( "The y in InstanceNormInferMeta can't be nullptr.")); const auto x_dims = x.dims(); - PADDLE_ENFORCE_NE(phi::product(x_dims), + PADDLE_ENFORCE_NE(common::product(x_dims), 0, phi::errors::PreconditionNotMet( "The Input variable X has not " @@ -373,7 +373,7 @@ void InstanceNormInferMeta(const MetaTensor& x, "of scale is [%d]", scale_dim, scale_dim.size())); - bool check = !((!config.is_runtime) && (phi::product(scale_dim) <= 0)); + bool check = !((!config.is_runtime) && (common::product(scale_dim) <= 0)); if (check) { PADDLE_ENFORCE_EQ(scale_dim[0], C, @@ -395,7 +395,7 @@ void InstanceNormInferMeta(const MetaTensor& x, "of bias is [%d]", bias_dim, bias_dim.size())); - bool check = !((!config.is_runtime) && (phi::product(bias_dim) <= 0)); + bool check = !((!config.is_runtime) && (common::product(bias_dim) <= 0)); if (check) { PADDLE_ENFORCE_EQ(bias_dim[0], C, @@ -458,7 +458,7 @@ void GroupNormInferMeta(const MetaTensor& x, x_dim.size(), x_dim)); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = 
common::StringToDataLayout(data_layout_str); const int64_t channel_num = (data_layout == DataLayout::kNCHW ? x_dim[1] : x_dim[x_dim.size() - 1]); auto batch_size = x_dim[0]; @@ -578,7 +578,7 @@ void LayerNormInferMeta(const MetaTensor& x, begin_norm_axis, x_dim.size())); - auto matrix_dim = phi::flatten_to_2d(x_dim, begin_norm_axis); + auto matrix_dim = common::flatten_to_2d(x_dim, begin_norm_axis); // keep the axis size before normalization for shape of variance and mean auto before_norm_dims = slice_ddim(x_dim, 0, begin_norm_axis); @@ -693,27 +693,27 @@ void LinspaceRawInferMeta(const MetaTensor& start, const MetaTensor& number, MetaTensor* out) { PADDLE_ENFORCE_EQ( - phi::product(start.dims()), + common::product(start.dims()), 1, phi::errors::InvalidArgument("The size of Input(start) should be 1," "but got %d.", - phi::product(start.dims()))); + common::product(start.dims()))); PADDLE_ENFORCE_EQ( - phi::product(stop.dims()), + common::product(stop.dims()), 1, phi::errors::InvalidArgument("The size of Input(stop) should be 1," "but got %d.", - phi::product(stop.dims()))); + common::product(stop.dims()))); PADDLE_ENFORCE_EQ( - phi::product(number.dims()), + common::product(number.dims()), 1, phi::errors::InvalidArgument("The size of Input(number) should be 1," "but got %d.", - phi::product(number.dims()))); + common::product(number.dims()))); - out->set_dims(phi::make_ddim({-1})); + out->set_dims(common::make_ddim({-1})); out->set_dtype(start.dtype()); } @@ -810,11 +810,11 @@ void MultiClassNMSInferMeta(const MetaTensor& bboxes, // Here the box_dims[0] is not the real dimension of output. // It will be rewritten in the computing kernel. - out->set_dims(phi::make_ddim({-1, box_dims[2] + 2})); + out->set_dims(common::make_ddim({-1, box_dims[2] + 2})); out->set_dtype(bboxes.dtype()); - index->set_dims(phi::make_ddim({-1, 1})); + index->set_dims(common::make_ddim({-1, 1})); index->set_dtype(DataType::INT32); - nms_rois_num->set_dims(phi::make_ddim({-1})); + nms_rois_num->set_dims(common::make_ddim({-1})); nms_rois_num->set_dtype(DataType::INT32); } @@ -832,8 +832,8 @@ void NllLossRawInferMeta(const MetaTensor& input, true, phi::errors::InvalidArgument( "The tensor rank of Input(X) must be 2 or 4.")); - bool contain_unknown_dim = - phi::contain_unknown_dim(x_dims) || phi::contain_unknown_dim(label_dims); + bool contain_unknown_dim = common::contain_unknown_dim(x_dims) || + common::contain_unknown_dim(label_dims); bool check = config.is_runtime || !contain_unknown_dim; if (check) { PADDLE_ENFORCE_EQ( @@ -867,7 +867,7 @@ void NllLossRawInferMeta(const MetaTensor& input, if (reduction == "none") { out->set_dims({x_dims[0]}); } else { - out->set_dims(phi::make_ddim({})); + out->set_dims(common::make_ddim({})); } } else if (x_dims.size() == 4) { PADDLE_ENFORCE_EQ(label_dims.size(), @@ -890,10 +890,10 @@ void NllLossRawInferMeta(const MetaTensor& input, if (reduction == "none") { out->set_dims({x_dims[0], x_dims[2], x_dims[3]}); } else { - out->set_dims(phi::make_ddim({})); + out->set_dims(common::make_ddim({})); } } - total_weight->set_dims(phi::make_ddim({})); + total_weight->set_dims(common::make_ddim({})); out->set_dtype(input.dtype()); total_weight->set_dtype(input.dtype()); } @@ -1250,9 +1250,9 @@ void SendURecvInferMeta(const MetaTensor& x, "Src_index and Dst_index should have the same shape.")); auto dims = x.dims(); - std::vector dims_ = phi::vectorize(dims); + std::vector dims_ = common::vectorize(dims); dims_[0] = -1; - out->set_dims(phi::make_ddim(dims_)); + 
out->set_dims(common::make_ddim(dims_)); out->set_dtype(x.dtype()); if (reduce_op == "MEAN") { @@ -1267,7 +1267,7 @@ void SparseMomentumInferMeta(const MetaTensor& param, MetaTensor* param_out, MetaTensor* velocity_out, MetaTensor* master_param_out) { - auto lr_dims = phi::product(learning_rate.dims()); + auto lr_dims = common::product(learning_rate.dims()); PADDLE_ENFORCE_EQ(lr_dims != 0 && lr_dims == 1, true, phi::errors::InvalidArgument( @@ -1500,7 +1500,7 @@ void QuantLinearInferMeta(const MetaTensor& x, std::vector output_dims; - auto in_mat_dims = phi::flatten_to_2d(in_dims, in_num_col_dims); + auto in_mat_dims = common::flatten_to_2d(in_dims, in_num_col_dims); auto w_dims0 = padding_weights ? w_dims[0] - 4 : w_dims[0]; auto w_dims1 = padding_weights ? w_dims[1] - 4 : w_dims[1]; PADDLE_ENFORCE_EQ( @@ -1514,14 +1514,14 @@ void QuantLinearInferMeta(const MetaTensor& x, in_mat_dims[1], in_mat_dims, w_dims0, - phi::make_ddim({w_dims0, w_dims1}))); + common::make_ddim({w_dims0, w_dims1}))); output_dims.reserve(static_cast(in_num_col_dims + 1)); for (int i = 0; i < in_num_col_dims; ++i) { output_dims.push_back(in_dims[i]); } output_dims.push_back(w_dims1); - y->set_dims(make_ddim(output_dims)); + y->set_dims(common::make_ddim(output_dims)); y->share_lod(x); y->set_dtype(x.dtype()); } diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 26568d561ad007..bd3aea376503c3 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -37,12 +37,12 @@ namespace phi { namespace detail { // Used in MatrixRankInferMeta static DDim CheckAndGetOutputDim(const DDim& dim_x) { - auto x_vec = phi::vectorize(dim_x); + auto x_vec = common::vectorize(dim_x); if (x_vec.size() == 2) { - return phi::make_ddim({}); + return common::make_ddim({}); } x_vec.erase(x_vec.end() - 2, x_vec.end()); - return phi::make_ddim(x_vec); + return common::make_ddim(x_vec); } } // namespace detail @@ -115,10 +115,10 @@ void AffineGridInferMeta(const MetaTensor& input, theta_dims)); if (outputShape.GetData().size() == 4 && !is_from_tensor) { // N * H * W * 2 - output->set_dims(phi::make_ddim({theta_dims[0], -1, -1, 2})); + output->set_dims(common::make_ddim({theta_dims[0], -1, -1, 2})); } else { // N * D * H * W * 3 - output->set_dims(phi::make_ddim({theta_dims[0], -1, -1, -1, 3})); + output->set_dims(common::make_ddim({theta_dims[0], -1, -1, -1, 3})); } output->set_dtype(input.dtype()); output->share_lod(input); @@ -146,7 +146,7 @@ void AllToAllInferMeta(const MetaTensor& x, MetaTensor* out) { void ArrayLengthInferMeta(const MetaTensor& x, MetaTensor* out) { out->set_dtype(phi::DataType::INT64); - out->set_dims(make_ddim({1})); + out->set_dims(common::make_ddim({1})); } void ArrayToTensorInferMeta(const MetaTensor& x, @@ -158,21 +158,21 @@ void ArrayToTensorInferMeta(const MetaTensor& x, if (config.is_runtime) return; auto dims = x.dims(); // if the shape is empty - if (dims == phi::make_ddim({0UL})) return; + if (dims == common::make_ddim({0UL})) return; // otherwise, suppose the shape of array is the shape of tensor in the // array, which is consistent with what tensor_array_read_write does if (use_stack) { - auto dim_vec = phi::vectorize(dims); + auto dim_vec = common::vectorize(dims); // use -1 for the stack dim size dim_vec.insert(dim_vec.begin() + axis, -1); - dims = phi::make_ddim(dim_vec); + dims = common::make_ddim(dim_vec); } else { // use -1 for the concat dim size dims[axis] = -1; } out->set_dims(dims); out_index->set_dtype(DataType::INT32); -
out_index->set_dims(phi::make_ddim({-1})); + out_index->set_dims(common::make_ddim({-1})); } void ArgMinMaxInferMeta(const MetaTensor& x, @@ -208,7 +208,7 @@ void ArgMinMaxInferMeta(const MetaTensor& x, vec = std::vector(x.dims().size() - 1, -1); } } - out->set_dims(phi::make_ddim(vec)); + out->set_dims(common::make_ddim(vec)); if (dtype == DataType::INT32 || dtype == DataType::INT64) { out->set_dtype(dtype); } @@ -249,7 +249,7 @@ void ArgMinMaxInferMeta(const MetaTensor& x, if (dtype == DataType::INT32) { int64_t all_element_num = 0; if (flatten) { - all_element_num = phi::product(x_dims); + all_element_num = common::product(x_dims); } else { all_element_num = x_dims[static_cast(int_axis)]; } @@ -282,7 +282,7 @@ void ArgMinMaxInferMeta(const MetaTensor& x, vec.emplace_back(x_dims[static_cast(i)]); } - out->set_dims(phi::make_ddim(vec)); + out->set_dims(common::make_ddim(vec)); if (dtype == DataType::INT32 || dtype == DataType::INT64) { out->set_dtype(dtype); } @@ -325,9 +325,9 @@ void ArgsortInferMeta(const MetaTensor& input, } void AsRealInferMeta(const MetaTensor& input, MetaTensor* output) { - auto out_dims_v = phi::vectorize(input.dims()); + auto out_dims_v = common::vectorize(input.dims()); out_dims_v.push_back(2); - auto out_dims = phi::make_ddim(out_dims_v); + auto out_dims = common::make_ddim(out_dims_v); output->set_dims(out_dims); output->share_lod(input); output->set_dtype(dtype::ToReal(input.dtype())); @@ -374,7 +374,7 @@ void BatchSizeLikeInferMeta(const MetaTensor& x, std::transform(shape.begin(), shape.end(), shape_int64.begin(), [](int a) { return static_cast(a); }); - auto output_dim = phi::make_ddim(shape_int64); + auto output_dim = common::make_ddim(shape_int64); int input_dim_size = static_cast(x.dims().size()); PADDLE_ENFORCE_GE( @@ -459,7 +459,7 @@ void CINNBroadcastInferMeta(const MetaTensor& x, const std::vector& axes, const std::vector& out_shape, MetaTensor* out) { - out->set_dims(phi::make_ddim(out_shape)); + out->set_dims(common::make_ddim(out_shape)); out->set_dtype(x.dtype()); } @@ -488,7 +488,7 @@ void ClassCenterSampleInferMeta(const MetaTensor& label, "output of sampled local class center should not be null.")); remapped_label->set_dims(label.dims()); remapped_label->set_dtype(label.dtype()); - sampled_local_class_center->set_dims(phi::make_ddim({num_samples})); + sampled_local_class_center->set_dims(common::make_ddim({num_samples})); sampled_local_class_center->set_dtype(label.dtype()); } @@ -532,7 +532,7 @@ void CumInferMeta(const MetaTensor& x, MetaTensor* out) { auto x_dims = x.dims(); if (flatten) { - out->set_dims(phi::make_ddim({phi::product(x_dims)})); + out->set_dims(common::make_ddim({common::product(x_dims)})); out->set_dtype(x.dtype()); } else { if (x_dims.size() > 0) { @@ -598,7 +598,7 @@ void CumWithIndicesInferMeta(const MetaTensor& x, _axis = axis; } PADDLE_ENFORCE_LT( - phi::vectorize(x_dims)[_axis], + common::vectorize(x_dims)[_axis], INT32_MAX, phi::errors::OutOfRange( "cummax with axis %ld may be overflow, set dtype int64 to continue", @@ -678,7 +678,7 @@ void CropInferMeta(const MetaTensor& x, } } } - out->set_dims(phi::make_ddim(out_dims)); + out->set_dims(common::make_ddim(out_dims)); out->set_dtype(x.dtype()); } @@ -698,7 +698,7 @@ void DecodeJpegInferMeta(const MetaTensor& x, mode); } if (out != nullptr) { - out->set_dims(phi::make_ddim(out_dims)); + out->set_dims(common::make_ddim(out_dims)); out->set_dtype(x.dtype()); } } @@ -767,11 +767,11 @@ void DiagEmbedInferMeta( dim2)); int new_dim_len = static_cast(offset_ + 
x_dims[x_dims.size() - 1]); - auto sizes = vectorize(x_dims); + auto sizes = common::vectorize(x_dims); sizes.pop_back(); sizes.insert(sizes.begin() + std::min(dim1_, dim2_), new_dim_len); sizes.insert(sizes.begin() + std::max(dim1_, dim2_), new_dim_len); - out->set_dims(phi::make_ddim(sizes)); + out->set_dims(common::make_ddim(sizes)); out->set_dtype(x.dtype()); } @@ -873,7 +873,7 @@ void DiagonalInferMeta(const MetaTensor& input, axis1, axis2)); - auto out_dims = vectorize(x_dims); + auto out_dims = common::vectorize(x_dims); // from out_dims get the dim size of axis1_. auto axis1_size = out_dims[axis1_]; auto axis2_size = out_dims[axis2_]; @@ -903,7 +903,7 @@ void DiagonalInferMeta(const MetaTensor& input, out_dims.push_back(0); } } - out->set_dims(phi::make_ddim(out_dims)); + out->set_dims(common::make_ddim(out_dims)); out->set_dtype(input.dtype()); } @@ -962,7 +962,7 @@ void EigInferMeta(const MetaTensor& x, MetaTensor* out_w, MetaTensor* out_v) { const DataType& x_dtype = x.dtype(); const DataType& out_dtype = IsComplexType(x_dtype) ? x_dtype : ToComplexType(x_dtype); - out_w->set_dims(phi::make_ddim(batch_dims_vec)); + out_w->set_dims(common::make_ddim(batch_dims_vec)); out_w->set_dtype(out_dtype); out_v->set_dims(x_dims); out_v->set_dtype(out_dtype); @@ -1009,7 +1009,7 @@ void EighInferMeta(const MetaTensor& x, for (auto i = 0; i < rank - 1; i++) { values_dim.emplace_back(input_dim[i]); } - out_w->set_dims(phi::make_ddim(values_dim)); + out_w->set_dims(common::make_ddim(values_dim)); out_w->set_dtype(dtype::ToReal(x.dtype())); out_v->set_dims(input_dim); out_v->set_dtype(x.dtype()); @@ -1026,7 +1026,7 @@ void EigvalsInferMeta(const MetaTensor& x, MetaTensor* out, MetaConfig config) { x_dims.size(), x_dims)); - if (config.is_runtime || !phi::contain_unknown_dim(x_dims)) { + if (config.is_runtime || !common::contain_unknown_dim(x_dims)) { int last_dim = x_dims.size() - 1; PADDLE_ENFORCE_EQ(x_dims[last_dim], x_dims[last_dim - 1], @@ -1037,14 +1037,14 @@ void EigvalsInferMeta(const MetaTensor& x, MetaTensor* out, MetaConfig config) { x_dims)); } - auto out_dims = vectorize(x_dims); + auto out_dims = common::vectorize(x_dims); out_dims.resize(x_dims.size() - 1); const DataType& x_dtype = x.dtype(); const DataType& out_dtype = IsComplexType(x_dtype) ? 
x_dtype : ToComplexType(x_dtype); - out->set_dims(make_ddim(out_dims)); + out->set_dims(common::make_ddim(out_dims)); out->set_dtype(out_dtype); } @@ -1079,7 +1079,7 @@ void EigvalshInferMeta(const MetaTensor& x, } if (out_w != nullptr) { - out_w->set_dims(phi::make_ddim(values_dim)); + out_w->set_dims(common::make_ddim(values_dim)); out_w->set_dtype(dtype::ToReal(x.dtype())); } if (out_v != nullptr) { @@ -1127,7 +1127,7 @@ void EinsumInferMeta(const std::vector& inputs, << paddle::string::join_strings(output_dims, ","); VLOG(3) << "Label Type is : " << label_to_string(all_labels, labeltype); VLOG(3) << "Label Shape is : " << label_to_string(all_labels, labelshape); - out->set_dims(make_ddim(output_dims)); + out->set_dims(common::make_ddim(output_dims)); out->set_dtype(inputs[0]->dtype()); } @@ -1207,7 +1207,7 @@ void ExpandInferMeta(const MetaTensor& x, } } - out->set_dims(make_ddim(out_shape)); + out->set_dims(common::make_ddim(out_shape)); out->set_dtype(x.dtype()); if (out_rank > 0 && out_shape[0] == x_dims[0]) { out->share_lod(x); @@ -1403,7 +1403,7 @@ void FlattenWithXShapeInferMeta(const MetaTensor& x, for (int i = stop_axis + 1; i < in_dims_size; i++) { out_shape.push_back(x_dims[i]); // NOLINT } - const auto& out_dims = phi::make_ddim(out_shape); + const auto& out_dims = common::make_ddim(out_shape); out->set_dims(out_dims); out->set_dtype(x.dtype()); out->set_layout(x.layout()); @@ -1419,7 +1419,7 @@ void FlattenWithXShapeInferMeta(const MetaTensor& x, for (int i = 0; i < x_dims.size(); ++i) { xshape_dims[i + 1] = x_dims[i]; } - xshape->set_dims(phi::make_ddim(xshape_dims)); + xshape->set_dims(common::make_ddim(xshape_dims)); xshape->share_lod(x); } @@ -1493,7 +1493,7 @@ void FlipInferMeta(const MetaTensor& x, output_dims[i] = x_dims[i]; } - out->set_dims(phi::make_ddim(output_dims)); + out->set_dims(common::make_ddim(output_dims)); out->set_dtype(x.dtype()); out->share_lod(x); } @@ -1699,7 +1699,7 @@ void FoldInferMeta(const MetaTensor& x, out_dims.push_back(output_height); out_dims.push_back(output_width); if (out != nullptr) { - out->set_dims(phi::make_ddim(out_dims)); + out->set_dims(common::make_ddim(out_dims)); out->set_dtype(x.dtype()); } } @@ -1751,7 +1751,7 @@ void FrameInferMeta(const MetaTensor& x, end_axis = x_rank - 2; } - bool contain_unknown_dim = phi::contain_unknown_dim(x_dims); + bool contain_unknown_dim = common::contain_unknown_dim(x_dims); bool check = config.is_runtime || !contain_unknown_dim; if (check) { PADDLE_ENFORCE_LE(frame_length, @@ -1784,7 +1784,7 @@ void FrameInferMeta(const MetaTensor& x, output_shape.push_back(n_frames); } - out->set_dims(phi::make_ddim(output_shape)); + out->set_dims(common::make_ddim(output_shape)); out->set_dtype(x.dtype()); } @@ -1835,7 +1835,7 @@ void IdentityLossInferMeta(const MetaTensor& x, out->set_dtype(x.dtype()); out->set_dims(x.dims()); } else { - out->set_dims(phi::make_ddim({})); + out->set_dims(common::make_ddim({})); out->set_dtype(x.dtype()); } } @@ -1854,8 +1854,8 @@ void IncrementInferMeta(const MetaTensor& x, float value, MetaTensor* out) { static phi::DDim ValidateShape(const std::vector shape, const phi::DDim& in_dims) { - const int64_t in_size = phi::product(in_dims); - auto in_dims_vec = phi::vectorize(in_dims); + const int64_t in_size = common::product(in_dims); + auto in_dims_vec = common::vectorize(in_dims); std::vector output_shape(shape.size(), 0); int64_t capacity = 1; int unk_dim_idx = -1; @@ -1869,7 +1869,7 @@ static phi::DDim ValidateShape(const std::vector shape, phi::errors::InvalidArgument( 
"Only one dimension value of 'shape' in ReshapeOp can " "be -1. But received shape = [%s], shape[%d] is also -1.", - phi::make_ddim(shape), + common::make_ddim(shape), i)); unk_dim_idx = static_cast(i); output_shape[i] = shape[i]; @@ -1893,7 +1893,7 @@ static phi::DDim ValidateShape(const std::vector shape, "Each dimension value of 'shape' in ReshapeOp must not " "be negative except one unknown dimension. " "But received shape = [%s], shape[%d] = %d.", - phi::make_ddim(shape), + common::make_ddim(shape), i, shape[i])); output_shape[i] = shape[i]; @@ -1912,7 +1912,7 @@ static phi::DDim ValidateShape(const std::vector shape, "can not rehsape %s to %s, because the unspecified " "dimension %i can be any number and is ambiguous", in_dims, - phi::make_ddim(shape), + common::make_ddim(shape), unk_dim_idx)); } @@ -1934,7 +1934,7 @@ static phi::DDim ValidateShape(const std::vector shape, "'shape' is [%s], known capacity of 'shape' is %d.", in_dims, in_size, - phi::make_ddim(shape), + common::make_ddim(shape), capacity)); } else { // such as [-1, 8, 3]->[-1, 8], out_shape will remain [-1, 8] @@ -1953,12 +1953,12 @@ static phi::DDim ValidateShape(const std::vector shape, "[%s], the capacity of 'shape' is %d.", in_dims, in_size, - phi::make_ddim(shape), + common::make_ddim(shape), capacity)); } } - return phi::make_ddim(output_shape); + return common::make_ddim(output_shape); } void InferMetaFromVecValue(const MetaTensor& x, @@ -2015,7 +2015,7 @@ void InverseInferMeta(const MetaTensor& x, MetaTensor* out) { } void IsEmptyInferMeta(const MetaTensor& x, MetaTensor* out) { - out->set_dims(phi::make_ddim({})); + out->set_dims(common::make_ddim({})); out->set_dtype(DataType::BOOL); } @@ -2086,7 +2086,7 @@ void KthvalueInferMeta(const MetaTensor& x, for (int i = axis + 1; i < dim_size; i++) { dimvec.emplace_back(input_dims[i]); } - DDim dims = phi::make_ddim(dimvec); + DDim dims = common::make_ddim(dimvec); out->set_dims(dims); out->share_lod(x); out->set_dtype(x.dtype()); @@ -2163,17 +2163,17 @@ void LUInferMeta(const MetaTensor& x, int m = static_cast(x_dims[x_rank - 1]); int n = static_cast(x_dims[x_rank - 2]); int min_mn = std::min(m, n); - auto dims_vec = phi::vectorize(x_dims); + auto dims_vec = common::vectorize(x_dims); PADDLE_ENFORCE_NOT_NULL( infos, phi::errors::InvalidArgument("Output(Infos) should not be nullptr.")); if (x_rank == 2) { auto Infos_dim = std::vector(1); - infos->set_dims(phi::make_ddim(Infos_dim)); + infos->set_dims(common::make_ddim(Infos_dim)); } else { auto Infos_dim = std::vector(dims_vec.begin(), dims_vec.begin() + x_rank - 2); - infos->set_dims(phi::make_ddim(Infos_dim)); + infos->set_dims(common::make_ddim(Infos_dim)); } infos->set_dtype(DataType::INT32); if (pivot) { @@ -2183,7 +2183,7 @@ void LUInferMeta(const MetaTensor& x, auto Pivots_dim = std::vector(dims_vec.begin(), dims_vec.begin() + x_rank - 1); Pivots_dim[x_rank - 2] = min_mn; - pivots->set_dims(phi::make_ddim(Pivots_dim)); + pivots->set_dims(common::make_ddim(Pivots_dim)); pivots->set_dtype(DataType::INT32); } } @@ -2253,7 +2253,7 @@ void MaxOutInferMeta(const MetaTensor& x, std::vector output_shape( {in_x_dims[0], in_x_dims[1], in_x_dims[2], in_x_dims[3]}); output_shape[axis] = in_x_dims[axis] / groups; - out->set_dims(phi::make_ddim(output_shape)); + out->set_dims(common::make_ddim(output_shape)); out->set_dtype(x.dtype()); } @@ -2326,15 +2326,15 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x, } } - out->set_dims(make_ddim(output_shape)); + out->set_dims(common::make_ddim(output_shape)); 
out->set_dtype(x.dtype()); - mask->set_dims(make_ddim(output_shape)); + mask->set_dims(common::make_ddim(output_shape)); mask->set_dtype(phi::CppTypeToDataType::Type()); } void MeanAllInferMeta(const MetaTensor& x, MetaTensor* out) { - out->set_dims(phi::make_ddim({})); + out->set_dims(common::make_ddim({})); out->set_dtype(x.dtype()); out->set_layout(x.layout()); } @@ -2389,7 +2389,7 @@ void ModeInferMeta(const MetaTensor& x, for (int i = axis + 1; i < dim_size; i++) { dimvec.emplace_back(input_dims[i]); } - DDim dims = phi::make_ddim(dimvec); + DDim dims = common::make_ddim(dimvec); out->set_dims(dims); out->share_lod(x); out->set_dtype(x.dtype()); @@ -2436,7 +2436,7 @@ void MultinomialInferMeta(const MetaTensor& x, out_dims[x_rank - 1] = -1; } - out->set_dims(make_ddim(out_dims)); + out->set_dims(common::make_ddim(out_dims)); out->set_dtype(DataType::INT64); } @@ -2450,7 +2450,7 @@ void NanmedianInferMeta(const MetaTensor& x, int64_t x_rank = x_dim.size(); out->set_dtype(x.dtype()); median_index->set_dtype(DataType::INT64); - median_index->set_dims(make_ddim({x.numel() * 2})); + median_index->set_dims(common::make_ddim({x.numel() * 2})); std::vector out_dim; if (axis_list.empty()) { @@ -2506,7 +2506,7 @@ void NanmedianInferMeta(const MetaTensor& x, } } - out->set_dims(make_ddim(out_dim)); + out->set_dims(common::make_ddim(out_dim)); } void NMSInferMeta(const MetaTensor& x, float threshold, MetaTensor* out) { @@ -2518,7 +2518,7 @@ void NMSInferMeta(const MetaTensor& x, float threshold, MetaTensor* out) { "whose shape must be [N, 4] " "N is the number of boxes " "in last dimension in format [x1, x2, y1, y2]. ")); - out->set_dims(phi::make_ddim({-1})); + out->set_dims(common::make_ddim({-1})); out->set_dtype(DataType::INT64); } @@ -2529,7 +2529,7 @@ void NonZeroInferMeta(const MetaTensor& condition, MetaTensor* out) { 1UL, phi::errors::InvalidArgument( "Input(Condition) should have number of dimension at least 1")); - out->set_dims(phi::make_ddim({-1, rank})); + out->set_dims(common::make_ddim({-1, rank})); out->set_dtype(DataType::INT64); } @@ -2561,9 +2561,9 @@ void OneHotRawInferMeta(const MetaTensor& x, x_dims.size(), 0, phi::errors::InvalidArgument("Rank of Input(X) should be at least 0.")); - auto out_dims_vec = phi::vectorize(x_dims); + auto out_dims_vec = common::vectorize(x_dims); out_dims_vec.push_back(depth.to()); - auto out_dims = phi::make_ddim(out_dims_vec); + auto out_dims = common::make_ddim(out_dims_vec); out->set_dims(out_dims); out->share_lod(x); out->set_dtype(dtype); @@ -2579,9 +2579,9 @@ void OneHotInferMeta(const MetaTensor& x, phi::errors::InvalidArgument("Rank of Input(X) should be at least 0.")); int depth = depth_t.to(); - auto out_dims_vec = phi::vectorize(x_dims); + auto out_dims_vec = common::vectorize(x_dims); out_dims_vec.push_back(depth); - auto out_dims = phi::make_ddim(out_dims_vec); + auto out_dims = common::make_ddim(out_dims_vec); out->set_dims(out_dims); out->share_lod(x); @@ -2637,7 +2637,7 @@ void OverlapAddInferMeta(const MetaTensor& x, end_axis = x_rank - 3; } - bool contain_unknown_dim = phi::contain_unknown_dim(x_dims); + bool contain_unknown_dim = common::contain_unknown_dim(x_dims); bool check = config.is_runtime || !contain_unknown_dim; if (check) { PADDLE_ENFORCE_LE( @@ -2669,7 +2669,7 @@ void OverlapAddInferMeta(const MetaTensor& x, output_shape.push_back(seq_length); } - out->set_dims(phi::make_ddim(output_shape)); + out->set_dims(common::make_ddim(output_shape)); } void PadInferMeta(const MetaTensor& input, @@ -2704,7 +2704,7 @@ void 
PadInferMeta(const MetaTensor& input, out_dims[i] = x_dim[i] + paddings[i * 2] + paddings[i * 2 + 1]; } } - out->set_dims(phi::make_ddim(out_dims)); + out->set_dims(common::make_ddim(out_dims)); if (out_dims[0] == x_dim[0]) { // Only pass LoD when the first dimension is equal between // output and input. @@ -2786,7 +2786,7 @@ void Pad3dInferMeta(const MetaTensor& x, } } - out->set_dims(phi::make_ddim(out_dims)); + out->set_dims(common::make_ddim(out_dims)); out->set_dtype(x.dtype()); out->share_lod(x); } @@ -2986,7 +2986,7 @@ void PNormInferMeta(const MetaTensor& x, } } - out->set_dims(phi::make_ddim(out_dim_vector)); + out->set_dims(common::make_ddim(out_dim_vector)); out->set_dtype(x.dtype()); } @@ -3007,7 +3007,7 @@ void Pool2DInferMeta(const MetaTensor& x, (data_format == "NHWC" || data_format == "NDHWC"); if (!config.is_runtime && kernel_size.FromTensor()) { auto x_dims = x.dims(); - std::vector output_shape = std::move(phi::vectorize(x_dims)); + std::vector output_shape = std::move(common::vectorize(x_dims)); // set dims of HW -1 output_shape[x_dims.size() - 2] = -1; if (channel_last) { // for NHWC, NDHWC @@ -3015,7 +3015,7 @@ void Pool2DInferMeta(const MetaTensor& x, } else { // for NCHW output_shape[x_dims.size() - 1] = -1; } - out->set_dims(make_ddim(output_shape)); + out->set_dims(common::make_ddim(output_shape)); out->share_lod(x); out->set_dtype(x.dtype()); } else { @@ -3107,7 +3107,7 @@ void PoolInferMeta(const MetaTensor& x, x_dims.size(), x_dims, kernel_size_.size(), - make_ddim(kernel_size_))); + common::make_ddim(kernel_size_))); PADDLE_ENFORCE_EQ( kernel_size_.size(), @@ -3119,8 +3119,8 @@ void PoolInferMeta(const MetaTensor& x, "size is %d, Attr(kernel_size_) is [%s], Attr(strides)is [%s].", kernel_size_.size(), strides.size(), - make_ddim(kernel_size_), - make_ddim(strides))); + common::make_ddim(kernel_size_), + common::make_ddim(strides))); // MKL-DNN Kernels are using NCHW order of dims description // so we ignore data_format consideration for MKL-DNN kernel @@ -3175,7 +3175,7 @@ void PoolInferMeta(const MetaTensor& x, output_shape.insert(output_shape.begin() + 1, x_dims[1]); } - out->set_dims(make_ddim(output_shape)); + out->set_dims(common::make_ddim(output_shape)); out->share_lod(x); out->set_dtype(x.dtype()); } @@ -3205,18 +3205,18 @@ void QrInferMeta(const MetaTensor& x, if (compute_q) { int k = reduced_mode ? min_mn : m; - auto q_dims_vec = phi::vectorize(x_dims); + auto q_dims_vec = common::vectorize(x_dims); q_dims_vec[q_dims_vec.size() - 1] = k; - q->set_dims(phi::make_ddim(q_dims_vec)); + q->set_dims(common::make_ddim(q_dims_vec)); } else { - q->set_dims(phi::make_ddim({0})); + q->set_dims(common::make_ddim({0})); } int k = reduced_mode ? 
min_mn : m; - auto r_dims_vec = phi::vectorize(x_dims); + auto r_dims_vec = common::vectorize(x_dims); r_dims_vec[r_dims_vec.size() - 2] = k; r_dims_vec[r_dims_vec.size() - 1] = n; - r->set_dims(phi::make_ddim(r_dims_vec)); + r->set_dims(common::make_ddim(r_dims_vec)); q->share_lod(x); r->share_lod(x); @@ -3288,7 +3288,7 @@ DDim ReduceInferDim(const MetaTensor& x, } } - DDim out_dim = phi::make_ddim(out_dim_vector); + DDim out_dim = common::make_ddim(out_dim_vector); return out_dim; } @@ -3339,7 +3339,7 @@ DDim ReduceInferDimForIntArrayAxis(const MetaTensor& x, } } } - return phi::make_ddim(vec_dim); + return common::make_ddim(vec_dim); } void ReduceIntArrayAxisInferMetaBase(const MetaTensor& x, @@ -3390,7 +3390,7 @@ void RepeatInterleaveInferMeta(const MetaTensor& x, int dim, MetaTensor* out) { const auto& input_dim = x.dims(); - auto output_dim = phi::vectorize(input_dim); + auto output_dim = common::vectorize(input_dim); auto n_dim = dim; if (n_dim < 0) n_dim += input_dim.size(); @@ -3425,7 +3425,7 @@ void RepeatInterleaveInferMeta(const MetaTensor& x, "repeat_interleave's output tensor can't be nullptr")); output_dim[n_dim] = input_dim[n_dim] * repeats; - out->set_dims(phi::make_ddim(output_dim)); + out->set_dims(common::make_ddim(output_dim)); out->share_lod(x); out->set_dtype(x.dtype()); } @@ -3439,7 +3439,7 @@ void ReshapeInferMeta(const MetaTensor& x, phi::errors::InvalidArgument( "Output(Out) of ReshapeOp should not be null.")); if (!config.is_runtime && shape.FromTensor()) { - out->set_dims(phi::make_ddim(shape_data)); + out->set_dims(common::make_ddim(shape_data)); out->share_lod(x); return; } @@ -3461,7 +3461,7 @@ void ReshapeWithXShapeInferMeta(const MetaTensor& x, for (int i = 0; i < x_dims.size(); ++i) { xshape_dims[i + 1] = x_dims[i]; } - xshape->set_dims(phi::make_ddim(xshape_dims)); + xshape->set_dims(common::make_ddim(xshape_dims)); xshape->share_lod(x); xshape->set_strides(x.strides()); ReshapeInferMeta(x, shape, out, config); @@ -3620,7 +3620,7 @@ void SetValueInferMeta(const MetaTensor& x, MetaTensor* out) { void ShapeInferMeta(const MetaTensor& input, MetaTensor* out) { auto in_dim = input.dims(); - out->set_dims(phi::make_ddim({in_dim.size()})); + out->set_dims(common::make_ddim({in_dim.size()})); out->set_dtype(DataType::INT32); } @@ -3654,7 +3654,7 @@ void ShardIndexInferMeta(const MetaTensor& in, void NumelInferMeta(const MetaTensor& input, MetaTensor* out) { out->set_dtype(DataType::INT64); - out->set_dims(phi::make_ddim({})); + out->set_dims(common::make_ddim({})); } // This logic is copied from @@ -3862,7 +3862,7 @@ void SplitInferMeta(const MetaTensor& x, if ((sections.FromTensor() && !config.is_runtime) || axis_value == -1) { out_dims = std::vector( sections_data.size(), - phi::make_ddim(std::vector(x.dims().size(), -1))); + common::make_ddim(std::vector(x.dims().size(), -1))); } else { out_dims = std::vector(sections_data.size(), x.dims()); } @@ -3904,7 +3904,7 @@ void SplitInferMeta(const MetaTensor& x, "Only one dimension value of Attr(num_or_sections) " "in SplitOp can be -1. " "But received Attr(num_or_sections) = [%s].", - phi::make_ddim(sections_data))); + common::make_ddim(sections_data))); if (unknow_dim_idx != -1) { // for example, input shape = [4 ,5], axis = 1, sections = [2, 3, -1]. @@ -3919,7 +3919,7 @@ void SplitInferMeta(const MetaTensor& x, "size " "along the split dimension. 
But received Attr(num_or_sections) " "= [%s], input(X)'s shape = [%s], Attr(dim) = %d.", - phi::make_ddim(sections_data), + common::make_ddim(sections_data), x.dims(), axis_value)); @@ -3933,7 +3933,7 @@ void SplitInferMeta(const MetaTensor& x, "size " "along the split dimension. But received Attr(num_or_sections)" " = [%s], input(X)'s shape = [%s], Attr(dim) = %d.", - phi::make_ddim(sections_data), + common::make_ddim(sections_data), x.dims(), axis_value)); } @@ -3953,7 +3953,7 @@ void SplitWithNumInferMeta(const MetaTensor& x, std::vector out_dims; if (axis_value == -1) { out_dims = std::vector( - num, phi::make_ddim(std::vector(x.dims().size(), -1))); + num, common::make_ddim(std::vector(x.dims().size(), -1))); } else { out_dims = std::vector(num, x.dims()); } @@ -4024,7 +4024,7 @@ void SqueezeInferMeta(const MetaTensor& x, } std::vector vec_out_dims(output_size, -1); - out->set_dims(phi::make_ddim(vec_out_dims)); + out->set_dims(common::make_ddim(vec_out_dims)); } else { std::vector tmp; tmp.reserve(axes.GetData().size()); @@ -4055,7 +4055,7 @@ void SqueezeWithXShapeInferMeta(const MetaTensor& x, xshape_dims[i + 1] = x_dims[i]; } if (xshape) { - xshape->set_dims(phi::make_ddim(xshape_dims)); + xshape->set_dims(common::make_ddim(xshape_dims)); xshape->share_lod(x); xshape->set_dtype(x.dtype()); } @@ -4159,7 +4159,7 @@ void StridedSliceRawInferMeta(const MetaTensor& x, axes.size(), true); } - DDim out_dims(phi::make_ddim(out_dims_vector)); + DDim out_dims(common::make_ddim(out_dims_vector)); // generate new shape if (!decrease_axis.empty()) { std::vector new_out_shape; @@ -4180,7 +4180,7 @@ void StridedSliceRawInferMeta(const MetaTensor& x, new_out_shape.push_back(out_dims[i]); } } - out_dims = phi::make_ddim(new_out_shape); + out_dims = common::make_ddim(new_out_shape); } VLOG(4) << "out_dims: " << out_dims; out->set_dims(out_dims); @@ -4255,24 +4255,24 @@ void SvdInferMeta(const MetaTensor& x, MetaTensor* vh) { auto UDDim = [](const DDim& x_dim, int k) { // get x_dim and return the ddim of U - auto x_vec = vectorize(x_dim); + auto x_vec = common::vectorize(x_dim); x_vec[x_vec.size() - 1] = k; - return phi::make_ddim(x_vec); + return common::make_ddim(x_vec); }; auto VHDDim = [](const DDim& x_dim, int k) { // get x_dim and return the ddim of VH - auto x_vec = vectorize(x_dim); + auto x_vec = common::vectorize(x_dim); x_vec[x_vec.size() - 2] = k; - return phi::make_ddim(x_vec); + return common::make_ddim(x_vec); }; auto SDDim = [](const DDim& x_dim, int k) { // get x_dim and return the ddim of S - auto x_vec = vectorize(x_dim); + auto x_vec = common::vectorize(x_dim); x_vec[x_vec.size() - 2] = k; x_vec.erase(x_vec.end() - 1); // rank - 1 - return phi::make_ddim(x_vec); + return common::make_ddim(x_vec); }; auto in_dims = x.dims(); @@ -4380,7 +4380,7 @@ void TileInferMeta(const MetaTensor& x, auto out_rank = std::max(static_cast(x_dims.size()), repeat_times_data.size()); std::vector out_shape(out_rank); - auto x_dim_vec = phi::vectorize(x_dims); + auto x_dim_vec = common::vectorize(x_dims); if (x_dim_vec.size() > repeat_times_data.size()) { auto diff = x_dim_vec.size() - repeat_times_data.size(); repeat_times_data.insert(repeat_times_data.begin(), diff, 1); @@ -4403,7 +4403,7 @@ void TileInferMeta(const MetaTensor& x, } } - out->set_dims(phi::make_ddim(out_shape)); + out->set_dims(common::make_ddim(out_shape)); if (out_rank > 0 && (out_shape[0] == x_dims[0])) { out->share_lod(x); } @@ -4529,14 +4529,14 @@ void TraceInferMeta( dim1, dim2)); - auto sizes = vectorize(x_dims); + auto sizes =
common::vectorize(x_dims); if (x_dims.size() == 2) { sizes.clear(); } else { sizes.erase(sizes.begin() + std::max(dim1_, dim2_)); sizes.erase(sizes.begin() + std::min(dim1_, dim2_)); } - out->set_dims(phi::make_ddim(sizes)); + out->set_dims(common::make_ddim(sizes)); out->set_dtype(x.dtype()); } @@ -4634,7 +4634,7 @@ void UnbindInferMeta(const MetaTensor& x, for (int i = 0; i < in_dims.size(); ++i) { if (i != axis) out_dim.push_back(in_dims[i]); // NOLINT } - auto out_dims = phi::make_ddim(out_dim); + auto out_dims = common::make_ddim(out_dim); for (auto& out : outs) { out->set_dtype(x.dtype()); @@ -4872,7 +4872,7 @@ void UnfoldInferMeta(const MetaTensor& x, output_height == -1 || output_width == -1 ? -1 : output_col_length; } out_dims.push_back(output_col_length); - out->set_dims(phi::make_ddim(out_dims)); + out->set_dims(common::make_ddim(out_dims)); out->set_dtype(x.dtype()); } @@ -4954,7 +4954,7 @@ void UniqueConsecutiveInferMeta(const MetaTensor& x, out->set_dims({-1}); out->set_dtype(x.dtype()); if (return_inverse) { - index->set_dims({phi::product(in_dims)}); + index->set_dims({common::product(in_dims)}); } } else { int axis_value = axis[0]; @@ -5023,7 +5023,7 @@ void UniqueRawInferMeta(const MetaTensor& x, "The Input(X) should be 0-D or 1-D Tensor, " "But now the dims of Input(X) is %d.", x.dims().size())); - out->set_dims(phi::make_ddim({-1})); + out->set_dims(common::make_ddim({-1})); index->set_dims(x.dims()); return; } @@ -5038,9 +5038,9 @@ void UniqueRawInferMeta(const MetaTensor& x, } if (axis.empty()) { - out->set_dims(phi::make_ddim({-1})); + out->set_dims(common::make_ddim({-1})); if (return_inverse) { - index->set_dims(phi::make_ddim({phi::product(x.dims())})); + index->set_dims(common::make_ddim({common::product(x.dims())})); } } else { int axis_value = axis[0]; @@ -5067,16 +5067,16 @@ void UniqueRawInferMeta(const MetaTensor& x, out_dims[axis_value] = -1; out->set_dims(out_dims); if (return_inverse) { - index->set_dims(phi::make_ddim({x.dims()[axis_value]})); + index->set_dims(common::make_ddim({x.dims()[axis_value]})); index->set_dtype(dtype); } } if (return_index) { - indices->set_dims(phi::make_ddim({-1})); + indices->set_dims(common::make_ddim({-1})); indices->set_dtype(dtype); } if (return_counts) { - counts->set_dims(phi::make_ddim({-1})); + counts->set_dims(common::make_ddim({-1})); counts->set_dtype(dtype); } } @@ -5098,7 +5098,7 @@ void UnsqueezeInferMeta(const MetaTensor& x, int output_size = static_cast(x.dims().size() + axes.GetData().size()); std::vector vec_out_dims(output_size, -1); out->set_dtype(x.dtype()); - out->set_dims(phi::make_ddim(vec_out_dims)); + out->set_dims(common::make_ddim(vec_out_dims)); } else { auto out_dims = funcs::GetUnsqueezeShape(axes.GetData(), x_dims); out->set_dims(out_dims); @@ -5123,7 +5123,7 @@ void UnsqueezeWithXShapeInferMeta(const MetaTensor& x, xshape_dims[i + 1] = x_dims[i]; } if (xshape) { - xshape->set_dims(phi::make_ddim(xshape_dims)); + xshape->set_dims(common::make_ddim(xshape_dims)); xshape->share_lod(x); xshape->set_dtype(x.dtype()); } @@ -5168,10 +5168,10 @@ void UnStackInferMeta(const MetaTensor& x, x_dim[axis], num)); } - auto vec = phi::vectorize(x_dim); + auto vec = common::vectorize(x_dim); vec.erase(vec.begin() + axis); for (size_t i = 0; i < output_count; i++) { - outs[i]->set_dims(phi::make_ddim(vec)); + outs[i]->set_dims(common::make_ddim(vec)); outs[i]->set_dtype(x.dtype()); } } @@ -5217,11 +5217,11 @@ void WeightQuantizeInferMeta(const MetaTensor& x, "'llm.int8'], but got[%s]", algo); } - 
out->set_dims(phi::make_ddim(dim_out)); + out->set_dims(common::make_ddim(dim_out)); out->set_dtype(DataType::INT8); - scale->set_dims(phi::make_ddim(dim_scale)); + scale->set_dims(common::make_ddim(dim_scale)); scale->set_dtype(DataType::FLOAT32); } @@ -5280,9 +5280,9 @@ void CheckNumericsInferMeta(const MetaTensor& tensor, MetaTensor* stats, MetaTensor* values) { stats->set_dtype(DataType::INT64); - stats->set_dims(phi::make_ddim({3})); + stats->set_dims(common::make_ddim({3})); values->set_dtype(DataType::FLOAT32); - values->set_dims(phi::make_ddim({3})); + values->set_dims(common::make_ddim({3})); } void StridedUnChangedInferMeta(const MetaTensor& x, MetaTensor* out) { diff --git a/paddle/phi/kernels/array_kernel.cc b/paddle/phi/kernels/array_kernel.cc index 4217b41e2aed9a..8a599dcf9d80d8 100644 --- a/paddle/phi/kernels/array_kernel.cc +++ b/paddle/phi/kernels/array_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/array_kernel.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/concat_kernel.h" #include "paddle/phi/kernels/full_kernel.h" @@ -92,9 +92,9 @@ void ArrayToTensorKernel(const Context& dev_ctx, } } } - auto vec = phi::vectorize(out_dims); + auto vec = common::vectorize(out_dims); vec.insert(vec.begin() + axis, x.size()); // NOLINT - out->Resize(phi::make_ddim(vec)); + out->Resize(common::make_ddim(vec)); std::vector tmp_inputs(x.size()); std::vector inputs; @@ -115,7 +115,7 @@ void ArrayToTensorKernel(const Context& dev_ctx, ConcatKernel(dev_ctx, inputs, axis, out); } - out_index->Resize(phi::make_ddim({static_cast(x.size())})); + out_index->Resize(common::make_ddim({static_cast(x.size())})); StackKernel(dev_ctx, indexs, 0, out_index); } diff --git a/paddle/phi/kernels/assign_kernel.cc b/paddle/phi/kernels/assign_kernel.cc index 7c8ed23131a88b..b4504f83818d77 100644 --- a/paddle/phi/kernels/assign_kernel.cc +++ b/paddle/phi/kernels/assign_kernel.cc @@ -106,7 +106,7 @@ void AssignValueKernel(const Context& dev_ctx, dtype, template_dtype)); CopyVectorToTensor(dev_ctx, values, out); - out->Resize(phi::make_ddim(shape)); + out->Resize(common::make_ddim(shape)); } } // namespace phi diff --git a/paddle/phi/kernels/autotune/cache_base.h b/paddle/phi/kernels/autotune/cache_base.h index 68463e900c3578..82af1ccbb71325 100644 --- a/paddle/phi/kernels/autotune/cache_base.h +++ b/paddle/phi/kernels/autotune/cache_base.h @@ -18,8 +18,8 @@ #include #include +#include "paddle/common/errors.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/flags.h" PHI_DECLARE_int32(search_cache_max_number); diff --git a/paddle/phi/kernels/autotune/gpu_timer.h b/paddle/phi/kernels/autotune/gpu_timer.h index c50a571a7fd95d..1882c21b9cd72b 100644 --- a/paddle/phi/kernels/autotune/gpu_timer.h +++ b/paddle/phi/kernels/autotune/gpu_timer.h @@ -14,6 +14,7 @@ #pragma once +#include "paddle/common/errors.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/backends/dynload/port.h" #include "paddle/phi/backends/gpu/gpu_context.h" @@ -21,7 +22,6 @@ #include "paddle/phi/common/place.h" #include "paddle/phi/core/device_context.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #ifdef PADDLE_WITH_CUDA #include diff --git a/paddle/phi/kernels/coalesce_tensor_kernel.cc b/paddle/phi/kernels/coalesce_tensor_kernel.cc index 707218e9940981..a60369af449f4e 100644 --- 
a/paddle/phi/kernels/coalesce_tensor_kernel.cc +++ b/paddle/phi/kernels/coalesce_tensor_kernel.cc @@ -194,7 +194,7 @@ void CoalesceTensorKernel(const Context &dev_ctx, // Alloc the continuous space void *fused_tensor_ptr = dev_ctx.Alloc( - &fused_output->Resize(phi::make_ddim({static_cast(numel)})), + &fused_output->Resize(common::make_ddim({static_cast(numel)})), dtype); VLOG(10) << "Fused tensor addr " << fused_tensor_ptr; diff --git a/paddle/phi/kernels/cpu/affine_grid_grad_kernel.cc b/paddle/phi/kernels/cpu/affine_grid_grad_kernel.cc index 86568a0a018468..abd7188acefe50 100644 --- a/paddle/phi/kernels/cpu/affine_grid_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/affine_grid_grad_kernel.cc @@ -29,7 +29,7 @@ struct Linspace { bool align_corners, DenseTensor* numbers, const phi::CPUContext& dev_ctx) { - numbers->Resize(phi::make_ddim({count})); + numbers->Resize(common::make_ddim({count})); T* number_data = dev_ctx.template Alloc(numbers); T slice = (end - start) / (T)(count - 1); if (!align_corners) { @@ -55,7 +55,7 @@ void AffineGridGrad4DKernel(const Context& dev_ctx, int w = 0; h = static_cast(size_attr[2]); w = static_cast(size_attr[3]); - theta_grad->Resize(phi::make_ddim({n, 2, 3})); + theta_grad->Resize(common::make_ddim({n, 2, 3})); dev_ctx.template Alloc(theta_grad); phi::funcs::SetConstant()(dev_ctx, theta_grad, static_cast(0)); DenseTensor grid; @@ -94,7 +94,7 @@ void AffineGridGrad5DKernel(const Context& dev_ctx, d = static_cast(size_attr[2]); h = static_cast(size_attr[3]); w = static_cast(size_attr[4]); - theta_grad->Resize(phi::make_ddim({n, 3, 4})); + theta_grad->Resize(common::make_ddim({n, 3, 4})); dev_ctx.template Alloc(theta_grad); phi::funcs::SetConstant()(dev_ctx, theta_grad, static_cast(0)); DenseTensor grid; diff --git a/paddle/phi/kernels/cpu/affine_grid_kernel.cc b/paddle/phi/kernels/cpu/affine_grid_kernel.cc index 3ad0812f441f38..fef81c008e23a9 100644 --- a/paddle/phi/kernels/cpu/affine_grid_kernel.cc +++ b/paddle/phi/kernels/cpu/affine_grid_kernel.cc @@ -29,7 +29,7 @@ struct Linspace { bool align_corners, DenseTensor* numbers, const phi::CPUContext& dev_ctx) { - numbers->Resize(phi::make_ddim({count})); + numbers->Resize(common::make_ddim({count})); T* number_data = dev_ctx.template Alloc(numbers); T slice = (end - start) / (T)(count - 1); if (!align_corners) { @@ -55,7 +55,7 @@ void AffineGrid4DKernel(const Context& dev_ctx, int w = 0; h = static_cast(size_attr[2]); w = static_cast(size_attr[3]); - output->Resize(phi::make_ddim({n, h, w, 2})); + output->Resize(common::make_ddim({n, h, w, 2})); dev_ctx.template Alloc(output); phi::funcs::SetConstant()(dev_ctx, output, static_cast(0)); DenseTensor grid; @@ -89,7 +89,7 @@ void AffineGrid5DKernel(const Context& dev_ctx, d = static_cast(size_attr[2]); h = static_cast(size_attr[3]); w = static_cast(size_attr[4]); - output->Resize(phi::make_ddim({n, d, h, w, 3})); + output->Resize(common::make_ddim({n, d, h, w, 3})); dev_ctx.template Alloc(output); phi::funcs::SetConstant()(dev_ctx, output, static_cast(0)); DenseTensor grid; diff --git a/paddle/phi/kernels/cpu/arange_kernel.cc b/paddle/phi/kernels/cpu/arange_kernel.cc index b2684b2f6159af..4120e49c6af2fd 100644 --- a/paddle/phi/kernels/cpu/arange_kernel.cc +++ b/paddle/phi/kernels/cpu/arange_kernel.cc @@ -28,7 +28,7 @@ void ArangeFunc(const Context& dev_ctx, DenseTensor* out) { int64_t size = 0; phi::funcs::GetSize(start_value, end_value, step_value, &size); - out->Resize(phi::make_ddim({size})); + out->Resize(common::make_ddim({size})); T* out_data = dev_ctx.template 
Alloc(out); T value = start_value; for (int64_t i = 0; i < size; ++i) { diff --git a/paddle/phi/kernels/cpu/arg_min_max_kernel.cc b/paddle/phi/kernels/cpu/arg_min_max_kernel.cc index ce00926101f2cc..351701c97f675a 100644 --- a/paddle/phi/kernels/cpu/arg_min_max_kernel.cc +++ b/paddle/phi/kernels/cpu/arg_min_max_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/arg_min_max_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/eigen/common.h" @@ -96,8 +96,8 @@ struct VisitDataArgMinMaxFunctor { int new_axis = axis; if (flatten) { // always reduce 1D -> 0D - x_dims = phi::make_ddim({x.numel()}); - out_dims = phi::make_ddim({}); + x_dims = common::make_ddim({x.numel()}); + out_dims = common::make_ddim({}); new_axis = 0; } else { x_dims = x.dims(); diff --git a/paddle/phi/kernels/cpu/argsort_grad_kernel.cc b/paddle/phi/kernels/cpu/argsort_grad_kernel.cc index 9958a23254f027..92135f1eb02346 100644 --- a/paddle/phi/kernels/cpu/argsort_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/argsort_grad_kernel.cc @@ -74,7 +74,7 @@ void ArgsortGradKernel(const Context& dev_ctx, // Do full assign if (axis == -1 || axis + 1 == in_dims.size()) { const int64_t input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t input_width = in_dims[in_dims.size() - 1]; FullAssign(input_height, @@ -108,8 +108,8 @@ void ArgsortGradKernel(const Context& dev_ctx, TransposeKernel(dev_ctx, out_grad, trans, &trans_dO); TransposeKernel(dev_ctx, indices, trans, &trans_ind); - const int64_t input_height = - phi::product(phi::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); + const int64_t input_height = common::product( + common::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); const int64_t input_width = trans_dims[trans_dims.size() - 1]; DenseTensor tmp_out; diff --git a/paddle/phi/kernels/cpu/argsort_kernel.cc b/paddle/phi/kernels/cpu/argsort_kernel.cc index ba78865d40acc9..cfca255e947948 100644 --- a/paddle/phi/kernels/cpu/argsort_kernel.cc +++ b/paddle/phi/kernels/cpu/argsort_kernel.cc @@ -91,7 +91,7 @@ void ArgsortKernel(const Context& dev_ctx, // Do full sort if (axis == -1 || axis + 1 == in_dims.size()) { const int64_t input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t input_width = in_dims[in_dims.size() - 1]; int64_t* ids_data = dev_ctx.template Alloc(indices); FullSort(input_height, @@ -123,8 +123,8 @@ void ArgsortKernel(const Context& dev_ctx, // Do transpose TransposeKernel(dev_ctx, input, trans, &trans_inp); - const int64_t input_height = - phi::product(phi::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); + const int64_t input_height = common::product( + common::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); const int64_t input_width = trans_dims[trans_dims.size() - 1]; DenseTensor tmp_out; diff --git a/paddle/phi/kernels/cpu/assign_pos_kernel.cc b/paddle/phi/kernels/cpu/assign_pos_kernel.cc index ceab18c5ecc7b4..7bad2262dad685 100644 --- a/paddle/phi/kernels/cpu/assign_pos_kernel.cc +++ b/paddle/phi/kernels/cpu/assign_pos_kernel.cc @@ -13,7 +13,7 @@ // limitations under the License. 
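
An aside on the pattern the argsort hunks above repeat: an N-D tensor is viewed as a [height, width] matrix by collapsing every leading dimension with product(slice_ddim(dims, 0, rank - 1)) and keeping the last extent as the width. The sketch below models only that arithmetic; Dims, SliceDims, and Product are hand-rolled stand-ins for illustration, not the paddle/common API.

    // Stand-alone model of the slice_ddim/product idiom used by the
    // argsort kernels above; compiles with any C++14 compiler.
    #include <cassert>
    #include <cstdint>
    #include <functional>
    #include <numeric>
    #include <vector>

    // Model a DDim as a plain vector of extents.
    using Dims = std::vector<int64_t>;

    // Keep extents in [begin, end), like common::slice_ddim(dims, begin, end).
    Dims SliceDims(const Dims& dims, size_t begin, size_t end) {
      return Dims(dims.begin() + begin, dims.begin() + end);
    }

    // Multiply all extents together, like common::product(dims).
    int64_t Product(const Dims& dims) {
      return std::accumulate(dims.begin(), dims.end(), int64_t{1},
                             std::multiplies<int64_t>());
    }

    int main() {
      // A [2, 3, 5] tensor sorted along its last axis is processed as a
      // matrix of input_height rows and input_width columns.
      Dims in_dims = {2, 3, 5};
      int64_t input_height = Product(SliceDims(in_dims, 0, in_dims.size() - 1));
      int64_t input_width = in_dims[in_dims.size() - 1];
      assert(input_height == 6 && input_width == 5);
      return 0;
    }

Each of the input_height rows is then sorted independently along the contiguous last axis, which is why the kernels transpose any other target axis to the back first.
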
#include "paddle/phi/kernels/assign_pos_kernel.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc b/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc index 7dc8f39da05132..23296fd352d15a 100644 --- a/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc @@ -58,7 +58,7 @@ void BatchNormGradFunctor(const Context& ctx, DenseTensor* bias_grad) { const auto* d_y = &y_grad; - DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + DataLayout data_layout = common::StringToDataLayout(data_layout_str); auto* d_x = x_grad; auto* d_scale = scale_grad; @@ -381,7 +381,7 @@ void BatchNormDoubleGradKernel( "you want to use global status in pre_train model, " "please set `use_global_stats = True`")); - const auto data_layout = phi::StringToDataLayout(data_layout_str); + const auto data_layout = common::StringToDataLayout(data_layout_str); const auto* ddX = x_grad_grad.get_ptr(); const auto* ddScale = scale_grad_grad.get_ptr(); diff --git a/paddle/phi/kernels/cpu/batch_norm_kernel.cc b/paddle/phi/kernels/cpu/batch_norm_kernel.cc index e6acb16a89185a..b0ee0b52cdd1c3 100644 --- a/paddle/phi/kernels/cpu/batch_norm_kernel.cc +++ b/paddle/phi/kernels/cpu/batch_norm_kernel.cc @@ -55,7 +55,7 @@ void BatchNormKernel(const Context& ctx, bool global_stats = test_mode || use_global_stats; - auto data_layout = phi::StringToDataLayout(data_layout_str); + auto data_layout = common::StringToDataLayout(data_layout_str); const auto& x_dims = x.dims(); PADDLE_ENFORCE_GE( diff --git a/paddle/phi/kernels/cpu/concat_kernel.cc b/paddle/phi/kernels/cpu/concat_kernel.cc index fab30d620c10f6..4c49df6a2c7966 100644 --- a/paddle/phi/kernels/cpu/concat_kernel.cc +++ b/paddle/phi/kernels/cpu/concat_kernel.cc @@ -84,8 +84,8 @@ void ConcatKernel(const Context& dev_ctx, if (in->numel() == 0UL) { continue; } - auto in_stride = phi::stride_numel(in->dims()); - auto out_stride = phi::stride_numel(out->dims()); + auto in_stride = common::stride_numel(in->dims()); + auto out_stride = common::stride_numel(out->dims()); phi::funcs::StridedNumelCopyWithAxis( dev_ctx, axis, diff --git a/paddle/phi/kernels/cpu/conv_util.h b/paddle/phi/kernels/cpu/conv_util.h index f051002d367008..8c8ba34d784622 100644 --- a/paddle/phi/kernels/cpu/conv_util.h +++ b/paddle/phi/kernels/cpu/conv_util.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/meta_tensor.h" namespace phi { @@ -27,7 +27,7 @@ inline void UpdatePaddingAndDilation(std::vector* paddings, const std::vector& strides, const std::vector& ksize) { // set padding size == data_dims.size() * 2 - auto data_shape = vectorize(data_dims); + auto data_shape = common::vectorize(data_dims); if (static_cast(paddings->size()) == data_dims.size()) { for (int i = 0; i < data_dims.size(); ++i) { T copy_pad = *(paddings->begin() + 2 * i); @@ -43,7 +43,7 @@ inline void UpdatePaddingAndDilation(std::vector* paddings, "But received: padding's size is %d, padding is [%s]; input's " "dimension is %d, input's shape is [%s].", paddings->size(), - make_ddim(*paddings), + common::make_ddim(*paddings), data_dims.size(), data_dims)); } @@ -173,7 +173,7 @@ inline std::vector ComputeOutputShape( in_dims.size(), in_dims, strides.size(), - phi::make_ddim(strides), + common::make_ddim(strides), in_dims.size() - stride_size)); const auto input_channels = 
@@ -218,19 +218,20 @@ inline std::vector ComputeOutputShape( phi::DDim in_data_dims; if (channel_last) { - in_data_dims = phi::slice_ddim(in_dims, 1, in_dims.size() - 1); + in_data_dims = common::slice_ddim(in_dims, 1, in_dims.size() - 1); } else { - in_data_dims = phi::slice_ddim(in_dims, 2, in_dims.size()); + in_data_dims = common::slice_ddim(in_dims, 2, in_dims.size()); } phi::DDim filter_data_dims; if (channel_last) { - filter_data_dims = phi::slice_ddim(filter_dims, 1, filter_dims.size() - 1); + filter_data_dims = + common::slice_ddim(filter_dims, 1, filter_dims.size() - 1); } else { - filter_data_dims = phi::slice_ddim(filter_dims, 2, filter_dims.size()); + filter_data_dims = common::slice_ddim(filter_dims, 2, filter_dims.size()); } - std::vector ksize = phi::vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); std::vector paddings_vec = paddings; std::vector dilations_vec = dilations; phi::UpdatePaddingAndDilation(&paddings_vec, diff --git a/paddle/phi/kernels/cpu/cum_maxmin_kernel.cc b/paddle/phi/kernels/cpu/cum_maxmin_kernel.cc index 881664601b85c4..72683b003685b1 100644 --- a/paddle/phi/kernels/cpu/cum_maxmin_kernel.cc +++ b/paddle/phi/kernels/cpu/cum_maxmin_kernel.cc @@ -59,7 +59,7 @@ void ComputeImp(const DenseTensor& x, int64_t x_stride = compute_stride(axis, x.dims()); int64_t values_stride = compute_stride(axis, out->dims()); int64_t indices_stride = compute_stride(axis, indices->dims()); - auto x_dim_vec = phi::vectorize(x.dims()); + auto x_dim_vec = common::vectorize(x.dims()); int x_dim_size = x_dim_vec[axis]; BinaryFunction op; diff --git a/paddle/phi/kernels/cpu/cumprod_grad_kernel.cc b/paddle/phi/kernels/cpu/cumprod_grad_kernel.cc index a2cc99c59fe2d8..cd4e90d2c7918f 100644 --- a/paddle/phi/kernels/cpu/cumprod_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/cumprod_grad_kernel.cc @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/cumprod_grad_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/allocator.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/complex_functors.h" #include "paddle/phi/kernels/funcs/cumprod.h" diff --git a/paddle/phi/kernels/cpu/diagonal_grad_kernel.cc b/paddle/phi/kernels/cpu/diagonal_grad_kernel.cc index d8383b45beb799..dcca60f97b30c9 100644 --- a/paddle/phi/kernels/cpu/diagonal_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/diagonal_grad_kernel.cc @@ -30,11 +30,11 @@ void DiagonalGradKernel(const Context& dev_ctx, DenseTensor* in_grad) { const auto* dout = &out_grad; const T* dout_data = dout->data(); - auto dout_dim = vectorize(dout->dims()); + auto dout_dim = common::vectorize(dout->dims()); auto* dx = in_grad; T* dx_data = dev_ctx.template Alloc(dx); - auto dx_dim = vectorize(dx->dims()); + auto dx_dim = common::vectorize(dx->dims()); auto dx_dim_size = dx_dim.size(); const int64_t offset_ = offset; diff --git a/paddle/phi/kernels/cpu/diagonal_kernel.cc b/paddle/phi/kernels/cpu/diagonal_kernel.cc index 58d542a1b7d328..cd71c6f06fd0af 100644 --- a/paddle/phi/kernels/cpu/diagonal_kernel.cc +++ b/paddle/phi/kernels/cpu/diagonal_kernel.cc @@ -29,12 +29,12 @@ void DiagonalKernel(const Context& dev_ctx, DenseTensor* out) { auto* input = &x; const T* input_data = input->data(); - auto input_dim = vectorize(input->dims()); + auto input_dim = common::vectorize(input->dims()); auto input_dim_size = input_dim.size(); auto* output = out; T* output_data = dev_ctx.template Alloc(output); - auto 
output_dim = vectorize(output->dims()); + auto output_dim = common::vectorize(output->dims()); auto output_dim_size = output_dim.size(); const int64_t offset_ = offset; diff --git a/paddle/phi/kernels/cpu/dropout_grad_kernel.cc b/paddle/phi/kernels/cpu/dropout_grad_kernel.cc index 445e92716a899d..9a48fb3994adb4 100644 --- a/paddle/phi/kernels/cpu/dropout_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/dropout_grad_kernel.cc @@ -46,7 +46,7 @@ void DropoutNdGradKernel(const Context& dev_ctx, dX.device(place) = dY * static_cast(1.0f - prob); } } else { - std::vector out_dims = phi::vectorize(out_grad.dims()); + std::vector out_dims = common::vectorize(out_grad.dims()); auto M = EigenVector::Flatten(mask); if (dropout_implementation == "upscale_in_train") { if (prob == 1.0f) { diff --git a/paddle/phi/kernels/cpu/dropout_kernel.cc b/paddle/phi/kernels/cpu/dropout_kernel.cc index 79d805f62d2029..322ce0110d2bc0 100644 --- a/paddle/phi/kernels/cpu/dropout_kernel.cc +++ b/paddle/phi/kernels/cpu/dropout_kernel.cc @@ -65,7 +65,7 @@ void DropoutRawKernel(const Context& dev_ctx, bool upscale_in_train = (dropout_implementation == "upscale_in_train"); if (!is_test && mask) { auto* mask_data = dev_ctx.template Alloc(mask); - size_t size = phi::product(mask->dims()); + size_t size = common::product(mask->dims()); // Special case when dropout_prob is 1.0 if (dropout_prob == 1.0f) { @@ -135,7 +135,7 @@ void DropoutNdKernel(const Context& dev_ctx, t_mask.Resize(mask->dims()); T* t_mask_data = dev_ctx.template Alloc(&t_mask); auto* mask_data = dev_ctx.template Alloc(mask); - size_t size = phi::product(mask->dims()); + size_t size = common::product(mask->dims()); // Special case when dropout_prob is 1.0 if (dropout_prob == 1.0f) { diff --git a/paddle/phi/kernels/cpu/eig.h b/paddle/phi/kernels/cpu/eig.h index 3ec862c1d471b2..e23b27598c46d2 100644 --- a/paddle/phi/kernels/cpu/eig.h +++ b/paddle/phi/kernels/cpu/eig.h @@ -149,7 +149,7 @@ void LapackEig(DenseTensor* input, DenseTensor rwork; phi::dtype::Real* rwork_data = nullptr; - rwork.Resize(phi::make_ddim({lda * 2})); + rwork.Resize(common::make_ddim({lda * 2})); rwork_data = dev_ctx.template Alloc>(&rwork); // call lapackEig once to compute the size of work; @@ -172,7 +172,7 @@ void LapackEig(DenseTensor* input, lwork = std::max( 1, static_cast(phi::dtype::Real(computed_work_size))); DenseTensor work; - work.Resize(phi::make_ddim({lwork})); + work.Resize(common::make_ddim({lwork})); T* work_data = dev_ctx.template Alloc(&work); for (auto i = 0; i < batch_count; ++i) { @@ -217,8 +217,8 @@ void ApplyEigKernel(const DenseTensor& input, DenseTensor vectors_row_major; int num_dims = input.dims().size(); - // transfer to column-major memory layout i.e. make_ddim from tranposed_input: - // [batch,row,col]->[batch,col,row] + // transfer to column-major memory layout i.e. 
common::make_ddim from + // tranposed_input: [batch,row,col]->[batch,col,row] TransposeTwoAxis( input, &input_column_major, num_dims - 1, num_dims - 2, dev_ctx); // make sure 'vectors_row_major' holds memory before passed to LapackEig() diff --git a/paddle/phi/kernels/cpu/eig_kernel.cc b/paddle/phi/kernels/cpu/eig_kernel.cc index 8f5905f8f10892..f59e1abb7f0541 100644 --- a/paddle/phi/kernels/cpu/eig_kernel.cc +++ b/paddle/phi/kernels/cpu/eig_kernel.cc @@ -45,12 +45,12 @@ void EigKernel(const Context& dev_ctx, // double the size of real_w, the first half stores the real part, // the next half stores the imag part - std::vector origin_dim = phi::vectorize(out_w->dims()); + std::vector origin_dim = common::vectorize(out_w->dims()); int last_item = origin_dim.back(); origin_dim.pop_back(); origin_dim.push_back(last_item * 2); - phi::DDim big_dim = phi::make_ddim(origin_dim); + phi::DDim big_dim = common::make_ddim(origin_dim); real_w.Resize(big_dim); dev_ctx.template Alloc>(&real_w); diff --git a/paddle/phi/kernels/cpu/eigvals_kernel.cc b/paddle/phi/kernels/cpu/eigvals_kernel.cc index cd4aaca2ecf83f..f716a4de539222 100644 --- a/paddle/phi/kernels/cpu/eigvals_kernel.cc +++ b/paddle/phi/kernels/cpu/eigvals_kernel.cc @@ -78,7 +78,7 @@ typename std::enable_if::value>::type LapackEigvals( DenseTensor w; int64_t n_dim = input.dims()[1]; - w.Resize(make_ddim({n_dim << 1})); + w.Resize(common::make_ddim({n_dim << 1})); T* w_data = ctx.template Alloc(&w); int64_t work_mem = static_cast(work->memory_size()); @@ -190,9 +190,9 @@ void SpiltBatchSquareMatrix(const DenseTensor& input, DDim flattened_input_dims, flattened_output_dims; if (input_dims.size() > 2) { flattened_input_dims = - phi::flatten_to_3d(input_dims, last_dim - 1, last_dim); + common::flatten_to_3d(input_dims, last_dim - 1, last_dim); } else { - flattened_input_dims = phi::make_ddim({1, n_dim, n_dim}); + flattened_input_dims = common::make_ddim({1, n_dim, n_dim}); } DenseTensor flattened_input; @@ -211,7 +211,7 @@ void EigvalsKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) { int64_t n_dim = x_matrices[0].dims()[1]; int64_t n_batch = static_cast(x_matrices.size()); DDim out_dims = out->dims(); - out->Resize(make_ddim({n_batch, n_dim})); + out->Resize(common::make_ddim({n_batch, n_dim})); std::vector out_vectors = out->Split(1, 0); // query workspace size @@ -235,11 +235,11 @@ void EigvalsKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) { DenseTensor work, rwork; - work.Resize(make_ddim({lwork})); + work.Resize(common::make_ddim({lwork})); ctx.template Alloc(&work); if (IsComplexType(x.dtype())) { - rwork.Resize(make_ddim({n_dim << 1})); + rwork.Resize(common::make_ddim({n_dim << 1})); ctx.template Alloc>(&rwork); } diff --git a/paddle/phi/kernels/cpu/fill_diagonal_tensor_kernel.cc b/paddle/phi/kernels/cpu/fill_diagonal_tensor_kernel.cc index fa4d1ae7a710e5..c612b7a6a3f5d5 100644 --- a/paddle/phi/kernels/cpu/fill_diagonal_tensor_kernel.cc +++ b/paddle/phi/kernels/cpu/fill_diagonal_tensor_kernel.cc @@ -86,7 +86,7 @@ void FillDiagonalTensorKernel(const Context &ctx, phi::Copy(ctx, x, ctx.GetPlace(), false, out); auto out_dims = out->dims(); auto matdims = y.dims(); - auto fill_dims = phi::flatten_to_2d(matdims, matdims.size() - 1); + auto fill_dims = common::flatten_to_2d(matdims, matdims.size() - 1); std::array new_dims; std::array strides; diff --git a/paddle/phi/kernels/cpu/flip_kernel.cc b/paddle/phi/kernels/cpu/flip_kernel.cc index f83967073e293e..d53ffaa3df439b 100644 --- 
a/paddle/phi/kernels/cpu/flip_kernel.cc +++ b/paddle/phi/kernels/cpu/flip_kernel.cc @@ -38,7 +38,7 @@ void FlipKernel(const Context& dev_ctx, } dim_bitset[dim] = true; } - auto x_strides = phi::stride(x_dims); + auto x_strides = common::stride(x_dims); auto numel = x.numel(); const T* x_data = x.data(); T* out_data = dev_ctx.template Alloc(out); diff --git a/paddle/phi/kernels/cpu/full_kernel.cc b/paddle/phi/kernels/cpu/full_kernel.cc index e4ba06778817c0..b1a6ceda3647d5 100644 --- a/paddle/phi/kernels/cpu/full_kernel.cc +++ b/paddle/phi/kernels/cpu/full_kernel.cc @@ -35,7 +35,7 @@ void FullKernel(const Context& dev_ctx, const Scalar& val, DataType dtype UNUSED, DenseTensor* out) { - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); FullValue(dev_ctx, out, val.to()); } @@ -91,7 +91,7 @@ void FullIntArrayKernel(const Context& dev_ctx, const std::vector& shape, DataType dtype UNUSED, DenseTensor* out) { - out->Resize(phi::make_ddim({static_cast(shape.size())})); + out->Resize(common::make_ddim({static_cast(shape.size())})); T* out_data = dev_ctx.template Alloc(out); for (size_t i = 0; i < shape.size(); ++i) { int64_t val = shape[i]; diff --git a/paddle/phi/kernels/cpu/gaussian_kernel.cc b/paddle/phi/kernels/cpu/gaussian_kernel.cc index 00ed6aaf357409..8915f721a6911b 100644 --- a/paddle/phi/kernels/cpu/gaussian_kernel.cc +++ b/paddle/phi/kernels/cpu/gaussian_kernel.cc @@ -32,7 +32,7 @@ void GaussianKernel(const Context& dev_ctx, std::normal_distribution dist(mean, std); - tensor->Resize(phi::make_ddim(shape.GetData())); + tensor->Resize(common::make_ddim(shape.GetData())); int64_t size = tensor->numel(); T* data = dev_ctx.template Alloc(tensor); std::shared_ptr engine; diff --git a/paddle/phi/kernels/cpu/generate_proposals_kernel.cc b/paddle/phi/kernels/cpu/generate_proposals_kernel.cc index e9764035613ed3..3f3398ae59496c 100644 --- a/paddle/phi/kernels/cpu/generate_proposals_kernel.cc +++ b/paddle/phi/kernels/cpu/generate_proposals_kernel.cc @@ -73,7 +73,7 @@ void FilterBoxes(const phi::CPUContext& ctx, bool pixel_offset = true) { const T* im_info_data = im_info.data(); const T* boxes_data = boxes->data(); - keep->Resize(phi::make_ddim({boxes->dims()[0]})); + keep->Resize(common::make_ddim({boxes->dims()[0]})); min_size = std::max(min_size, 1.0f); int* keep_data = ctx.template Alloc(keep); T offset = pixel_offset ? 
static_cast(1.0) : 0; @@ -101,7 +101,7 @@ void FilterBoxes(const phi::CPUContext& ctx, } } } - keep->Resize(phi::make_ddim({keep_len})); + keep->Resize(common::make_ddim({keep_len})); } template @@ -189,7 +189,7 @@ std::pair ProposalForOneImage( // Sort index DenseTensor index_t; - index_t.Resize(phi::make_ddim({scores_slice.numel()})); + index_t.Resize(common::make_ddim({scores_slice.numel()})); int* index = ctx.template Alloc(&index_t); for (int i = 0; i < scores_slice.numel(); ++i) { index[i] = i; @@ -203,20 +203,20 @@ std::pair ProposalForOneImage( } else { std::nth_element( index, index + pre_nms_top_n, index + scores_slice.numel(), compare); - index_t.Resize(phi::make_ddim({pre_nms_top_n})); + index_t.Resize(common::make_ddim({pre_nms_top_n})); } DenseTensor scores_sel, bbox_sel, anchor_sel, var_sel; - scores_sel.Resize(phi::make_ddim({index_t.numel(), 1})); + scores_sel.Resize(common::make_ddim({index_t.numel(), 1})); ctx.template Alloc(&scores_sel); - bbox_sel.Resize(phi::make_ddim({index_t.numel(), 4})); + bbox_sel.Resize(common::make_ddim({index_t.numel(), 4})); ctx.template Alloc(&bbox_sel); - anchor_sel.Resize(phi::make_ddim({index_t.numel(), 4})); + anchor_sel.Resize(common::make_ddim({index_t.numel(), 4})); ctx.template Alloc(&anchor_sel); - var_sel.Resize(phi::make_ddim({index_t.numel(), 4})); + var_sel.Resize(common::make_ddim({index_t.numel(), 4})); ctx.template Alloc(&var_sel); phi::funcs::CPUGather(ctx, scores_slice, index_t, &scores_sel); @@ -225,7 +225,7 @@ std::pair ProposalForOneImage( phi::funcs::CPUGather(ctx, variances, index_t, &var_sel); DenseTensor proposals; - proposals.Resize(phi::make_ddim({index_t.numel(), 4})); + proposals.Resize(common::make_ddim({index_t.numel(), 4})); ctx.template Alloc(&proposals); BoxCoder(ctx, &anchor_sel, &bbox_sel, &var_sel, &proposals, pixel_offset); @@ -239,20 +239,20 @@ std::pair ProposalForOneImage( // Handle the case when there is no keep index left if (keep.numel() == 0) { phi::funcs::SetConstant set_zero; - bbox_sel.Resize(phi::make_ddim({1, 4})); + bbox_sel.Resize(common::make_ddim({1, 4})); ctx.template Alloc(&bbox_sel); set_zero(ctx, &bbox_sel, static_cast(0)); DenseTensor scores_filter; - scores_filter.Resize(phi::make_ddim({1, 1})); + scores_filter.Resize(common::make_ddim({1, 1})); ctx.template Alloc(&scores_filter); set_zero(ctx, &scores_filter, static_cast(0)); return std::make_pair(bbox_sel, scores_filter); } DenseTensor scores_filter; - bbox_sel.Resize(phi::make_ddim({keep.numel(), 4})); + bbox_sel.Resize(common::make_ddim({keep.numel(), 4})); ctx.template Alloc(&bbox_sel); - scores_filter.Resize(phi::make_ddim({keep.numel(), 1})); + scores_filter.Resize(common::make_ddim({keep.numel(), 1})); ctx.template Alloc(&scores_filter); phi::funcs::CPUGather(ctx, proposals, keep, &bbox_sel); phi::funcs::CPUGather(ctx, scores_sel, keep, &scores_filter); @@ -264,12 +264,12 @@ std::pair ProposalForOneImage( ctx, &bbox_sel, &scores_filter, nms_thresh, eta, pixel_offset); if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) { - keep_nms.Resize(phi::make_ddim({post_nms_top_n})); + keep_nms.Resize(common::make_ddim({post_nms_top_n})); } - proposals.Resize(phi::make_ddim({keep_nms.numel(), 4})); + proposals.Resize(common::make_ddim({keep_nms.numel(), 4})); ctx.template Alloc(&proposals); - scores_sel.Resize(phi::make_ddim({keep_nms.numel(), 1})); + scores_sel.Resize(common::make_ddim({keep_nms.numel(), 1})); ctx.template Alloc(&scores_sel); phi::funcs::CPUGather(ctx, bbox_sel, keep_nms, &proposals); 
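
For context, nearly every hunk in this file is the same mechanical substitution: the DDim helpers keep their names, signatures, and semantics and only move from namespace phi to namespace common. A minimal sketch of the post-patch spelling, assuming a build inside a Paddle tree that already contains paddle/common/ddim.h from this change (the assert-based round-trip is illustrative, not a Paddle test):

    #include <cassert>
    #include <cstdint>
    #include <vector>
    #include "paddle/common/ddim.h"

    int main() {
      // What used to be phi::make_ddim is now common::make_ddim; the
      // resulting DDim and its helpers behave the same.
      common::DDim dims = common::make_ddim({8, 4, 4});
      assert(common::product(dims) == 8 * 4 * 4);
      // vectorize and make_ddim remain inverses of each other, which is
      // what the edit-extents-then-rebuild hunks above rely on.
      std::vector<int64_t> extents = common::vectorize(dims);
      assert(common::make_ddim(extents) == dims);
      return 0;
    }

This is also why the hunks that rewrap lines (rather than just renaming) exist: the longer common:: prefix pushes some calls past the formatter's column limit.
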
  phi::funcs::CPUGather(ctx, scores_filter, keep_nms, &scores_sel);
@@ -304,17 +304,17 @@ void GenerateProposalsKernel(const Context& ctx,
   int64_t h_bbox = bbox_dim[2];
   int64_t w_bbox = bbox_dim[3];
-  rpn_rois->Resize(phi::make_ddim({bbox_deltas.numel() / 4, 4}));
+  rpn_rois->Resize(common::make_ddim({bbox_deltas.numel() / 4, 4}));
   ctx.template Alloc(rpn_rois);
-  rpn_roi_probs->Resize(phi::make_ddim({scores.numel(), 1}));
+  rpn_roi_probs->Resize(common::make_ddim({scores.numel(), 1}));
   ctx.template Alloc(rpn_roi_probs);
   DenseTensor bbox_deltas_swap, scores_swap;
-  bbox_deltas_swap.Resize(phi::make_ddim({num, h_bbox, w_bbox, c_bbox}));
+  bbox_deltas_swap.Resize(common::make_ddim({num, h_bbox, w_bbox, c_bbox}));
   ctx.template Alloc(&bbox_deltas_swap);
-  scores_swap.Resize(phi::make_ddim({num, h_score, w_score, c_score}));
+  scores_swap.Resize(common::make_ddim({num, h_score, w_score, c_score}));
   ctx.template Alloc(&scores_swap);
   phi::funcs::Transpose trans;
@@ -328,8 +328,8 @@ void GenerateProposalsKernel(const Context& ctx,
   lod0.push_back(0);
   DenseTensor tmp_anchors = anchors;
   DenseTensor tmp_variances = variances;
-  tmp_anchors.Resize(phi::make_ddim({tmp_anchors.numel() / 4, 4}));
-  tmp_variances.Resize(phi::make_ddim({tmp_variances.numel() / 4, 4}));
+  tmp_anchors.Resize(common::make_ddim({tmp_anchors.numel() / 4, 4}));
+  tmp_variances.Resize(common::make_ddim({tmp_variances.numel() / 4, 4}));
   std::vector tmp_num;
   int64_t num_proposals = 0;
@@ -338,8 +338,9 @@ void GenerateProposalsKernel(const Context& ctx,
     DenseTensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1);
     DenseTensor scores_slice = scores_swap.Slice(i, i + 1);
-    bbox_deltas_slice.Resize(phi::make_ddim({h_bbox * w_bbox * c_bbox / 4, 4}));
-    scores_slice.Resize(phi::make_ddim({h_score * w_score * c_score, 1}));
+    bbox_deltas_slice.Resize(
+        common::make_ddim({h_bbox * w_bbox * c_bbox / 4, 4}));
+    scores_slice.Resize(common::make_ddim({h_score * w_score * c_score, 1}));
     std::pair tensor_pair =
         ProposalForOneImage(ctx,
@@ -364,16 +365,16 @@ void GenerateProposalsKernel(const Context& ctx,
     tmp_num.push_back(static_cast(proposals.dims()[0]));
   }
   if (rpn_rois_num != nullptr) {
-    rpn_rois_num->Resize(phi::make_ddim({num}));
+    rpn_rois_num->Resize(common::make_ddim({num}));
     ctx.template Alloc(rpn_rois_num);
     int* num_data = rpn_rois_num->data();
     for (int i = 0; i < num; i++) {
       num_data[i] = tmp_num[i];
     }
-    rpn_rois_num->Resize(phi::make_ddim({num}));
+    rpn_rois_num->Resize(common::make_ddim({num}));
   }
-  rpn_rois->Resize(phi::make_ddim({num_proposals, 4}));
-  rpn_roi_probs->Resize(phi::make_ddim({num_proposals, 1}));
+  rpn_rois->Resize(common::make_ddim({num_proposals, 4}));
+  rpn_roi_probs->Resize(common::make_ddim({num_proposals, 1}));
 }
 }  // namespace phi
diff --git a/paddle/phi/kernels/cpu/grid_sample_kernel.cc b/paddle/phi/kernels/cpu/grid_sample_kernel.cc
index 172ca16d0deb87..1d8a8536e01dbc 100644
--- a/paddle/phi/kernels/cpu/grid_sample_kernel.cc
+++ b/paddle/phi/kernels/cpu/grid_sample_kernel.cc
@@ -320,7 +320,7 @@ void GridSampleKernel(const Context& dev_ctx,
     const int in_h = static_cast(x.dims()[2]);
     const int in_w = static_cast(x.dims()[3]);
-    out->Resize(phi::make_ddim({n, c, out_h, out_w}));
+    out->Resize(common::make_ddim({n, c, out_h, out_w}));
     dev_ctx.template Alloc(out);
     phi::funcs::SetConstant()(dev_ctx, out, static_cast(0));
@@ -353,7 +353,7 @@ void GridSampleKernel(const Context& dev_ctx,
     const int in_h = static_cast(x.dims()[3]);
     const int in_w = static_cast(x.dims()[4]);
-    out->Resize(phi::make_ddim({n, c, out_d, out_h, out_w}));
+    out->Resize(common::make_ddim({n, c, out_d, out_h, out_w}));
     dev_ctx.template Alloc(out);
     phi::funcs::SetConstant()(dev_ctx, out, static_cast(0));
diff --git a/paddle/phi/kernels/cpu/group_norm_grad_kernel.cc b/paddle/phi/kernels/cpu/group_norm_grad_kernel.cc
index a4c123f2f94ff1..75d7a164a99240 100644
--- a/paddle/phi/kernels/cpu/group_norm_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/group_norm_grad_kernel.cc
@@ -19,8 +19,8 @@
 #include
 #include
+#include "paddle/common/layout.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
-#include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
@@ -44,7 +44,7 @@ void GroupNormGradKernel(const Context& dev_ctx,
                          DenseTensor* d_x,
                          DenseTensor* d_scale,
                          DenseTensor* d_bias) {
-  const DataLayout data_layout = phi::StringToDataLayout(data_layout_str);
+  const DataLayout data_layout = common::StringToDataLayout(data_layout_str);
   const auto scale_ptr = scale.get_ptr();
   const auto bias_ptr = bias.get_ptr();
   const auto& x_dims = y.dims();
diff --git a/paddle/phi/kernels/cpu/group_norm_kernel.cc b/paddle/phi/kernels/cpu/group_norm_kernel.cc
index 35975018dca1cc..499e47949c32e8 100644
--- a/paddle/phi/kernels/cpu/group_norm_kernel.cc
+++ b/paddle/phi/kernels/cpu/group_norm_kernel.cc
@@ -19,8 +19,8 @@
 #include
 #include
+#include "paddle/common/layout.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
-#include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
@@ -40,7 +40,7 @@ void GroupNormKernel(const Context& dev_ctx,
                      DenseTensor* y,
                      DenseTensor* mean,
                      DenseTensor* var) {
-  const DataLayout data_layout = phi::StringToDataLayout(data_layout_str);
+  const DataLayout data_layout = common::StringToDataLayout(data_layout_str);
   const auto scale_ptr = scale.get_ptr();
   const auto bias_ptr = bias.get_ptr();
diff --git a/paddle/phi/kernels/cpu/gumbel_softmax_kernel.cc b/paddle/phi/kernels/cpu/gumbel_softmax_kernel.cc
index 12d4a668be33f6..94ef3231c70101 100644
--- a/paddle/phi/kernels/cpu/gumbel_softmax_kernel.cc
+++ b/paddle/phi/kernels/cpu/gumbel_softmax_kernel.cc
@@ -34,7 +34,7 @@ struct GumbleNoiseGenerator {
     std::uniform_real_distribution dist(0.00001, 1);
     auto engine = ctx.GetGenerator()->GetCPUEngine();
     DenseTensor random_tensor;
-    random_tensor.Resize(make_ddim({size}));
+    random_tensor.Resize(common::make_ddim({size}));
     auto* random_data = ctx.template Alloc(&random_tensor);
     for (int64_t i = 0; i < size; ++i) {
       random_data[i] = dist(*engine);
diff --git a/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc b/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc
index 3b43c2e8c2e5c7..cee94ff3fd734f 100644
--- a/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc
+++ b/paddle/phi/kernels/cpu/hsigmoid_loss_kernel.cc
@@ -51,7 +51,7 @@ void HSigmoidLossKernel(const Context& ctx,
           : static_cast(phi::funcs::FindLastSet(num_classes_st - 1));
   int64_t batch_size = x.dims()[0];
   DenseTensor sum;
-  pre_out->Resize(phi::make_ddim({batch_size, code_length}));
+  pre_out->Resize(common::make_ddim({batch_size, code_length}));
   ctx.template Alloc(pre_out);
   auto* pre_out_data = pre_out->data();
   auto pre_out_mat = EigenMatrix::From(*pre_out);
@@ -72,7 +72,7 @@ void HSigmoidLossKernel(const Context& ctx,
   }
   std::vector sum_dims({batch_size, 1UL});
-  sum.Resize(phi::make_ddim(sum_dims));
+  sum.Resize(common::make_ddim(sum_dims));
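
The group_norm hunks above also pick up common::StringToDataLayout. Its job is small: turn the kernel's data_layout attribute string into an enum once, so the hot loops branch on an integer instead of comparing strings. Below is a stand-alone model of that behavior; Layout and ParseLayout are hand-rolled stand-ins for illustration, not the declarations in paddle/common/layout.h.

    #include <cassert>
    #include <cstdint>
    #include <stdexcept>
    #include <string>

    enum class Layout { kNCHW, kNHWC };

    // Map the attribute string onto the enum, rejecting anything else,
    // roughly what StringToDataLayout does for these kernels.
    Layout ParseLayout(const std::string& s) {
      if (s == "NCHW") return Layout::kNCHW;
      if (s == "NHWC") return Layout::kNHWC;
      throw std::invalid_argument("unsupported data layout: " + s);
    }

    int main() {
      // The parsed layout decides which extent is the channel count:
      // dims[1] for NCHW, dims[rank - 1] for NHWC.
      int64_t dims[4] = {8, 3, 32, 32};  // an NCHW example shape
      Layout layout = ParseLayout("NCHW");
      int64_t channels = (layout == Layout::kNCHW) ? dims[1] : dims[3];
      assert(channels == 3);
      return 0;
    }
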
ctx.template Alloc(&sum); auto sum_mat = EigenMatrix::From(sum); ctx.template Alloc(out); diff --git a/paddle/phi/kernels/cpu/index_add_impl.h b/paddle/phi/kernels/cpu/index_add_impl.h index 0a0671951b357a..d16cbc382215dc 100644 --- a/paddle/phi/kernels/cpu/index_add_impl.h +++ b/paddle/phi/kernels/cpu/index_add_impl.h @@ -77,8 +77,8 @@ void IndexAddInner(const Context& ctx, VLOG(3) << "Index_Add_Debug; outer_nums: " << outer_nums << "; slice_size: " << slice_size << "; index_size: " << index_size; - output->Resize(phi::make_ddim({outer_nums, input_dim[axis], slice_size})); - add_value->Resize(phi::make_ddim({outer_nums, index_size, slice_size})); + output->Resize(common::make_ddim({outer_nums, input_dim[axis], slice_size})); + add_value->Resize(common::make_ddim({outer_nums, index_size, slice_size})); VLOG(3) << "output.dims: " << output->dims() << ", add_value.dims: " << add_value->dims(); diff --git a/paddle/phi/kernels/cpu/index_put_grad_kernel.cc b/paddle/phi/kernels/cpu/index_put_grad_kernel.cc index 7385a928c17916..8a100af33f0184 100644 --- a/paddle/phi/kernels/cpu/index_put_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/index_put_grad_kernel.cc @@ -91,7 +91,7 @@ void LaunchIndexPutGradKernel(const Context& dev_ctx, auto x_grad_dims = x_grad->dims(); const int64_t numel = indices[0]->numel(); - auto x_grad_stride = phi::stride(x_grad_dims); + auto x_grad_stride = common::stride(x_grad_dims); set_zero_kernel( numel, pd_indices.data(), x_grad_stride, x_grad_dims, x_grad_data); @@ -100,7 +100,7 @@ void LaunchIndexPutGradKernel(const Context& dev_ctx, auto out_grad_dims = out_grad.dims(); const int64_t numel = indices[0]->numel(); - auto out_grad_stride = phi::stride(out_grad_dims); + auto out_grad_stride = common::stride(out_grad_dims); if (value_grad) { if (value_grad->numel() == 1) { @@ -150,8 +150,9 @@ void LaunchIndexPutGradKernel(const Context& dev_ctx, out_grad_dims, tmp_value_grad_data); - std::vector after_dims = phi::vectorize(tmp_value_grad.dims()); - std::vector before_dims = phi::vectorize(value_grad->dims()); + std::vector after_dims = + common::vectorize(tmp_value_grad.dims()); + std::vector before_dims = common::vectorize(value_grad->dims()); std::vector compress_dims; std::vector dims_without_1; @@ -159,7 +160,7 @@ void LaunchIndexPutGradKernel(const Context& dev_ctx, &after_dims, &before_dims, &compress_dims, &dims_without_1); auto pre_dims = value_grad->dims(); - value_grad->Resize(phi::make_ddim(dims_without_1)); + value_grad->Resize(common::make_ddim(dims_without_1)); IntArray v_axis(compress_dims); SumKernel(dev_ctx, tmp_value_grad, @@ -196,7 +197,7 @@ void IndexPutGradKernel(const Context& dev_ctx, } if (value_grad) { FullKernel(dev_ctx, - phi::vectorize(value_grad->dims()), + common::vectorize(value_grad->dims()), 0.0f, value_grad->dtype(), value_grad); @@ -205,7 +206,7 @@ void IndexPutGradKernel(const Context& dev_ctx, } auto bd_dim = funcs::BroadCastTensorsDims(int_indices_v); - std::vector res_dim_v(phi::vectorize(bd_dim)); + std::vector res_dim_v(common::vectorize(bd_dim)); std::vector res_indices_v(x.dims().size(), nullptr); std::vector tmp_res_indices_v; std::vector range_tensor_v; diff --git a/paddle/phi/kernels/cpu/index_put_kernel.cc b/paddle/phi/kernels/cpu/index_put_kernel.cc index f587978c2c2adf..4820dbc3087b9c 100644 --- a/paddle/phi/kernels/cpu/index_put_kernel.cc +++ b/paddle/phi/kernels/cpu/index_put_kernel.cc @@ -72,7 +72,7 @@ void LaunchIndexPutKernel(const Context& dev_ctx, auto x_dims = x.dims(); const int64_t numel = indices[0]->numel(); - auto 
x_stride = phi::stride(x_dims); + auto x_stride = common::stride(x_dims); int64_t is_single_val_tensor = (value.numel() == 1) ? 0 : INT64_MAX; @@ -127,7 +127,7 @@ void IndexPutKernel(const Context& dev_ctx, auto bd_dim = funcs::BroadCastTensorsDims(int_indices_v); - std::vector res_dim_v(phi::vectorize(bd_dim)); + std::vector res_dim_v(common::vectorize(bd_dim)); std::vector res_indices_v(x.dims().size(), nullptr); std::vector tmp_res_indices_v; std::vector tmp_value_v; @@ -150,7 +150,7 @@ void IndexPutKernel(const Context& dev_ctx, &res_dim_v); if (value.numel() != 1) { tmp_value_v.emplace_back( - DenseTensor(value.dtype()).Resize(phi::make_ddim(res_dim_v))); + DenseTensor(value.dtype()).Resize(common::make_ddim(res_dim_v))); ExpandKernel( dev_ctx, value, IntArray(res_dim_v), &tmp_value_v[0]); ptr_value = &tmp_value_v[0]; diff --git a/paddle/phi/kernels/cpu/index_sample_kernel.cc b/paddle/phi/kernels/cpu/index_sample_kernel.cc index 02f3afcb67b6ef..f2aa55b9d326f2 100644 --- a/paddle/phi/kernels/cpu/index_sample_kernel.cc +++ b/paddle/phi/kernels/cpu/index_sample_kernel.cc @@ -76,7 +76,7 @@ void IndexSampleInner(const Context &context, res[i] = v; } - auto ddim = phi::make_ddim({batch_size, index_length}); + auto ddim = common::make_ddim({batch_size, index_length}); context.template Alloc(output); phi::TensorFromVector(res, context, output); output->Resize(ddim); diff --git a/paddle/phi/kernels/cpu/index_select_impl.h b/paddle/phi/kernels/cpu/index_select_impl.h index 7c20ab8ea5dfd2..522719e56fcfdc 100644 --- a/paddle/phi/kernels/cpu/index_select_impl.h +++ b/paddle/phi/kernels/cpu/index_select_impl.h @@ -106,8 +106,8 @@ void IndexSelectInner(const Context& ctx, VLOG(3) << "Index_Select_Debug; outer_nums: " << outer_nums << "; slice_size: " << slice_size << "; index_size: " << index_size; - input->Resize(phi::make_ddim({outer_nums, input_dim[dim], slice_size})); - output->Resize(phi::make_ddim({outer_nums, index_size, slice_size})); + input->Resize(common::make_ddim({outer_nums, input_dim[dim], slice_size})); + output->Resize(common::make_ddim({outer_nums, index_size, slice_size})); auto input_tensor = EigenTensor::From(*input); auto output_tensor = EigenTensor::From(*output); diff --git a/paddle/phi/kernels/cpu/instance_norm_grad_kernel.cc b/paddle/phi/kernels/cpu/instance_norm_grad_kernel.cc index d798c6b81c9666..b53482c9d8d3fd 100644 --- a/paddle/phi/kernels/cpu/instance_norm_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/instance_norm_grad_kernel.cc @@ -18,8 +18,8 @@ #include #include +#include "paddle/common/layout.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/extensions.h" diff --git a/paddle/phi/kernels/cpu/instance_norm_kernel.cc b/paddle/phi/kernels/cpu/instance_norm_kernel.cc index 1242babaf0c835..56af2dc5f23403 100644 --- a/paddle/phi/kernels/cpu/instance_norm_kernel.cc +++ b/paddle/phi/kernels/cpu/instance_norm_kernel.cc @@ -18,8 +18,8 @@ #include #include +#include "paddle/common/layout.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/eigen/common.h" diff --git a/paddle/phi/kernels/cpu/interpolate_grad_kernel.cc b/paddle/phi/kernels/cpu/interpolate_grad_kernel.cc index e32738b4588c83..79aac41a34903f 100644 --- 
a/paddle/phi/kernels/cpu/interpolate_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/interpolate_grad_kernel.cc @@ -15,9 +15,9 @@ #include "paddle/phi/kernels/interpolate_grad_kernel.h" #include +#include "paddle/common/layout.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/amp_type_traits.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/interpolate_function.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -406,7 +406,7 @@ static void Interpolate1DCPUBwd( bool align_corners, int align_mode, DenseTensor* input_grad) { - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n = 0, c = 0, in_d = 0, in_h = 0, in_w = 0; funcs::ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -507,7 +507,7 @@ static void Interpolate2DCPUBwd( bool align_corners, int align_mode, DenseTensor* input_grad) { - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n = 0, c = 0, in_d = 0, in_h = 0, in_w = 0; funcs::ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -673,7 +673,7 @@ static void Interpolate3DCPUBwd( bool align_corners, int align_mode, DenseTensor* input_grad) { - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n = 0, c = 0, in_d = 0, in_h = 0, in_w = 0; funcs::ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); diff --git a/paddle/phi/kernels/cpu/interpolate_kernel.cc b/paddle/phi/kernels/cpu/interpolate_kernel.cc index 7c957657ceb39e..495ecc6b3cdfd4 100644 --- a/paddle/phi/kernels/cpu/interpolate_kernel.cc +++ b/paddle/phi/kernels/cpu/interpolate_kernel.cc @@ -15,9 +15,9 @@ #include "paddle/phi/kernels/interpolate_kernel.h" #include +#include "paddle/common/layout.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/amp_type_traits.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/interpolate_function.h" @@ -560,7 +560,7 @@ static void Interpolate1DCPUFwd( bool align_corners, int align_mode, DenseTensor* output) { - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n = 0, c = 0, in_d = 0, in_h = 0, in_w = 0; funcs::ExtractNCDWH(x.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -661,7 +661,7 @@ static void Interpolate2DCPUFwd( bool align_corners, int align_mode, DenseTensor* output) { - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n = 0, c = 0, in_d = 0, in_h = 0, in_w = 0; funcs::ExtractNCDWH(x.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -832,7 +832,7 @@ static void Interpolate3DCPUFwd( bool align_corners, int align_mode, DenseTensor* output) { - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n = 0, c = 0, in_d = 0, in_h = 0, in_w = 0; funcs::ExtractNCDWH(x.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); diff --git a/paddle/phi/kernels/cpu/kthvalue_grad_kernel.cc 
b/paddle/phi/kernels/cpu/kthvalue_grad_kernel.cc index 5239512537b428..12c6d9fb28e06e 100644 --- a/paddle/phi/kernels/cpu/kthvalue_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/kthvalue_grad_kernel.cc @@ -73,12 +73,12 @@ void KthvalueGradKernel(const Context& dev_ctx, for (int i = axis + 1; i < in_dims.size(); i++) { tmp_out_shape.emplace_back(out_dims[i - 1]); } - out_dims = phi::make_ddim(tmp_out_shape); + out_dims = common::make_ddim(tmp_out_shape); } if (axis == in_dims.size() - 1) { const int64_t input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t input_width = in_dims[in_dims.size() - 1]; memset(x_grad_data, 0, d_x->numel() * sizeof(T)); if (keepdim) { @@ -147,8 +147,8 @@ void KthvalueGradKernel(const Context& dev_ctx, funcs::TransCompute( ndims, dev_ctx, indices_tmp, &trans_ind, trans); } - const int64_t input_height = phi::product( - phi::slice_ddim(trans_in_dims, 0, trans_in_dims.size() - 1)); + const int64_t input_height = common::product( + common::slice_ddim(trans_in_dims, 0, trans_in_dims.size() - 1)); const int64_t input_width = trans_in_dims[trans_in_dims.size() - 1]; DenseTensor tmp_out; tmp_out.Resize(trans_in_dims); diff --git a/paddle/phi/kernels/cpu/kthvalue_kernel.cc b/paddle/phi/kernels/cpu/kthvalue_kernel.cc index eb8cc8f813ff9e..6e719f52b6e051 100644 --- a/paddle/phi/kernels/cpu/kthvalue_kernel.cc +++ b/paddle/phi/kernels/cpu/kthvalue_kernel.cc @@ -101,7 +101,7 @@ void KthvalueKernel(const Context& dev_ctx, auto out_dims = output->dims(); if (axis == in_dims.size() - 1) { const int64_t& input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t& input_width = in_dims[in_dims.size() - 1]; getKthvalue(input_height, input_width, @@ -129,7 +129,7 @@ void KthvalueKernel(const Context& dev_ctx, for (int i = axis + 1; i < in_dims.size(); i++) { tmp_out_shape.emplace_back(in_dims[i]); } - DDim tmp_out_dims = phi::make_ddim(tmp_out_shape); + DDim tmp_out_dims = common::make_ddim(tmp_out_shape); output->Resize(tmp_out_dims); indices->Resize(tmp_out_dims); } @@ -148,8 +148,8 @@ void KthvalueKernel(const Context& dev_ctx, funcs::TransCompute( ndims, dev_ctx, x, &trans_inp, trans); - const int64_t input_height = - phi::product(phi::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); + const int64_t input_height = common::product( + common::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); const int64_t input_width = trans_dims[trans_dims.size() - 1]; DenseTensor tmp_out, tmp_indices; tmp_out.Resize(trans_out_dims); diff --git a/paddle/phi/kernels/cpu/layer_norm_grad_kernel.cc b/paddle/phi/kernels/cpu/layer_norm_grad_kernel.cc index ddc63598756710..341d8ef98fd947 100644 --- a/paddle/phi/kernels/cpu/layer_norm_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/layer_norm_grad_kernel.cc @@ -48,7 +48,7 @@ void LayerNormGradKernel(const Context& dev_ctx, auto* d_bias = bias_grad; const auto& x_dims = x.dims(); - auto matrix_dim = phi::flatten_to_2d(x_dims, begin_norm_axis); + auto matrix_dim = common::flatten_to_2d(x_dims, begin_norm_axis); int left = static_cast(matrix_dim[0]); int right = static_cast(matrix_dim[1]); DDim matrix_shape({left, right}); diff --git a/paddle/phi/kernels/cpu/layer_norm_kernel.cc b/paddle/phi/kernels/cpu/layer_norm_kernel.cc index b15b1554a51c43..8713d2f49e60e9 100644 --- a/paddle/phi/kernels/cpu/layer_norm_kernel.cc +++ 
b/paddle/phi/kernels/cpu/layer_norm_kernel.cc @@ -46,7 +46,7 @@ void LayerNormKernel(const Context& dev_ctx, dev_ctx.template Alloc(mean); dev_ctx.template Alloc(var); - auto matrix_dim = phi::flatten_to_2d(x_dims, begin_norm_axis); + auto matrix_dim = common::flatten_to_2d(x_dims, begin_norm_axis); int left = static_cast(matrix_dim[0]); int right = static_cast(matrix_dim[1]); DDim matrix_shape({left, right}); diff --git a/paddle/phi/kernels/cpu/limit_by_capacity_kernel.cc b/paddle/phi/kernels/cpu/limit_by_capacity_kernel.cc index ea2f6cbc6ee82c..1057120b2ae5e1 100644 --- a/paddle/phi/kernels/cpu/limit_by_capacity_kernel.cc +++ b/paddle/phi/kernels/cpu/limit_by_capacity_kernel.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/limit_by_capacity_kernel.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/kernel_registry.h" #if defined(PADDLE_WITH_GLOO) diff --git a/paddle/phi/kernels/cpu/linspace_kernel.cc b/paddle/phi/kernels/cpu/linspace_kernel.cc index 4b8b7f7a2e05c7..70cd1e17ca9ce5 100644 --- a/paddle/phi/kernels/cpu/linspace_kernel.cc +++ b/paddle/phi/kernels/cpu/linspace_kernel.cc @@ -40,7 +40,7 @@ void LinspaceKernel(const Context& ctx, "than 0, but received num is %d", num)); - out->Resize(phi::make_ddim({num})); + out->Resize(common::make_ddim({num})); T* out_data = ctx.template Alloc(out); if (num > 1) { diff --git a/paddle/phi/kernels/cpu/logspace_kernel.cc b/paddle/phi/kernels/cpu/logspace_kernel.cc index fbb31057a35ae9..f6a31fed1e13ff 100644 --- a/paddle/phi/kernels/cpu/logspace_kernel.cc +++ b/paddle/phi/kernels/cpu/logspace_kernel.cc @@ -45,7 +45,7 @@ void LogspaceKernel(const Context& ctx, "than 0, but received num is %d", num)); - out->Resize(phi::make_ddim({num})); + out->Resize(common::make_ddim({num})); T* out_data = ctx.template Alloc(out); if (num > 1) { diff --git a/paddle/phi/kernels/cpu/lstsq_kernel.cc b/paddle/phi/kernels/cpu/lstsq_kernel.cc index 3e4782c1a9f0a9..2b81649caf904f 100644 --- a/paddle/phi/kernels/cpu/lstsq_kernel.cc +++ b/paddle/phi/kernels/cpu/lstsq_kernel.cc @@ -70,11 +70,11 @@ void LstsqKernel(const Context& dev_ctx, int ldb = std::max(1, std::max(m, n)); DenseTensor* new_x = new DenseTensor(); - new_x->Resize(phi::make_ddim({batch_count, m, n})); + new_x->Resize(common::make_ddim({batch_count, m, n})); dev_ctx.template Alloc(new_x); phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), true, new_x); - solution->Resize(phi::make_ddim({batch_count, std::max(m, n), nrhs})); + solution->Resize(common::make_ddim({batch_count, std::max(m, n), nrhs})); dev_ctx.template Alloc(solution); if (m >= n) { @@ -122,7 +122,7 @@ void LstsqKernel(const Context& dev_ctx, DenseTensor* jpvt = new DenseTensor(); int* jpvt_data = nullptr; if (driver == LapackDriverType::Gelsy) { - jpvt->Resize(phi::make_ddim({std::max(1, n)})); + jpvt->Resize(common::make_ddim({std::max(1, n)})); jpvt_data = dev_ctx.template Alloc(jpvt); } @@ -185,7 +185,7 @@ void LstsqKernel(const Context& dev_ctx, lwork = std::max(1, static_cast(phi::dtype::Real(wkopt))); DenseTensor* work = new DenseTensor(); - work->Resize(phi::make_ddim({lwork})); + work->Resize(common::make_ddim({lwork})); T* work_data = dev_ctx.template Alloc(work); // "rwork" only used for complex inputs and "gelsy/gelsd/gelss" drivers @@ -200,7 +200,7 @@ void LstsqKernel(const Context& dev_ctx, } else if (driver == LapackDriverType::Gelsd) { rwork_len = std::max(1, rwkopt); } - rwork->Resize(phi::make_ddim({rwork_len})); + 
rwork->Resize(common::make_ddim({rwork_len})); rwork_data = dev_ctx.template Alloc(rwork); } @@ -208,7 +208,7 @@ void LstsqKernel(const Context& dev_ctx, DenseTensor* iwork = new DenseTensor(); int* iwork_data = nullptr; if (driver == LapackDriverType::Gelsd) { - iwork->Resize(phi::make_ddim({std::max(1, iwkopt)})); + iwork->Resize(common::make_ddim({std::max(1, iwkopt)})); iwork_data = dev_ctx.template Alloc(iwork); } @@ -293,7 +293,7 @@ void LstsqKernel(const Context& dev_ctx, if (batch_count > 1) { solution->Resize(solution_dim); } else { - solution->Resize(phi::make_ddim({n, nrhs})); + solution->Resize(common::make_ddim({n, nrhs})); } GetResidualsTensor(dev_ctx, x, y, solution, residuals); diff --git a/paddle/phi/kernels/cpu/lu_kernel.cc b/paddle/phi/kernels/cpu/lu_kernel.cc index 731a722372d656..5790b24025b2e0 100644 --- a/paddle/phi/kernels/cpu/lu_kernel.cc +++ b/paddle/phi/kernels/cpu/lu_kernel.cc @@ -43,15 +43,15 @@ void LUKernel(const Context& dev_ctx, int n = static_cast(outdims[outrank - 2]); int lda = std::max(1, m); - auto ipiv_dims = phi::slice_ddim(outdims, 0, outrank - 1); + auto ipiv_dims = common::slice_ddim(outdims, 0, outrank - 1); ipiv_dims[outrank - 2] = std::min(m, n); pivots->Resize(ipiv_dims); dev_ctx.template Alloc(pivots); auto ipiv_data = pivots->data(); - auto info_dims = phi::slice_ddim(outdims, 0, outrank - 2); + auto info_dims = common::slice_ddim(outdims, 0, outrank - 2); if (info_dims.size() == 0) { - info_dims = phi::make_ddim({1}); + info_dims = common::make_ddim({1}); } infos->Resize(info_dims); dev_ctx.template Alloc(infos); diff --git a/paddle/phi/kernels/cpu/masked_select_grad_kernel.cc b/paddle/phi/kernels/cpu/masked_select_grad_kernel.cc index 58f4f7361eb64d..fa120de4b79521 100644 --- a/paddle/phi/kernels/cpu/masked_select_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/masked_select_grad_kernel.cc @@ -40,8 +40,8 @@ void MaskedSelectGradKernel(const Context& dev_ctx, bool expand_x = false; auto expanded_size = funcs::MatrixGetBroadcastBatchPortion( - vectorize(x_grad->dims()), vectorize(mask.dims())); - auto expaned_dims = make_ddim(expanded_size); + common::vectorize(x_grad->dims()), common::vectorize(mask.dims())); + auto expaned_dims = common::make_ddim(expanded_size); if (mask.dims() != expaned_dims) { ExpandKernel( diff --git a/paddle/phi/kernels/cpu/masked_select_kernel.cc b/paddle/phi/kernels/cpu/masked_select_kernel.cc index 837a8921e8148a..8e9e3bbebecd4d 100644 --- a/paddle/phi/kernels/cpu/masked_select_kernel.cc +++ b/paddle/phi/kernels/cpu/masked_select_kernel.cc @@ -29,9 +29,9 @@ void MaskedSelectKernel(const Context& dev_ctx, DenseTensor x_expand; auto expanded_size = funcs::MatrixGetBroadcastBatchPortion( - vectorize(x.dims()), vectorize(mask.dims())); + common::vectorize(x.dims()), common::vectorize(mask.dims())); - DDim epxand_dims = make_ddim(expanded_size); + DDim epxand_dims = common::make_ddim(expanded_size); if (mask.dims() != epxand_dims) { ExpandKernel( dev_ctx, mask, IntArray(expanded_size), &mask_expand); diff --git a/paddle/phi/kernels/cpu/matrix_nms_kernel.cc b/paddle/phi/kernels/cpu/matrix_nms_kernel.cc index b2827d039bacce..fb993029bb1916 100644 --- a/paddle/phi/kernels/cpu/matrix_nms_kernel.cc +++ b/paddle/phi/kernels/cpu/matrix_nms_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/matrix_nms_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" @@ -257,7 
+257,7 @@ void MatrixNMSKernel(const Context& ctx, DenseTensor* out, DenseTensor* index, DenseTensor* roisnum) { - auto score_dims = phi::vectorize(scores.dims()); + auto score_dims = common::vectorize(scores.dims()); auto batch_size = score_dims[0]; auto num_boxes = score_dims[2]; auto box_dim = bboxes.dims()[2]; @@ -297,21 +297,21 @@ void MatrixNMSKernel(const Context& ctx, int64_t num_kept = static_cast(offsets.back()); if (num_kept == 0) { - out->Resize(phi::make_ddim({0, out_dim})); + out->Resize(common::make_ddim({0, out_dim})); ctx.template Alloc(out); - index->Resize(phi::make_ddim({0, 1})); + index->Resize(common::make_ddim({0, 1})); ctx.template Alloc(index); } else { - out->Resize(phi::make_ddim({num_kept, out_dim})); + out->Resize(common::make_ddim({num_kept, out_dim})); ctx.template Alloc(out); - index->Resize(phi::make_ddim({num_kept, 1})); + index->Resize(common::make_ddim({num_kept, 1})); ctx.template Alloc(index); std::copy(detections.begin(), detections.end(), out->data()); std::copy(indices.begin(), indices.end(), index->data()); } if (roisnum != nullptr) { - roisnum->Resize(phi::make_ddim({batch_size})); + roisnum->Resize(common::make_ddim({batch_size})); ctx.template Alloc(roisnum); std::copy(num_per_batch.begin(), num_per_batch.end(), roisnum->data()); } diff --git a/paddle/phi/kernels/cpu/mode_grad_kernel.cc b/paddle/phi/kernels/cpu/mode_grad_kernel.cc index c4268080d50c07..a0c592c4bab9a0 100644 --- a/paddle/phi/kernels/cpu/mode_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/mode_grad_kernel.cc @@ -53,14 +53,14 @@ void ModeGradKernel(const Context& dev_ctx, for (int i = axis + 1; i < in_dims.size(); i++) { tmp_out_shape.emplace_back(out_dims[i - 1]); } - out_dims = phi::make_ddim(tmp_out_shape); + out_dims = common::make_ddim(tmp_out_shape); } if (axis == in_dims.size() - 1) { // allocate the memory for the input_grad // assign the out_grad to input_grad directly const int64_t input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t input_width = in_dims[in_dims.size() - 1]; // init the output grad with 0, because some input elements have no grad @@ -143,8 +143,8 @@ void ModeGradKernel(const Context& dev_ctx, funcs::TransCompute( ndims, dev_ctx, indices_tmp, &trans_ind, trans_axis); } - const int64_t input_height = phi::product( - phi::slice_ddim(trans_in_shape, 0, trans_in_shape.size() - 1)); + const int64_t input_height = common::product( + common::slice_ddim(trans_in_shape, 0, trans_in_shape.size() - 1)); const int64_t input_width = trans_in_shape[trans_in_shape.size() - 1]; // Assign the out_grad to transposed input_grad diff --git a/paddle/phi/kernels/cpu/mode_kernel.cc b/paddle/phi/kernels/cpu/mode_kernel.cc index 3459fd8372d9de..f5a0a1ee05e914 100644 --- a/paddle/phi/kernels/cpu/mode_kernel.cc +++ b/paddle/phi/kernels/cpu/mode_kernel.cc @@ -52,7 +52,7 @@ void ModeKernel(const Context& dev_ctx, // calculation, then transpose it back to original axis.
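The mode and top-k style hunks in this section all migrate the same reshape idiom: collapse every leading dimension into a row count with product(slice_ddim(...)) and treat the reduction axis as the row width. The following is an editorial sketch, not part of the patch, assuming the post-move paddle/common/ddim.h API; LastAxisView is an illustrative name.

#include <cstdint>
#include "paddle/common/ddim.h"

// Flatten [d0, d1, ..., dk] into the 2-D view [d0*...*d(k-1), dk] that
// the GetMode/FullTopK loops above and below iterate over.
void LastAxisView(const common::DDim& in_dims) {
  // slice_ddim keeps dims [0, size-1); product multiplies them together.
  const int64_t input_height =
      common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1));
  const int64_t input_width = in_dims[in_dims.size() - 1];
  // For in_dims = [2, 3, 4]: input_height == 6, input_width == 4.
  (void)input_height;
  (void)input_width;
}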
if (axis == in_dims.size() - 1) { const int64_t& input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t& input_width = in_dims[in_dims.size() - 1]; funcs::GetMode(input_height, input_width, @@ -80,7 +80,7 @@ void ModeKernel(const Context& dev_ctx, for (int i = axis + 1; i < in_dims.size(); i++) { tmp_out_shape.emplace_back(in_dims[i]); } - DDim tmp_out_dim = phi::make_ddim(tmp_out_shape); + DDim tmp_out_dim = common::make_ddim(tmp_out_shape); out->Resize(tmp_out_dim); indices->Resize(tmp_out_dim); } @@ -104,8 +104,8 @@ void ModeKernel(const Context& dev_ctx, funcs::TransCompute( ndims, dev_ctx, x, &trans_input, trans_axis); - const int64_t input_height = - phi::product(phi::slice_ddim(trans_shape, 0, trans_shape.size() - 1)); + const int64_t input_height = common::product( + common::slice_ddim(trans_shape, 0, trans_shape.size() - 1)); const int64_t input_width = trans_shape[trans_shape.size() - 1]; DenseTensor tmp_out; tmp_out.Resize(trans_out_shape); diff --git a/paddle/phi/kernels/cpu/multiclass_nms3_kernel.cc b/paddle/phi/kernels/cpu/multiclass_nms3_kernel.cc index aa04288124a9b7..7bfc41e91d6cc5 100644 --- a/paddle/phi/kernels/cpu/multiclass_nms3_kernel.cc +++ b/paddle/phi/kernels/cpu/multiclass_nms3_kernel.cc @@ -494,7 +494,7 @@ void MultiClassNMSKernel(const Context& ctx, DenseTensor* nms_rois_num) { bool return_index = index != nullptr; bool has_roisnum = rois_num.get_ptr() != nullptr; - auto score_dims = phi::vectorize(scores.dims()); + auto score_dims = common::vectorize(scores.dims()); auto score_size = score_dims.size(); std::vector>> all_indices; diff --git a/paddle/phi/kernels/cpu/nms_kernel.cc b/paddle/phi/kernels/cpu/nms_kernel.cc index 5534fd71a18274..e733af0128d15b 100644 --- a/paddle/phi/kernels/cpu/nms_kernel.cc +++ b/paddle/phi/kernels/cpu/nms_kernel.cc @@ -84,7 +84,7 @@ void NMSKernel(const Context& dev_ctx, int64_t num_boxes = boxes.dims()[0]; DenseTensor output_tmp; - output_tmp.Resize(phi::make_ddim({num_boxes})); + output_tmp.Resize(common::make_ddim({num_boxes})); auto output_tmp_data = dev_ctx.template Alloc(&output_tmp); int64_t num_keep_boxes = diff --git a/paddle/phi/kernels/cpu/nonzero_kernel.cc b/paddle/phi/kernels/cpu/nonzero_kernel.cc index 653a03e7f99071..038244accfdef6 100644 --- a/paddle/phi/kernels/cpu/nonzero_kernel.cc +++ b/paddle/phi/kernels/cpu/nonzero_kernel.cc @@ -62,7 +62,7 @@ void NonZeroKernel(const Context& dev_ctx, } } auto true_num = true_index.size(); - out->Resize(phi::make_ddim({static_cast(true_num), rank})); + out->Resize(common::make_ddim({static_cast(true_num), rank})); auto* out_ptr = dev_ctx.template Alloc(out); if (true_num == 0) { diff --git a/paddle/phi/kernels/cpu/overlap_add_grad_kernel.cc b/paddle/phi/kernels/cpu/overlap_add_grad_kernel.cc index 93fe7a6cb6f095..fe0ab8c309fc4a 100644 --- a/paddle/phi/kernels/cpu/overlap_add_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/overlap_add_grad_kernel.cc @@ -53,17 +53,17 @@ void OverlapAddGradKernel(const Context& dev_ctx, phi::DDim x_grad_resized_dims; phi::DDim out_grad_resized_dims; if (axis == 0) { - preserved_dims = - phi::slice_ddim(out_grad_.dims(), 1, static_cast(out_grad_rank)); + preserved_dims = common::slice_ddim( + out_grad_.dims(), 1, static_cast(out_grad_rank)); x_grad_resized_dims = { - n_frames, frame_length, phi::product(preserved_dims)}; - out_grad_resized_dims = {seq_length, phi::product(preserved_dims)}; + n_frames, frame_length, 
common::product(preserved_dims)}; + out_grad_resized_dims = {seq_length, common::product(preserved_dims)}; } else { - preserved_dims = phi::slice_ddim( + preserved_dims = common::slice_ddim( out_grad_.dims(), 0, static_cast(out_grad_rank) - 1); x_grad_resized_dims = { - phi::product(preserved_dims), frame_length, n_frames}; - out_grad_resized_dims = {phi::product(preserved_dims), seq_length}; + common::product(preserved_dims), frame_length, n_frames}; + out_grad_resized_dims = {common::product(preserved_dims), seq_length}; } x_grad->Resize(x_grad_resized_dims); out_grad_.Resize(out_grad_resized_dims); @@ -78,31 +78,31 @@ void OverlapAddGradKernel(const Context& dev_ctx, trans_out_grad = out_grad_; std::vector perm_x_grad{1, 0}; - auto x_grad_dims_vec = phi::vectorize(x_grad->dims()); + auto x_grad_dims_vec = common::vectorize(x_grad->dims()); for (int i = 0; i < x_grad->dims().size(); ++i) { x_grad_dims_vec[i] = x_grad->dims()[perm_x_grad[i]]; } - trans_x_grad.Resize(phi::make_ddim(x_grad_dims_vec)); + trans_x_grad.Resize(common::make_ddim(x_grad_dims_vec)); dev_ctx.template Alloc(&trans_x_grad); phi::funcs::TransCompute( perm_x_grad.size(), dev_ctx, *x_grad, &trans_x_grad, perm_x_grad); } else { std::vector perm_d_out{1, 0}; - auto out_grad_dims_vec = phi::vectorize(out_grad_.dims()); + auto out_grad_dims_vec = common::vectorize(out_grad_.dims()); for (int i = 0; i < out_grad_.dims().size(); ++i) { out_grad_dims_vec[i] = out_grad_.dims()[perm_d_out[i]]; } - trans_out_grad.Resize(phi::make_ddim(out_grad_dims_vec)); + trans_out_grad.Resize(common::make_ddim(out_grad_dims_vec)); dev_ctx.template Alloc(&trans_out_grad); phi::funcs::TransCompute( perm_d_out.size(), dev_ctx, out_grad_, &trans_out_grad, perm_d_out); std::vector perm_x_grad{2, 1, 0}; - auto x_grad_dims_vec = phi::vectorize(x_grad->dims()); + auto x_grad_dims_vec = common::vectorize(x_grad->dims()); for (int i = 0; i < x_grad->dims().size(); ++i) { x_grad_dims_vec[i] = x_grad->dims()[perm_x_grad[i]]; } - trans_x_grad.Resize(phi::make_ddim(x_grad_dims_vec)); + trans_x_grad.Resize(common::make_ddim(x_grad_dims_vec)); dev_ctx.template Alloc(&trans_x_grad); phi::funcs::TransCompute( perm_x_grad.size(), dev_ctx, *x_grad, &trans_x_grad, perm_x_grad); @@ -151,7 +151,7 @@ void OverlapAddGradKernel(const Context& dev_ctx, restored_x_grad_shape.push_back(n_frames); } - x_grad->Resize(phi::make_ddim(restored_x_grad_shape)); + x_grad->Resize(common::make_ddim(restored_x_grad_shape)); } } diff --git a/paddle/phi/kernels/cpu/overlap_add_kernel.cc b/paddle/phi/kernels/cpu/overlap_add_kernel.cc index ec7fc6d656d03c..ac00eec3e9f138 100644 --- a/paddle/phi/kernels/cpu/overlap_add_kernel.cc +++ b/paddle/phi/kernels/cpu/overlap_add_kernel.cc @@ -50,14 +50,16 @@ void OverlapAddKernel(const Context& dev_ctx, phi::DDim out_resized_dims; if (axis == 0) { preserved_dims = - phi::slice_ddim(out->dims(), 1, static_cast(out_rank)); - x_resized_dims = {n_frames, frame_length, phi::product(preserved_dims)}; - out_resized_dims = {seq_length, phi::product(preserved_dims)}; + common::slice_ddim(out->dims(), 1, static_cast(out_rank)); + x_resized_dims = { + n_frames, frame_length, common::product(preserved_dims)}; + out_resized_dims = {seq_length, common::product(preserved_dims)}; } else { preserved_dims = - phi::slice_ddim(out->dims(), 0, static_cast(out_rank) - 1); - x_resized_dims = {phi::product(preserved_dims), frame_length, n_frames}; - out_resized_dims = {phi::product(preserved_dims), seq_length}; + common::slice_ddim(out->dims(), 0, static_cast(out_rank) - 
1); + x_resized_dims = { + common::product(preserved_dims), frame_length, n_frames}; + out_resized_dims = {common::product(preserved_dims), seq_length}; } x_.Resize(x_resized_dims); out->Resize(out_resized_dims); @@ -72,31 +74,31 @@ void OverlapAddKernel(const Context& dev_ctx, trans_out = *out; std::vector perm_x{1, 0}; - auto x_dims_vec = phi::vectorize(x_.dims()); + auto x_dims_vec = common::vectorize(x_.dims()); for (int i = 0; i < x_.dims().size(); ++i) { x_dims_vec[i] = x_.dims()[perm_x[i]]; } - trans_x.Resize(phi::make_ddim(x_dims_vec)); + trans_x.Resize(common::make_ddim(x_dims_vec)); dev_ctx.template Alloc(&trans_x); phi::funcs::TransCompute( perm_x.size(), dev_ctx, x_, &trans_x, perm_x); } else { std::vector perm_out{1, 0}; - auto out_dims_vec = phi::vectorize(out->dims()); + auto out_dims_vec = common::vectorize(out->dims()); for (int i = 0; i < out->dims().size(); ++i) { out_dims_vec[i] = out->dims()[perm_out[i]]; } - trans_out.Resize(phi::make_ddim(out_dims_vec)); + trans_out.Resize(common::make_ddim(out_dims_vec)); dev_ctx.template Alloc(&trans_out); phi::funcs::TransCompute( perm_out.size(), dev_ctx, *out, &trans_out, perm_out); std::vector perm_x{2, 1, 0}; - auto x_dims_vec = phi::vectorize(x_.dims()); + auto x_dims_vec = common::vectorize(x_.dims()); for (int i = 0; i < x_.dims().size(); ++i) { x_dims_vec[i] = x_.dims()[perm_x[i]]; } - trans_x.Resize(phi::make_ddim(x_dims_vec)); + trans_x.Resize(common::make_ddim(x_dims_vec)); dev_ctx.template Alloc(&trans_x); phi::funcs::TransCompute( perm_x.size(), dev_ctx, x_, &trans_x, perm_x); @@ -137,7 +139,7 @@ void OverlapAddKernel(const Context& dev_ctx, restored_out_shape.push_back(seq_length); } - out->Resize(phi::make_ddim(restored_out_shape)); + out->Resize(common::make_ddim(restored_out_shape)); } } diff --git a/paddle/phi/kernels/cpu/prior_box_kernel.cc b/paddle/phi/kernels/cpu/prior_box_kernel.cc index c289d11069992b..c2c98661686485 100644 --- a/paddle/phi/kernels/cpu/prior_box_kernel.cc +++ b/paddle/phi/kernels/cpu/prior_box_kernel.cc @@ -138,7 +138,7 @@ void PriorBoxKernel(const Context& ctx, } DenseTensor var_t; - var_t.Resize(phi::make_ddim({1, static_cast(variances.size())})); + var_t.Resize(common::make_ddim({1, static_cast(variances.size())})); ctx.template Alloc(&var_t); auto var_et = EigenTensor::From(var_t); diff --git a/paddle/phi/kernels/cpu/prune_gate_by_capacity_kernel.cc b/paddle/phi/kernels/cpu/prune_gate_by_capacity_kernel.cc index ed26b4f37dd5cc..7f2717b8ecacef 100644 --- a/paddle/phi/kernels/cpu/prune_gate_by_capacity_kernel.cc +++ b/paddle/phi/kernels/cpu/prune_gate_by_capacity_kernel.cc @@ -13,7 +13,7 @@ // limitations under the License. 
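The overlap_add hunks above repeat a second idiom worth calling out: vectorize() a DDim into a std::vector, permute the entries, then make_ddim() the result back before calling TransCompute. A minimal sketch of that round trip, assuming the relocated common:: helpers; PermutedShape is an illustrative name, not a function from this patch.

#include <cstddef>
#include <vector>
#include "paddle/common/ddim.h"

// Build the DDim a tensor would have after permuting its axes,
// e.g. perm = {1, 0} for the 2-D transposes used by overlap_add.
common::DDim PermutedShape(const common::DDim& dims,
                           const std::vector<int>& perm) {
  std::vector<int64_t> shape = common::vectorize(dims);  // DDim -> vector
  for (size_t i = 0; i < shape.size(); ++i) {
    shape[i] = dims[perm[i]];
  }
  return common::make_ddim(shape);  // vector -> DDim
}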
#include "paddle/phi/kernels/prune_gate_by_capacity_kernel.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/randint_kernel.cc b/paddle/phi/kernels/cpu/randint_kernel.cc index 781d5199457f01..c3f1ffe4248ec7 100644 --- a/paddle/phi/kernels/cpu/randint_kernel.cc +++ b/paddle/phi/kernels/cpu/randint_kernel.cc @@ -29,7 +29,7 @@ void RandintKernel(const Context& dev_ctx, DataType dtype UNUSED, DenseTensor* out) { int seed = 0; - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); T* data = dev_ctx.template Alloc(out); auto numel = out->numel(); std::shared_ptr engine; diff --git a/paddle/phi/kernels/cpu/random_routing_kernel.cc b/paddle/phi/kernels/cpu/random_routing_kernel.cc index 0e1d450c1894ae..cdeab98f4c1ab3 100644 --- a/paddle/phi/kernels/cpu/random_routing_kernel.cc +++ b/paddle/phi/kernels/cpu/random_routing_kernel.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/random_routing_kernel.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/repeat_interleave_grad_kernel.cc b/paddle/phi/kernels/cpu/repeat_interleave_grad_kernel.cc index 05f19ac36107ec..b7b33d4290daec 100644 --- a/paddle/phi/kernels/cpu/repeat_interleave_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/repeat_interleave_grad_kernel.cc @@ -90,7 +90,7 @@ void RepeatInterleaveGradKernel(const Context& ctx, for (int i = 0; i < x_grad->dims()[dim]; i++) { std::fill_n(index_vec.begin() + i * repeats, repeats, i); } - index.Resize(phi::make_ddim({index_size})); + index.Resize(common::make_ddim({index_size})); phi::TensorFromVector(index_vec, ctx, &index); const DenseTensor index_copy = index; IndexSelectGradInner(ctx, out_grad, index_copy, x_grad, dim); diff --git a/paddle/phi/kernels/cpu/rnn_functor.h b/paddle/phi/kernels/cpu/rnn_functor.h index 4adb754174dacb..d0f5e5787bd170 100644 --- a/paddle/phi/kernels/cpu/rnn_functor.h +++ b/paddle/phi/kernels/cpu/rnn_functor.h @@ -140,7 +140,7 @@ void DropoutCpuFunctionInplace(const CPUContext& dev_ctx, if (is_test) { return; } - size_t size = phi::product(x->dims()); + size_t size = common::product(x->dims()); auto* mask_data = mask->data(); if (!(*is_has_reset)) { // Special case when dropout_prob is 1.0 diff --git a/paddle/phi/kernels/cpu/rnn_grad_kernel.cc b/paddle/phi/kernels/cpu/rnn_grad_kernel.cc index 48d6ea98c16ded..3e0e4c7a3d7a5a 100644 --- a/paddle/phi/kernels/cpu/rnn_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/rnn_grad_kernel.cc @@ -82,7 +82,7 @@ struct GradCell { if (has_sequence_length) { auto& place = *dev_ctx.eigen_device(); auto mask = EigenMatrix::From( - mask_tensor, phi::make_ddim({mask_tensor.dims()[1], 1})); + mask_tensor, common::make_ddim({mask_tensor.dims()[1], 1})); auto mask_broadcast = mask.broadcast(Eigen::DSizes( 1, static_cast(grad_pre_hidden->dims()[2]))); auto pre_hidden_grad = EigenMatrix::Reshape( @@ -394,7 +394,7 @@ struct GradLayer { std::vector mask_tensor_list; int mask_min_length = time_step; if (has_sequence_length) { - mask_matrix.Resize(phi::make_ddim({time_step, input->dims()[1]})); + mask_matrix.Resize(common::make_ddim({time_step, input->dims()[1]})); CreateMaskMatrix( dev_ctx, sequence_length, &mask_matrix, is_reverse, &mask_min_length); mask_tensor_list = Unbind(mask_matrix); @@ -598,7 +598,7 @@ struct GradLayer { const std::string& 
mode) { auto& place = *dev_ctx.eigen_device(); auto mask = EigenMatrix::From( - mask_tensor, phi::make_ddim({mask_tensor.dims()[1], 1})); + mask_tensor, common::make_ddim({mask_tensor.dims()[1], 1})); auto mask_broadcast = mask.broadcast( Eigen::DSizes(1, static_cast(grad_output->dims()[2]))); @@ -1121,8 +1121,8 @@ void RnnGradFunc(const CPUContext& dev_ctx, } // squeeze the hidden first dim for (auto& hidden_tensor : hidden_tensor_unbind) { - hidden_tensor.Resize( - phi::slice_ddim(hidden_tensor.dims(), 1, hidden_tensor.dims().size())); + hidden_tensor.Resize(common::slice_ddim( + hidden_tensor.dims(), 1, hidden_tensor.dims().size())); } // add the output tensor to the hidden vector DenseTensor tmp; diff --git a/paddle/phi/kernels/cpu/rnn_kernel.cc b/paddle/phi/kernels/cpu/rnn_kernel.cc index 606a8f3ff400d9..a0035c6db4a75d 100644 --- a/paddle/phi/kernels/cpu/rnn_kernel.cc +++ b/paddle/phi/kernels/cpu/rnn_kernel.cc @@ -220,7 +220,7 @@ struct Layer { // create the temp input for the X * W_ih^T + Bias_ih const int& hidden_size = weight.dims()[0]; // NOLINT cache_input->Resize( - phi::make_ddim({input.dims()[0], input.dims()[1], hidden_size})); + common::make_ddim({input.dims()[0], input.dims()[1], hidden_size})); if (is_test) { dev_ctx.Alloc(cache_input); } @@ -240,9 +240,9 @@ struct Layer { auto in = EigenMatrix::Reshape(*cache_input, cache_input->dims().size() - 1); - auto bias_ih_tmp = - EigenMatrix::From(bias_ih, phi::make_ddim({1, bias_ih.dims()[0]})); - const int row_num = static_cast(phi::product(cache_input->dims()) / + auto bias_ih_tmp = EigenMatrix::From( + bias_ih, common::make_ddim({1, bias_ih.dims()[0]})); + const int row_num = static_cast(common::product(cache_input->dims()) / cache_input->dims()[2]); in = in + bias_ih_tmp.broadcast(Eigen::DSizes(row_num, 1)); if (is_gru(mode)) { @@ -255,11 +255,11 @@ struct Layer { zero(dev_ctx, &bias_hh_tmp_unbind[2], static_cast(0.0)); auto bias_hh_after_mask = EigenMatrix::From( - bias_hh_tmp, phi::make_ddim({1, bias_hh.dims()[0]})); + bias_hh_tmp, common::make_ddim({1, bias_hh.dims()[0]})); in = in + bias_hh_after_mask.broadcast(Eigen::DSizes(row_num, 1)); } else { - auto bias_hh_no_mask = - EigenMatrix::From(bias_hh, phi::make_ddim({1, bias_hh.dims()[0]})); + auto bias_hh_no_mask = EigenMatrix::From( + bias_hh, common::make_ddim({1, bias_hh.dims()[0]})); in = in + bias_hh_no_mask.broadcast(Eigen::DSizes(row_num, 1)); } } @@ -276,7 +276,7 @@ struct Layer { auto& place = *dev_ctx.eigen_device(); auto out = EigenMatrix::Reshape(*output, output->dims().size() - 1); auto mask = EigenMatrix::From( - mask_tensor, phi::make_ddim({mask_tensor.dims()[1], 1})); + mask_tensor, common::make_ddim({mask_tensor.dims()[1], 1})); auto pre_h = EigenMatrix::Reshape(*init_h, init_h->dims().size() - 1); auto curr_h = EigenMatrix::Reshape(*last_h, last_h->dims().size() - 1); auto mask_broadcast = mask.broadcast( @@ -356,7 +356,7 @@ struct Layer { DenseTensor mask_matrix; int mask_min_length = time_step; if (has_sequence_length) { - mask_matrix.Resize(phi::make_ddim({time_step, input->dims()[1]})); + mask_matrix.Resize(common::make_ddim({time_step, input->dims()[1]})); CreateMaskMatrix( dev_ctx, sequence_length, &mask_matrix, is_reverse, &mask_min_length); @@ -556,7 +556,7 @@ struct Layer { DenseTensor mask_matrix; int mask_min_length = time_step; if (has_sequence_length) { - mask_matrix.Resize(phi::make_ddim({time_step, input->dims()[1]})); + mask_matrix.Resize(common::make_ddim({time_step, input->dims()[1]})); CreateMaskMatrix( dev_ctx, sequence_length,
&mask_matrix, is_reverse, &mask_min_length); mask_tensor_list = Unbind(mask_matrix); diff --git a/paddle/phi/kernels/cpu/roi_align_grad_kernel.cc b/paddle/phi/kernels/cpu/roi_align_grad_kernel.cc index 119f4ea1b0ac40..f6599b2ed47333 100644 --- a/paddle/phi/kernels/cpu/roi_align_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/roi_align_grad_kernel.cc @@ -81,7 +81,7 @@ void RoiAlignGradKernel(const Context& dev_ctx, int sampling_ratio, bool aligned, DenseTensor* dx) { - const auto& in_dims = phi::vectorize(x.dims()); + const auto& in_dims = common::vectorize(x.dims()); int channels = in_dims[1]; int height = in_dims[2]; int width = in_dims[3]; @@ -129,9 +129,9 @@ void RoiAlignGradKernel(const Context& dev_ctx, const T* out_grad_data = out_grad.data(); T* dx_data = dev_ctx.template Alloc(dx); - auto in_stride = phi::stride(x.dims()); - auto roi_stride = phi::stride(boxes.dims()); - auto out_stride = phi::stride(out_grad.dims()); + auto in_stride = common::stride(x.dims()); + auto roi_stride = common::stride(boxes.dims()); + auto out_stride = common::stride(out_grad.dims()); T roi_offset = aligned ? T(0.5) : 0; for (int n = 0; n < rois_num; ++n) { diff --git a/paddle/phi/kernels/cpu/roi_align_kernel.cc b/paddle/phi/kernels/cpu/roi_align_kernel.cc index bf303b17fbc8bd..7a0a00f82e7cd4 100644 --- a/paddle/phi/kernels/cpu/roi_align_kernel.cc +++ b/paddle/phi/kernels/cpu/roi_align_kernel.cc @@ -198,9 +198,9 @@ void RoiAlignKernel(const Context& dev_ctx, return; } - auto in_stride = phi::stride(in_dims); - auto roi_stride = phi::stride(boxes.dims()); - auto out_stride = phi::stride(out->dims()); + auto in_stride = common::stride(in_dims); + auto roi_stride = common::stride(boxes.dims()); + auto out_stride = common::stride(out->dims()); const T* input_data = x.data(); DenseTensor roi_batch_id_list = Empty(dev_ctx, {rois_num}); diff --git a/paddle/phi/kernels/cpu/roi_pool_grad_kernel.cc b/paddle/phi/kernels/cpu/roi_pool_grad_kernel.cc index e25a581cbd9dd9..ff1f8578a78b06 100644 --- a/paddle/phi/kernels/cpu/roi_pool_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/roi_pool_grad_kernel.cc @@ -66,10 +66,10 @@ void RoiPoolGradKernel(const Context& dev_ctx, phi::funcs::SetConstant set_zero; set_zero(dev_ctx, dx, static_cast(0)); - auto in_stride = phi::stride(x.dims()); - auto arg_max_stride = phi::stride(arg_max.dims()); - auto roi_stride = phi::stride(boxes.dims()); - auto out_stride = phi::stride(out_grad.dims()); + auto in_stride = common::stride(x.dims()); + auto arg_max_stride = common::stride(arg_max.dims()); + auto roi_stride = common::stride(boxes.dims()); + auto out_stride = common::stride(out_grad.dims()); int channels = static_cast(x.dims()[1]); diff --git a/paddle/phi/kernels/cpu/roi_pool_kernel.cc b/paddle/phi/kernels/cpu/roi_pool_kernel.cc index 9208308ed12e2a..0789fbb4c227fa 100644 --- a/paddle/phi/kernels/cpu/roi_pool_kernel.cc +++ b/paddle/phi/kernels/cpu/roi_pool_kernel.cc @@ -42,10 +42,10 @@ void RoiPoolKernel(const Context& dev_ctx, return; } - auto in_stride = phi::stride(x_dims); - auto arg_max_stride = phi::stride(arg_max->dims()); - auto box_stride = phi::stride(boxes.dims()); - auto out_stride = phi::stride(out->dims()); + auto in_stride = common::stride(x_dims); + auto arg_max_stride = common::stride(arg_max->dims()); + auto box_stride = common::stride(boxes.dims()); + auto out_stride = common::stride(out->dims()); const T* input_data = x.data(); diff --git a/paddle/phi/kernels/cpu/send_u_recv_kernel.cc b/paddle/phi/kernels/cpu/send_u_recv_kernel.cc index 3db7844875f244..9e186aeedfab36 
100644 --- a/paddle/phi/kernels/cpu/send_u_recv_kernel.cc +++ b/paddle/phi/kernels/cpu/send_u_recv_kernel.cc @@ -97,11 +97,11 @@ void GraphSendRecvOpKernelLaunchHelper(const Context& ctx, } } else { // Set out dim following out_size. - std::vector dims_ = phi::vectorize(src_dims); + std::vector dims_ = common::vectorize(src_dims); if (!dims_.empty()) { dims_[0] = out_size; } - out->Resize(phi::make_ddim(dims_)); + out->Resize(common::make_ddim(dims_)); memset_size = out_size; for (int i = 1; i < src_dims.size(); ++i) { memset_size *= src_dims[i]; diff --git a/paddle/phi/kernels/cpu/send_ue_recv_kernel.cc b/paddle/phi/kernels/cpu/send_ue_recv_kernel.cc index 0dd727811b3ed8..a53efc2bc17b05 100644 --- a/paddle/phi/kernels/cpu/send_ue_recv_kernel.cc +++ b/paddle/phi/kernels/cpu/send_ue_recv_kernel.cc @@ -118,13 +118,13 @@ void GraphSendUERecvOpKernelLaunchHelper(const Context& ctx, const int& index_size = src_index.dims()[0]; // NOLINT auto out_dims = out->dims(); int64_t memset_size = 1; - std::vector dims_ = phi::vectorize(out_dims); + std::vector dims_ = common::vectorize(out_dims); if (out_size <= 0) { dims_[0] = x.dims()[0]; } else { dims_[0] = out_size; } - out->Resize(phi::make_ddim(dims_)); + out->Resize(common::make_ddim(dims_)); for (auto dim : dims_) { memset_size *= dim; } diff --git a/paddle/phi/kernels/cpu/send_uv_grad_kernel.cc b/paddle/phi/kernels/cpu/send_uv_grad_kernel.cc index c04bdaec0177eb..fb77091d0dbcf4 100644 --- a/paddle/phi/kernels/cpu/send_uv_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/send_uv_grad_kernel.cc @@ -60,7 +60,7 @@ void CalculateGrad(const Context& ctx, } } else { const auto& bcast_info = phi::CalcBCastInfo(out_grad_dims, x_grad_dims); - auto out_grad_dims_1 = phi::vectorize(out_grad_dims); + auto out_grad_dims_1 = common::vectorize(out_grad_dims); std::vector out_grad_dims_2(out_grad_dims_1.begin() + 1, out_grad_dims_1.end()); out_grad_dims_2.emplace(out_grad_dims_2.begin(), x_grad_dims[0]); @@ -117,7 +117,7 @@ void CalculateGrad(const Context& ctx, } } } else { - auto out_grad_dims_1 = phi::vectorize(out_grad_dims); + auto out_grad_dims_1 = common::vectorize(out_grad_dims); std::vector out_grad_dims_2(out_grad_dims_1.begin() + 1, out_grad_dims_1.end()); out_grad_dims_2.emplace(out_grad_dims_2.begin(), x_grad_dims[0]); diff --git a/paddle/phi/kernels/cpu/shuffle_batch_kernel.cc b/paddle/phi/kernels/cpu/shuffle_batch_kernel.cc index f7717d5aee4af1..78ab7492084e0c 100644 --- a/paddle/phi/kernels/cpu/shuffle_batch_kernel.cc +++ b/paddle/phi/kernels/cpu/shuffle_batch_kernel.cc @@ -83,7 +83,7 @@ void ShuffleBatchKernel(const Context& dev_ctx, // std::shuffle(idx_vec.begin(), idx_vec.end(), engine); // ShuffleIdx record shuffle order - shuffleidx->Resize(phi::make_ddim({(int64_t)idx_vec.size()})); + shuffleidx->Resize(common::make_ddim({(int64_t)idx_vec.size()})); auto* shuffleidx_data = dev_ctx.template HostAlloc(shuffleidx); for (size_t i = 0; i < idx_vec.size(); i++) { @@ -99,7 +99,7 @@ void ShuffleBatchKernel(const Context& dev_ctx, x_embed_size * sizeof(T)); } // set new seed - seed_out->Resize(phi::make_ddim({1})); + seed_out->Resize(common::make_ddim({1})); auto* seed_out_data = dev_ctx.template HostAlloc(seed_out); *seed_out_data = engine(); } diff --git a/paddle/phi/kernels/cpu/sparse_weight_embedding_grad_kernel.cc b/paddle/phi/kernels/cpu/sparse_weight_embedding_grad_kernel.cc index d296aba66503b7..f3c724489714af 100644 --- a/paddle/phi/kernels/cpu/sparse_weight_embedding_grad_kernel.cc +++ 
b/paddle/phi/kernels/cpu/sparse_weight_embedding_grad_kernel.cc @@ -143,7 +143,7 @@ struct SparseWeightEmbeddingSparseGradCPUFunctor { auto d_output_dims = d_output->dims(); auto d_output_dims_2d = - phi::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); + common::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output_dims_2d, phi::errors::InvalidArgument( diff --git a/paddle/phi/kernels/cpu/strided_copy_kernel.cc b/paddle/phi/kernels/cpu/strided_copy_kernel.cc index a25893eb6b571a..19ce258a4313cb 100644 --- a/paddle/phi/kernels/cpu/strided_copy_kernel.cc +++ b/paddle/phi/kernels/cpu/strided_copy_kernel.cc @@ -29,8 +29,8 @@ void StridedCopyKernel(const Context& dev_ctx, int64_t offset, DenseTensor* out) { phi::DenseTensorMeta meta = input.meta(); - meta.strides = phi::make_ddim(out_stride); - meta.dims = phi::make_ddim(dims); + meta.strides = common::make_ddim(out_stride); + meta.dims = common::make_ddim(dims); meta.offset = offset; out->set_meta(meta); diff --git a/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc b/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc index 42d0acd901c71a..1707517f57455c 100644 --- a/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/temporal_shift_grad_kernel.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { @@ -92,7 +92,7 @@ void TemporalShiftGradKernel(const Context& dev_ctx, auto* input_grad = x_grad; auto* output_grad = &out_grad; int t = seg_num; - const DataLayout data_layout = phi::StringToDataLayout(data_format_str); + const DataLayout data_layout = common::StringToDataLayout(data_format_str); const int nt = static_cast(output_grad->dims()[0]); const int c = static_cast(data_layout == DataLayout::kNCHW @@ -114,8 +114,8 @@ void TemporalShiftGradKernel(const Context& dev_ctx, const int c2 = static_cast(static_cast(c) * 2.f * shift_ratio); DDim in_grad_dims = - (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w}) - : phi::make_ddim({nt, h, w, c})); + (data_layout == DataLayout::kNCHW ? common::make_ddim({nt, c, h, w}) + : common::make_ddim({nt, h, w, c})); const T* output_grad_data = output_grad->data(); input_grad->Resize(in_grad_dims); diff --git a/paddle/phi/kernels/cpu/temporal_shift_kernel.cc b/paddle/phi/kernels/cpu/temporal_shift_kernel.cc index 4e183bf81b4997..8e7084264382ce 100644 --- a/paddle/phi/kernels/cpu/temporal_shift_kernel.cc +++ b/paddle/phi/kernels/cpu/temporal_shift_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/temporal_shift_kernel.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { @@ -92,7 +92,7 @@ void TemporalShiftKernel(const Context& dev_ctx, auto* input = &x; auto* output = out; int t = seg_num; - const DataLayout data_layout = phi::StringToDataLayout(data_format_str); + const DataLayout data_layout = common::StringToDataLayout(data_format_str); const int nt = static_cast(input->dims()[0]); const int c = static_cast( @@ -111,8 +111,8 @@ void TemporalShiftKernel(const Context& dev_ctx, const int c2 = static_cast(static_cast(c) * 2.f * shift_ratio); DDim out_dims = - (data_layout == DataLayout::kNCHW ? 
phi::make_ddim({nt, c, h, w}) - : phi::make_ddim({nt, h, w, c})); + (data_layout == DataLayout::kNCHW ? common::make_ddim({nt, c, h, w}) + : common::make_ddim({nt, h, w, c})); const T* input_data = input->data(); output->Resize(out_dims); T* output_data = dev_ctx.template Alloc(output); diff --git a/paddle/phi/kernels/cpu/top_k_grad_kernel.cc b/paddle/phi/kernels/cpu/top_k_grad_kernel.cc index d98ca1702e3b56..1e7f6b890df290 100644 --- a/paddle/phi/kernels/cpu/top_k_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/top_k_grad_kernel.cc @@ -76,7 +76,7 @@ void TopkGradKernel(const Context& dev_ctx, // assign the out_grad to input_grad directly const int64_t input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t input_width = in_dims[in_dims.size() - 1]; // init the output grad with 0, because some input elements have no grad @@ -120,8 +120,8 @@ void TopkGradKernel(const Context& dev_ctx, ndims, dev_ctx, out_grad, &trans_dO, trans); funcs::TransCompute( ndims, dev_ctx, indices, &trans_ind, trans); - const int64_t input_height = phi::product( - phi::slice_ddim(trans_in_dims, 0, trans_in_dims.size() - 1)); + const int64_t input_height = common::product( + common::slice_ddim(trans_in_dims, 0, trans_in_dims.size() - 1)); const int64_t input_width = trans_in_dims[trans_in_dims.size() - 1]; // Assign the out_grad to transposed input_grad diff --git a/paddle/phi/kernels/cpu/top_k_kernel.cc b/paddle/phi/kernels/cpu/top_k_kernel.cc index 858be275734d42..d769613bef92b0 100644 --- a/paddle/phi/kernels/cpu/top_k_kernel.cc +++ b/paddle/phi/kernels/cpu/top_k_kernel.cc @@ -172,7 +172,7 @@ void TopkKernel(const Context& dev_ctx, const auto& out_dims = out->dims(); if (axis + 1 == in_dims.size()) { const int64_t& input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t& input_width = in_dims[in_dims.size() - 1]; FullTopK(input_height, input_width, @@ -214,8 +214,8 @@ void TopkKernel(const Context& dev_ctx, funcs::TransCompute( ndims, dev_ctx, *input, &trans_inp, trans); - const int64_t input_height = - phi::product(phi::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); + const int64_t input_height = common::product( + common::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); const int64_t input_width = trans_dims[trans_dims.size() - 1]; // Allocate the temp tensor to save the topk indices, values diff --git a/paddle/phi/kernels/cpu/triangular_solve_kernel.cc b/paddle/phi/kernels/cpu/triangular_solve_kernel.cc index 06c897b2199845..6245eb90426405 100644 --- a/paddle/phi/kernels/cpu/triangular_solve_kernel.cc +++ b/paddle/phi/kernels/cpu/triangular_solve_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/triangular_solve_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/expand_kernel.h" @@ -46,7 +46,7 @@ void TriangularSolveKernel(const Context& dev_ctx, const T* x_bst_data = x_bst.data(); ExpandKernel(dev_ctx, x, x_bst_dims, &x_bst); - out->Resize(phi::make_ddim(y_bst_dims_vec)); + out->Resize(common::make_ddim(y_bst_dims_vec)); T* out_data = dev_ctx.template Alloc(out); IntArray y_bst_dims(y_bst_dims_vec); ExpandKernel(dev_ctx, y, y_bst_dims, out); diff --git a/paddle/phi/kernels/cpu/uniform_kernel.cc
b/paddle/phi/kernels/cpu/uniform_kernel.cc index d850dc5074e033..5a85675bdeffa0 100644 --- a/paddle/phi/kernels/cpu/uniform_kernel.cc +++ b/paddle/phi/kernels/cpu/uniform_kernel.cc @@ -27,7 +27,7 @@ void UniformKernel(const Context &dev_ctx, const Scalar &max, int seed, DenseTensor *out) { - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); T *data = dev_ctx.template Alloc(out); auto size = out->numel(); std::shared_ptr engine; diff --git a/paddle/phi/kernels/cpu/unique_consecutive_functor.h b/paddle/phi/kernels/cpu/unique_consecutive_functor.h index 2daee69eed92dc..cf7086f80f5a6a 100644 --- a/paddle/phi/kernels/cpu/unique_consecutive_functor.h +++ b/paddle/phi/kernels/cpu/unique_consecutive_functor.h @@ -60,18 +60,18 @@ static void UniqueConsecutiveFlattenedTensor(const Context& context, } out_vec.resize(output_size); - out->Resize(phi::make_ddim({output_size})); + out->Resize(common::make_ddim({output_size})); auto* out_data = context.template Alloc(out); std::copy(out_vec.begin(), out_vec.end(), out_data); if (return_inverse) { - inverse->Resize(phi::make_ddim({in.numel()})); + inverse->Resize(common::make_ddim({in.numel()})); auto* inverse_data = context.template Alloc(inverse); std::copy(inverse_vec.begin(), inverse_vec.end(), inverse_data); } if (return_counts) { - count->Resize(phi::make_ddim({out->numel()})); + count->Resize(common::make_ddim({out->numel()})); auto* counts_data = context.template Alloc(count); std::copy(counts_vec.begin(), counts_vec.end(), counts_data); } @@ -156,17 +156,17 @@ static void UniqueConsecutiveDim(const Context& context, std::iota(permute.begin(), permute.end(), 0); permute[axis] = 0; permute[0] = axis; - std::vector in_trans_dims_vec(phi::vectorize(in.dims())); + std::vector in_trans_dims_vec(common::vectorize(in.dims())); in_trans_dims_vec[axis] = in.dims()[0]; in_trans_dims_vec[0] = in.dims()[axis]; DenseTensor in_trans; - DDim in_trans_dims = phi::make_ddim(in_trans_dims_vec); + DDim in_trans_dims = common::make_ddim(in_trans_dims_vec); in_trans.Resize(in_trans_dims); context.template Alloc(&in_trans); phi::funcs::TransCompute( in.dims().size(), context, in, &in_trans, permute); // reshape tensor: eg. 
[dim1, dim0, dim2] -> [dim1, dim0*dim2] - DDim in_trans_flat_dims = phi::flatten_to_2d(in_trans_dims, 1); + DDim in_trans_flat_dims = common::flatten_to_2d(in_trans_dims, 1); in_trans.Resize(in_trans_flat_dims); std::vector sorted_indices_vec(in_trans.dims()[0]); @@ -202,10 +202,10 @@ static void UniqueConsecutiveDim(const Context& context, DenseTensor out_trans; std::vector out_trans_dims_vec = in_trans_dims_vec; out_trans_dims_vec[0] = input_unbind.size(); - out_trans.Resize(phi::make_ddim(out_trans_dims_vec)); + out_trans.Resize(common::make_ddim(out_trans_dims_vec)); context.template Alloc(&out_trans); std::swap(out_trans_dims_vec[0], out_trans_dims_vec[axis]); - out->Resize(phi::make_ddim(out_trans_dims_vec)); + out->Resize(common::make_ddim(out_trans_dims_vec)); context.template Alloc(out); concat_functor(context, input_unbind, 0, &out_trans); phi::funcs::TransCompute( diff --git a/paddle/phi/kernels/cpu/unique_consecutive_kernel.cc b/paddle/phi/kernels/cpu/unique_consecutive_kernel.cc index 8c3a14a5edf76e..dc0b96ec839973 100644 --- a/paddle/phi/kernels/cpu/unique_consecutive_kernel.cc +++ b/paddle/phi/kernels/cpu/unique_consecutive_kernel.cc @@ -17,8 +17,8 @@ #include "paddle/phi/kernels/cpu/unique_consecutive_functor.h" #include "paddle/phi/kernels/unique_consecutive_kernel.h" +#include "paddle/common/errors.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" diff --git a/paddle/phi/kernels/dist_grad_kernel.cc b/paddle/phi/kernels/dist_grad_kernel.cc index e6ef962c665c28..088a4fe4ffd266 100644 --- a/paddle/phi/kernels/dist_grad_kernel.cc +++ b/paddle/phi/kernels/dist_grad_kernel.cc @@ -69,7 +69,7 @@ void DistGradKernel(const Context& dev_ctx, // the dims of output internally, so we Resize x/y_grad twice. auto res_x = GetReduceDims(x_grad_tmp.dims(), x.dims()); if (!std::get<0>(res_x).empty()) { - x_grad->Resize(phi::make_ddim(std::get<1>(res_x))); + x_grad->Resize(common::make_ddim(std::get<1>(res_x))); SumKernel( dev_ctx, x_grad_tmp, std::get<0>(res_x), x.dtype(), false, x_grad); x_grad->Resize(x.dims()); @@ -82,7 +82,7 @@ void DistGradKernel(const Context& dev_ctx, ScaleKernel(dev_ctx, x_grad_tmp, -1.0, 0.0, false, &y_grad_tmp); auto res_y = GetReduceDims(y_grad_tmp.dims(), y.dims()); if (!std::get<0>(res_y).empty()) { - y_grad->Resize(phi::make_ddim(std::get<1>(res_y))); + y_grad->Resize(common::make_ddim(std::get<1>(res_y))); SumKernel( dev_ctx, y_grad_tmp, std::get<0>(res_y), y.dtype(), false, y_grad); y_grad->Resize(y.dims()); diff --git a/paddle/phi/kernels/empty_kernel.cc b/paddle/phi/kernels/empty_kernel.cc index 8df5e9a543eb25..d2391a5702d4b1 100644 --- a/paddle/phi/kernels/empty_kernel.cc +++ b/paddle/phi/kernels/empty_kernel.cc @@ -12,10 +12,10 @@ See the License for the specific language governing permissions and limitations under the License.
*/ #include "paddle/phi/kernels/empty_kernel.h" +#include "paddle/common/macros.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/common/complex.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/macros.h" namespace phi { @@ -24,7 +24,7 @@ void EmptyKernel(const Context& dev_ctx, const IntArray& shape, DataType dtype UNUSED, DenseTensor* out) { - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); dev_ctx.template Alloc(out); } diff --git a/paddle/phi/kernels/flatten_grad_kernel.cc b/paddle/phi/kernels/flatten_grad_kernel.cc index 42d137ba4f4190..ebe1b1d24e50a5 100644 --- a/paddle/phi/kernels/flatten_grad_kernel.cc +++ b/paddle/phi/kernels/flatten_grad_kernel.cc @@ -27,7 +27,7 @@ void FlattenGradKernel(const Context& dev_ctx, DenseTensor* x_grad) { auto xshape_dims = xshape.dims(); dev_ctx.Alloc(x_grad, out_grad.dtype()); - auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size()); + auto x_dims = common::slice_ddim(xshape_dims, 1, xshape_dims.size()); phi::Copy(dev_ctx, out_grad, dev_ctx.GetPlace(), false, x_grad); x_grad->Resize(x_dims); } diff --git a/paddle/phi/kernels/full_kernel.cc b/paddle/phi/kernels/full_kernel.cc index 31bcbc2eeea949..cd603dd57e64d1 100644 --- a/paddle/phi/kernels/full_kernel.cc +++ b/paddle/phi/kernels/full_kernel.cc @@ -31,7 +31,7 @@ void FullBatchSizeLikeKernel(const Context& dev_ctx, // set the correct batch size for the LoDTensor. auto odims = out->dims(); odims[out_batch_size_dim] = static_cast(x.lod().back().size()) - 1; - FullKernel(dev_ctx, phi::vectorize(odims), val, dtype, out); + FullKernel(dev_ctx, common::vectorize(odims), val, dtype, out); } FullLikeKernel(dev_ctx, x, val, dtype, out); } diff --git a/paddle/phi/kernels/funcs/affine_grid_utils.h b/paddle/phi/kernels/funcs/affine_grid_utils.h index 1e6701d0c7e833..b973d75a9becdc 100644 --- a/paddle/phi/kernels/funcs/affine_grid_utils.h +++ b/paddle/phi/kernels/funcs/affine_grid_utils.h @@ -45,7 +45,7 @@ inline void GetIdxMap4D(int n, DenseTensor* grid, const Context& dev_ctx) { auto& place = *dev_ctx.eigen_device(); - grid->Resize(phi::make_ddim({n, h, w, 3})); + grid->Resize(common::make_ddim({n, h, w, 3})); dev_ctx.template Alloc(grid); auto grid_t = EigenTensor::From(*grid); // Get indexes of height with shape [height, width, 1] @@ -59,7 +59,7 @@ inline void GetIdxMap4D(int n, auto w_idx_t = EigenTensor::From(w_idx); // Get constant ones tensor with shape [height, width, 1] DenseTensor ones; - ones.Resize(phi::make_ddim({h, w, 1})); + ones.Resize(common::make_ddim({h, w, 1})); dev_ctx.template Alloc(&ones); phi::funcs::SetConstant()(dev_ctx, &ones, static_cast(1)); @@ -67,22 +67,22 @@ inline void GetIdxMap4D(int n, // Get grid tensor with shape [n, h, w, 3] by concatenating h_idx, w_idx and // ones DenseTensor w_idx_map; - w_idx_map.Resize(phi::make_ddim({h, w, 1})); + w_idx_map.Resize(common::make_ddim({h, w, 1})); dev_ctx.template Alloc(&w_idx_map); auto w_idx_map_t = EigenTensor::From(w_idx_map); DenseTensor h_idx_map; - h_idx_map.Resize(phi::make_ddim({h, w, 1})); + h_idx_map.Resize(common::make_ddim({h, w, 1})); dev_ctx.template Alloc(&h_idx_map); auto h_idx_map_t = EigenTensor::From(h_idx_map); DenseTensor w_h_idx_map; - w_h_idx_map.Resize(phi::make_ddim({h, w, 2})); + w_h_idx_map.Resize(common::make_ddim({h, w, 2})); dev_ctx.template Alloc(&w_h_idx_map); auto w_h_idx_map_t = EigenTensor::From(w_h_idx_map); DenseTensor w_h_one_idx_map; - w_h_one_idx_map.Resize(phi::make_ddim({h, w, 3})); + 
w_h_one_idx_map.Resize(common::make_ddim({h, w, 3})); dev_ctx.template Alloc(&w_h_one_idx_map); auto w_h_one_idx_map_t = EigenTensor::From(w_h_one_idx_map); @@ -109,7 +109,7 @@ inline void GetIdxMap5D(int n, DenseTensor* grid, const Context& dev_ctx) { auto& place = *dev_ctx.eigen_device(); - grid->Resize(phi::make_ddim({n, d, h, w, 4})); + grid->Resize(common::make_ddim({n, d, h, w, 4})); dev_ctx.template Alloc(grid); auto grid_t = EigenTensor::From(*grid); // Get indexes of height with shape [depth, height, width, 1] @@ -127,7 +127,7 @@ inline void GetIdxMap5D(int n, auto w_idx_t = EigenTensor::From(w_idx); // Get constant ones tensor with shape [depth, height, width, 1] DenseTensor ones; - ones.Resize(phi::make_ddim({d, h, w, 1})); + ones.Resize(common::make_ddim({d, h, w, 1})); dev_ctx.template Alloc(&ones); phi::funcs::SetConstant()(dev_ctx, &ones, static_cast(1)); @@ -135,32 +135,32 @@ inline void GetIdxMap5D(int n, // Get grid tensor with shape [n, d, h, w, 4] by concatenating d_idx, h_idx, // w_idx and ones DenseTensor w_idx_map; - w_idx_map.Resize(phi::make_ddim({d, h, w, 1})); + w_idx_map.Resize(common::make_ddim({d, h, w, 1})); dev_ctx.template Alloc(&w_idx_map); auto w_idx_map_t = EigenTensor::From(w_idx_map); DenseTensor h_idx_map; - h_idx_map.Resize(phi::make_ddim({d, h, w, 1})); + h_idx_map.Resize(common::make_ddim({d, h, w, 1})); dev_ctx.template Alloc(&h_idx_map); auto h_idx_map_t = EigenTensor::From(h_idx_map); DenseTensor d_idx_map; - d_idx_map.Resize(phi::make_ddim({d, h, w, 1})); + d_idx_map.Resize(common::make_ddim({d, h, w, 1})); dev_ctx.template Alloc(&d_idx_map); auto d_idx_map_t = EigenTensor::From(d_idx_map); DenseTensor w_h_idx_map; - w_h_idx_map.Resize(phi::make_ddim({d, h, w, 2})); + w_h_idx_map.Resize(common::make_ddim({d, h, w, 2})); dev_ctx.template Alloc(&w_h_idx_map); auto w_h_idx_map_t = EigenTensor::From(w_h_idx_map); DenseTensor w_h_d_idx_map; - w_h_d_idx_map.Resize(phi::make_ddim({d, h, w, 3})); + w_h_d_idx_map.Resize(common::make_ddim({d, h, w, 3})); dev_ctx.template Alloc(&w_h_d_idx_map); auto w_h_d_idx_map_t = EigenTensor::From(w_h_d_idx_map); DenseTensor w_h_d_one_idx_map; - w_h_d_one_idx_map.Resize(phi::make_ddim({d, h, w, 4})); + w_h_d_one_idx_map.Resize(common::make_ddim({d, h, w, 4})); dev_ctx.template Alloc(&w_h_d_one_idx_map); auto w_h_d_one_idx_map_t = EigenTensor::From(w_h_d_one_idx_map); diff --git a/paddle/phi/kernels/funcs/axis_utils.h b/paddle/phi/kernels/funcs/axis_utils.h index 368c4a9e14061c..41bbd4f048c6b4 100644 --- a/paddle/phi/kernels/funcs/axis_utils.h +++ b/paddle/phi/kernels/funcs/axis_utils.h @@ -14,7 +14,7 @@ limitations under the License. 
*/ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/batch_norm_utils.h b/paddle/phi/kernels/funcs/batch_norm_utils.h index 64a20ee5d2e098..c7f25fd5f548d7 100644 --- a/paddle/phi/kernels/funcs/batch_norm_utils.h +++ b/paddle/phi/kernels/funcs/batch_norm_utils.h @@ -31,30 +31,30 @@ inline void ResizeToChannelFirst(const DeviceContext& context, // input transformed_input->Resize(input->dims()); - auto in_dims_vec = vectorize(input->dims()); + auto in_dims_vec = common::vectorize(input->dims()); in_dims_vec[1] = input->dims()[4]; in_dims_vec[2] = input->dims()[1]; in_dims_vec[3] = input->dims()[2]; in_dims_vec[4] = input->dims()[3]; - transformed_input->Resize(make_ddim(in_dims_vec)); + transformed_input->Resize(common::make_ddim(in_dims_vec)); context.template Alloc(transformed_input); } else if (dim == 2) { // input transformed_input->Resize(input->dims()); - auto in_dims_vec = vectorize(input->dims()); + auto in_dims_vec = common::vectorize(input->dims()); in_dims_vec[1] = input->dims()[3]; in_dims_vec[2] = input->dims()[1]; in_dims_vec[3] = input->dims()[2]; - transformed_input->Resize(make_ddim(in_dims_vec)); + transformed_input->Resize(common::make_ddim(in_dims_vec)); context.template Alloc(transformed_input); } else if (dim == 1) { transformed_input->Resize(input->dims()); - auto in_dims_vec = vectorize(input->dims()); + auto in_dims_vec = common::vectorize(input->dims()); in_dims_vec[1] = input->dims()[2]; in_dims_vec[2] = input->dims()[1]; - transformed_input->Resize(make_ddim(in_dims_vec)); + transformed_input->Resize(common::make_ddim(in_dims_vec)); context.template Alloc(transformed_input); } } @@ -68,31 +68,31 @@ inline void ResizeToChannelLast(const DeviceContext& context, // input transformed_input->Resize(input->dims()); - auto in_dims_vec = vectorize(input->dims()); + auto in_dims_vec = common::vectorize(input->dims()); in_dims_vec[1] = input->dims()[2]; in_dims_vec[2] = input->dims()[3]; in_dims_vec[3] = input->dims()[4]; in_dims_vec[4] = input->dims()[1]; - transformed_input->Resize(make_ddim(in_dims_vec)); + transformed_input->Resize(common::make_ddim(in_dims_vec)); context.template Alloc(transformed_input); } else if (dim == 2) { // input transformed_input->Resize(input->dims()); - auto in_dims_vec = vectorize(input->dims()); + auto in_dims_vec = common::vectorize(input->dims()); in_dims_vec[1] = input->dims()[2]; in_dims_vec[2] = input->dims()[3]; in_dims_vec[3] = input->dims()[1]; - transformed_input->Resize(make_ddim(in_dims_vec)); + transformed_input->Resize(common::make_ddim(in_dims_vec)); context.template Alloc(transformed_input); } else if (dim == 1) { transformed_input->Resize(input->dims()); - auto in_dims_vec = vectorize(input->dims()); + auto in_dims_vec = common::vectorize(input->dims()); in_dims_vec[1] = input->dims()[2]; in_dims_vec[2] = input->dims()[1]; - transformed_input->Resize(make_ddim(in_dims_vec)); + transformed_input->Resize(common::make_ddim(in_dims_vec)); context.template Alloc(transformed_input); } } diff --git a/paddle/phi/kernels/funcs/blas/blas.cc b/paddle/phi/kernels/funcs/blas/blas.cc index a43005cf86939f..9b6ac51317a95e 100644 --- a/paddle/phi/kernels/funcs/blas/blas.cc +++ b/paddle/phi/kernels/funcs/blas/blas.cc @@ -27,7 +27,7 @@ MatDescriptor CreateMatrixDescriptor(const DDim &tensor_dim, tensor_dim.size())); MatDescriptor retv; if (num_flatten_cols > 1) { - auto flatten_dim = phi::flatten_to_2d(tensor_dim, num_flatten_cols); + auto 
flatten_dim = common::flatten_to_2d(tensor_dim, num_flatten_cols); retv.height_ = flatten_dim[0]; retv.width_ = flatten_dim[1]; } else { @@ -35,7 +35,7 @@ MatDescriptor CreateMatrixDescriptor(const DDim &tensor_dim, retv.height_ = tensor_dim[0]; retv.width_ = tensor_dim[1]; } else { - auto dim_vec = phi::vectorize(tensor_dim); + auto dim_vec = common::vectorize(tensor_dim); retv.batch_size_ = 1; for (size_t i = 0; i < dim_vec.size() - 2; ++i) { retv.batch_size_ *= dim_vec[i]; diff --git a/paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h b/paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h index 0fca9de54b2ba9..4e1db73c69e64d 100644 --- a/paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h +++ b/paddle/phi/kernels/funcs/blas/blaslt_impl.cu.h @@ -1042,8 +1042,8 @@ struct LinearWithCublasLt : public CublasLtBase { const bool trans_x, const bool trans_y, const MatmulFusedType fused_type) { - auto planner = phi::funcs::MatmulPlanner(vectorize(x->dims()), - vectorize(y->dims()), + auto planner = phi::funcs::MatmulPlanner(common::vectorize(x->dims()), + common::vectorize(y->dims()), trans_x, trans_y, phi::CppTypeToDataType::Type(), @@ -1080,8 +1080,8 @@ struct LinearGradWithCublasLt : public CublasLtBase { const bool use_addto, const bool no_exchange, // exchange x_desc and y_desc for grad. bool grad_for_dx = true) { - auto planner = phi::funcs::MatmulPlanner(vectorize(x->dims()), - vectorize(y->dims()), + auto planner = phi::funcs::MatmulPlanner(common::vectorize(x->dims()), + common::vectorize(y->dims()), trans_x, trans_y, phi::CppTypeToDataType::Type(), diff --git a/paddle/phi/kernels/funcs/broadcast_function.h b/paddle/phi/kernels/funcs/broadcast_function.h index a1f9c1eb4346cb..822801e10c357c 100644 --- a/paddle/phi/kernels/funcs/broadcast_function.h +++ b/paddle/phi/kernels/funcs/broadcast_function.h @@ -34,12 +34,12 @@ enum BroadcastType { kMixed = 1, kBroadcast = 2, kElementwise = 3 }; template struct BroadcastTypeClassifier { int64_t numel{0}; - int broadcast_num{0}; // Not used for XPU - bool all_elementwise{true}; // Not used for XPU - phi::Array use_broadcast; // Not used for XPU - phi::Array configs; - phi::Array ins_data; - phi::Array<_ptr_ OutT *, NumOuts> outs_data; + int broadcast_num{0}; // Not used for XPU + bool all_elementwise{true}; // Not used for XPU + Array use_broadcast; // Not used for XPU + Array configs; + Array ins_data; + Array<_ptr_ OutT *, NumOuts> outs_data; BroadcastTypeClassifier() {} BroadcastTypeClassifier(const std::vector &ins, @@ -289,11 +289,11 @@ template __device__ void VectorizedBroadcastKernelImpl( - const phi::Array &ins, - phi::Array<_ptr_ OutT *, NumOuts> outs, - const phi::Array &use_broadcast, + const Array &ins, + Array<_ptr_ OutT *, NumOuts> outs, + const Array &use_broadcast, const uint32_t numel, - const phi::Array &configs, + const Array &configs, int num, int block_offset, int read_lens, @@ -349,11 +349,11 @@ template __global__ void VectorizedBroadcastKernel( - phi::Array ins, - phi::Array<_ptr_ OutT *, NumOuts> outs, - phi::Array use_broadcast, + Array ins, + Array<_ptr_ OutT *, NumOuts> outs, + Array use_broadcast, uint32_t numel, - phi::Array configs, + Array configs, int main_offset, int tail_tid, int read_lens, @@ -580,7 +580,7 @@ static void SliceTensor(DenseTensor *x, const DenseTensor *share, const std::vector &out_compute_dims, int64_t offset) { - auto new_dim = make_ddim(out_compute_dims); + auto new_dim = common::make_ddim(out_compute_dims); DenseTensorMeta meta(share->dtype(), new_dim, share->layout(), diff --git 
a/paddle/phi/kernels/funcs/common_shape.h b/paddle/phi/kernels/funcs/common_shape.h index 8249d5bf22efbc..dea6e9f6ab3e0b 100644 --- a/paddle/phi/kernels/funcs/common_shape.h +++ b/paddle/phi/kernels/funcs/common_shape.h @@ -28,7 +28,7 @@ inline void SetXShape(const DenseTensor &x, DenseTensor *xshape) { for (int i = 0; i < in_dims.size(); ++i) { xshape_dims[i + 1] = in_dims[i]; } - xshape->ResizeAndAllocate(phi::make_ddim(xshape_dims)); + xshape->ResizeAndAllocate(common::make_ddim(xshape_dims)); xshape->ResetLoD(x.meta().lod); } @@ -114,7 +114,7 @@ static DDim ExtendDims2Rank(const DDim &in_dims, int rank) { for (int i = in_dims.size() - 1, j = rank - 1; i >= 0; --i, --j) { shapes[j] = in_dims[i]; } - return make_ddim(shapes); + return common::make_ddim(shapes); } template @@ -181,8 +181,8 @@ static inline std::vector MatrixGetBroadcastBatchPortion( // batch_size of matrix static inline std::tuple, std::vector> MatrixGetBroadcastDims(const DenseTensor &x, const DenseTensor &y) { - std::vector x_dims_vec = phi::vectorize(x.dims()); - std::vector y_dims_vec = phi::vectorize(y.dims()); + std::vector x_dims_vec = common::vectorize(x.dims()); + std::vector y_dims_vec = common::vectorize(y.dims()); std::vector::const_iterator f1 = x_dims_vec.begin(); std::vector::const_iterator l1 = x_dims_vec.end() - 2; @@ -212,7 +212,7 @@ inline DDim GetOutputDims(const DDim &s_dims, const DDim &l_dims) { if (s_dims.size() > l_dims.size()) { return GetOutputDims(l_dims, s_dims); } - std::vector shapes = phi::vectorize(l_dims); + std::vector shapes = common::vectorize(l_dims); for (int i = s_dims.size() - 1, j = l_dims.size() - 1; i >= 0; --i, --j) { int64_t s = s_dims[i]; int64_t l = l_dims[j]; @@ -230,7 +230,7 @@ inline DDim GetOutputDims(const DDim &s_dims, const DDim &l_dims) { } } } - return phi::make_ddim(shapes); + return common::make_ddim(shapes); } inline int64_t CalStride(phi::DDim dim) { @@ -274,7 +274,7 @@ inline void FCOutputSize(const DDim &in_dims, std::vector &out_dims, // NOLINT int in_num_col_dims, bool padding_weights) { - auto in_mat_dims = phi::flatten_to_2d(in_dims, in_num_col_dims); + auto in_mat_dims = common::flatten_to_2d(in_dims, in_num_col_dims); auto w_dims0 = padding_weights ? w_dims[0] - 4 : w_dims[0]; auto w_dims1 = padding_weights ? w_dims[1] - 4 : w_dims[1]; PADDLE_ENFORCE_EQ( @@ -288,7 +288,7 @@ inline void FCOutputSize(const DDim &in_dims, in_mat_dims[1], in_mat_dims, w_dims0, - phi::make_ddim({w_dims0, w_dims1}))); + common::make_ddim({w_dims0, w_dims1}))); out_dims.reserve(static_cast(in_num_col_dims + 1)); for (int i = 0; i < in_num_col_dims; ++i) { diff --git a/paddle/phi/kernels/funcs/compound_functors.h b/paddle/phi/kernels/funcs/compound_functors.h index 121597bca68731..823dcd70a2f3c8 100644 --- a/paddle/phi/kernels/funcs/compound_functors.h +++ b/paddle/phi/kernels/funcs/compound_functors.h @@ -17,7 +17,7 @@ limitations under the License. 
*/ #include #include #include -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/concat_funcs.h b/paddle/phi/kernels/funcs/concat_funcs.h index db965c2ef9b654..6a2dbf953b9b25 100644 --- a/paddle/phi/kernels/funcs/concat_funcs.h +++ b/paddle/phi/kernels/funcs/concat_funcs.h @@ -14,8 +14,8 @@ #pragma once +#include "paddle/common/errors.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/cufft_util.h b/paddle/phi/kernels/funcs/cufft_util.h index 3a4a3ef5e59149..52dfb8733f8a5e 100644 --- a/paddle/phi/kernels/funcs/cufft_util.h +++ b/paddle/phi/kernels/funcs/cufft_util.h @@ -15,8 +15,8 @@ #pragma once #include +#include "paddle/common/ddim.h" #include "paddle/phi/backends/dynload/cufft.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/fft.h" #include "paddle/phi/kernels/funcs/fft_key.h" diff --git a/paddle/phi/kernels/funcs/cumprod.h b/paddle/phi/kernels/funcs/cumprod.h index 4eefd4559c33a2..fad43f4acef72a 100644 --- a/paddle/phi/kernels/funcs/cumprod.h +++ b/paddle/phi/kernels/funcs/cumprod.h @@ -13,7 +13,7 @@ // limitations under the License. #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/dense_tensor.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/data_layout_transform.cc b/paddle/phi/kernels/funcs/data_layout_transform.cc index b949b7945a0469..a9779d8d78943d 100644 --- a/paddle/phi/kernels/funcs/data_layout_transform.cc +++ b/paddle/phi/kernels/funcs/data_layout_transform.cc @@ -16,10 +16,10 @@ #include "glog/logging.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/backends/onednn/onednn_context.h" #include "paddle/phi/common/bfloat16.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" @@ -57,7 +57,7 @@ void* GetDataFromTensor(const DenseTensor& tensor, // 0-D now. dnnl::memory::desc make_memory_desc(const phi::DenseTensor& ref_tensor, phi::DataLayout target_layout) { - auto ref_dims = vectorize(ref_tensor.dims()); + auto ref_dims = common::vectorize(ref_tensor.dims()); auto ref_type = ToOneDNNDataType(ref_tensor.dtype()); PADDLE_ENFORCE_NE(ref_type, OneDNNDataType::undef, @@ -84,7 +84,7 @@ void TransDataLayoutFromOneDNN(DataLayout in_layout, auto& pool = DeviceContextPool::Instance(); auto* dev_ctx = dynamic_cast(pool.Get(place)); auto& cpu_engine = dev_ctx->GetEngine(); - auto in_dims = vectorize(in.dims()); + auto in_dims = common::vectorize(in.dims()); auto md_dims = !in_dims.empty() ? in_dims : std::vector{1}; const auto src_mem_desc = @@ -102,7 +102,7 @@ void TransDataLayoutFromOneDNN(DataLayout in_layout, // Note(0x45f): Using initialized() to support slice Tensors // with shapes like [0, 0, 0]. 
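Taken together, the funcs/ hunks follow one mechanical recipe that downstream code can mirror when picking up this change: swap the phi-core includes for their paddle/common equivalents and re-qualify the DDim/layout helpers, leaving kernel logic untouched. A condensed editorial sketch of a migrated helper (NumRows is an illustrative name; that DDim and its helpers live in namespace common after this move is an assumption based on the hunks in this section):

#include <cstdint>
#include "paddle/common/ddim.h"  // was: "paddle/phi/core/ddim.h"

// Only the include path and the namespace qualifier change in this PR.
int64_t NumRows(const common::DDim& dims, int num_col_dims) {
  // was: phi::flatten_to_2d(dims, num_col_dims)[0]
  return common::flatten_to_2d(dims, num_col_dims)[0];
}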
if (in.initialized() && ((in.mem_desc() != out->mem_desc()) || always_copy)) { - auto in_tz = vectorize(in.dims()); + auto in_tz = common::vectorize(in.dims()); auto in_type = ToOneDNNDataType(in.dtype()); void* in_data = GetDataFromTensor(in, in_type); diff --git a/paddle/phi/kernels/funcs/data_layout_transform.h b/paddle/phi/kernels/funcs/data_layout_transform.h index 146e53d1c4be3b..4bcc96d9c2ab70 100644 --- a/paddle/phi/kernels/funcs/data_layout_transform.h +++ b/paddle/phi/kernels/funcs/data_layout_transform.h @@ -18,8 +18,8 @@ #include "dnnl.hpp" // NOLINT #endif +#include "paddle/common/layout.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" @@ -44,7 +44,7 @@ inline OneDNNMemoryFormat ToOneDNNFormat(const DataLayout& layout) { default: PADDLE_THROW( errors::InvalidArgument("Fail to convert layout %s to oneDNN format.", - ::phi::DataLayoutToString(layout))); + ::common::DataLayoutToString(layout))); } } diff --git a/paddle/phi/kernels/funcs/detail/activation_functions.h b/paddle/phi/kernels/funcs/detail/activation_functions.h index f1352df226094b..758503563680be 100644 --- a/paddle/phi/kernels/funcs/detail/activation_functions.h +++ b/paddle/phi/kernels/funcs/detail/activation_functions.h @@ -18,9 +18,9 @@ limitations under the License. */ #include #include +#include "paddle/common/macros.h" #include "paddle/phi/backends/cpu/cpu_info.h" #include "paddle/phi/core/hostdevice.h" -#include "paddle/phi/core/macros.h" namespace phi { namespace funcs { namespace detail { diff --git a/paddle/phi/kernels/funcs/detail/strided_memcpy.h b/paddle/phi/kernels/funcs/detail/strided_memcpy.h index 0cd07fdfd0e1ae..03e3bdde05ad09 100644 --- a/paddle/phi/kernels/funcs/detail/strided_memcpy.h +++ b/paddle/phi/kernels/funcs/detail/strided_memcpy.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include "paddle/common/ddim.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/device_context.h" #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) diff --git a/paddle/phi/kernels/funcs/diag_functor.h b/paddle/phi/kernels/funcs/diag_functor.h index 1862f5ec91b4bc..6fe54363e6f0e2 100644 --- a/paddle/phi/kernels/funcs/diag_functor.h +++ b/paddle/phi/kernels/funcs/diag_functor.h @@ -112,7 +112,7 @@ DenseTensor BatchDiag(const Context& dev_ctx, const DenseTensor& x, int batch) { for (int i = 0; i < num_dims - 1; ++i) { out_shape.push_back(x.dims()[i]); } - out.Resize(phi::make_ddim(out_shape)); + out.Resize(common::make_ddim(out_shape)); int order = x.dims()[num_dims - 1]; int stride_out = order * order; int stride_in = order + 1; diff --git a/paddle/phi/kernels/funcs/diagonal.h b/paddle/phi/kernels/funcs/diagonal.h index a30fb79f8c8b04..5504a337e88f2e 100644 --- a/paddle/phi/kernels/funcs/diagonal.h +++ b/paddle/phi/kernels/funcs/diagonal.h @@ -70,7 +70,7 @@ DenseTensor Diagonal(const DeviceContext& context, int64_t dim2) { auto* input_data = input->data(); auto input_dims = input->dims(); - auto input_stride = phi::stride(input_dims); + auto input_stride = common::stride(input_dims); auto dim1_ = dim1 < 0 ? input_dims.size() + dim1 : dim1; auto dim2_ = dim2 < 0 ? input_dims.size() + dim2 : dim2; auto len1 = input_dims[std::min(dim1_, dim2_)]; @@ -89,8 +89,8 @@ DenseTensor Diagonal(const DeviceContext& context, int diag_size = len2 < len1 ? 
len2 : len1; if (diag_size > 0) { - auto ret_strides = vectorize(input_stride); - auto ret_dims = vectorize(input_dims); + auto ret_strides = common::vectorize(input_stride); + auto ret_dims = common::vectorize(input_dims); ret_strides.erase(ret_strides.begin() + std::max(dim1_, dim2_)); ret_strides.erase(ret_strides.begin() + std::min(dim1_, dim2_)); ret_dims.erase(ret_dims.begin() + std::max(dim1_, dim2_)); @@ -102,15 +102,15 @@ DenseTensor Diagonal(const DeviceContext& context, ret_strides.push_back(stride1 + stride2); ret_dims.push_back(diag_size); DenseTensor diag; - DDim diag_dims = phi::make_ddim(ret_dims); - auto dig_stride = phi::stride(diag_dims); + DDim diag_dims = common::make_ddim(ret_dims); + auto dig_stride = common::stride(diag_dims); diag.Resize(diag_dims); auto diag_data = context.template Alloc(&diag); int64_t pos = std::abs(offset) * offset_stride; int64_t dim_size = ret_strides.size(); #if defined(__NVCC__) || defined(__HIPCC__) - thrust::device_vector diag_vec(vectorize(dig_stride)); + thrust::device_vector diag_vec(common::vectorize(dig_stride)); const int64_t* diag_arr = thrust::raw_pointer_cast(diag_vec.data()); thrust::device_vector ret_vec(ret_strides); const int64_t* ret_arr = thrust::raw_pointer_cast(ret_vec.data()); diff --git a/paddle/phi/kernels/funcs/dims_simplifier.h b/paddle/phi/kernels/funcs/dims_simplifier.h index 35621ed0abddb3..036cd2cc812c9b 100644 --- a/paddle/phi/kernels/funcs/dims_simplifier.h +++ b/paddle/phi/kernels/funcs/dims_simplifier.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "glog/logging.h" @@ -39,15 +39,15 @@ struct BroadcastDimsSimplifier { N = std::max(static_cast(ins.size()), 2); in_dims.resize(N); rank = dims.size(); - out_dims = phi::vectorize(dims); + out_dims = common::vectorize(dims); if (ins.size() == 1) { // When ins.size() = 1, broadcast input to output. - in_dims[0] = phi::vectorize(ins[0]->dims()); + in_dims[0] = common::vectorize(ins[0]->dims()); // Add out_dims to in_dims to avoid errors in dims merging. in_dims[1] = out_dims; } else { for (int j = 0; j < N; ++j) { - in_dims[j] = phi::vectorize(ins[j]->dims()); + in_dims[j] = common::vectorize(ins[j]->dims()); } } ExtendInputDimensions(axis); @@ -122,8 +122,8 @@ struct BroadcastDimsSimplifier { out_idx, out_dims[out_idx], in_dim[in_idx], - phi::make_ddim(in_dim), - phi::make_ddim(out_dims))); + common::make_ddim(in_dim), + common::make_ddim(out_dims))); } } in_dim.resize(rank); diff --git a/paddle/phi/kernels/funcs/dropout_impl.cu.h b/paddle/phi/kernels/funcs/dropout_impl.cu.h index 14696b45c78db1..3b0df468847138 100644 --- a/paddle/phi/kernels/funcs/dropout_impl.cu.h +++ b/paddle/phi/kernels/funcs/dropout_impl.cu.h @@ -280,7 +280,7 @@ void DropoutFwGPUKernelDriver( if (!is_test && mask) { auto* mask_data = mask->data(); - size_t size = phi::product(mask->dims()); + size_t size = common::product(mask->dims()); if (dropout_prob == 1.0f) { #ifdef PADDLE_WITH_HIP diff --git a/paddle/phi/kernels/funcs/elementwise_base.h b/paddle/phi/kernels/funcs/elementwise_base.h index 8ddb3f406ddfe3..c92acdaf4180be 100644 --- a/paddle/phi/kernels/funcs/elementwise_base.h +++ b/paddle/phi/kernels/funcs/elementwise_base.h @@ -38,8 +38,7 @@ namespace phi { /* Packing scalar type T(float, int etc.) 
into Array type for supporting multiple-output feature in elementwise system.*/ template -using ConditionalT = - typename std::conditional_t>; +using ConditionalT = typename std::conditional_t>; namespace funcs { using DDim = phi::DDim; @@ -624,7 +623,7 @@ struct SameDimsElementwisePrimitiveCaller { template struct ElementwiseWriteDataCallerBc { __device__ __forceinline__ void operator()( - phi::Array<_ptr_ OutT *, NumOuts> outs, + Array<_ptr_ OutT *, NumOuts> outs, ConditionalT src[VecSize], kps::IndexType block_offset, int num, @@ -647,7 +646,7 @@ struct ElementwiseWriteDataCallerBc { template struct ElementwiseWriteDataCallerBc { - __device__ __forceinline__ void operator()(phi::Array<_ptr_ OutT *, 1> outs, + __device__ __forceinline__ void operator()(Array<_ptr_ OutT *, 1> outs, OutT src[VecSize], kps::IndexType block_offset, int num, @@ -664,8 +663,8 @@ template __device__ void VectorizedElementwiseKernelImpl( - const phi::Array &in, - phi::Array<_ptr_ OutT *, NumOuts> outs, + const Array &in, + Array<_ptr_ OutT *, NumOuts> outs, kps::IndexType offset, int num, int read_lens, @@ -690,8 +689,8 @@ __device__ void VectorizedElementwiseKernelImpl( template __global__ void VectorizedElementwiseKernel( - phi::Array ins, - phi::Array<_ptr_ OutT *, NumOuts> outs, + Array ins, + Array<_ptr_ OutT *, NumOuts> outs, kps::IndexType numel, kps::IndexType main_offset, int read_lens, @@ -731,8 +730,8 @@ void LaunchElementwiseKernel(const KPDevice &ctx, // For large tensor numel * sizeof(T) > 2^31, we must use int64_t as index // type. int64_t numel = (*outs)[0]->numel(); - phi::Array ins_data; - phi::Array<_ptr_ OutT *, NumOuts> outs_data; + Array ins_data; + Array<_ptr_ OutT *, NumOuts> outs_data; using Traits = phi::funcs::FunctionTraits; using ArgsT = typename Traits::ArgsTuple; diff --git a/paddle/phi/kernels/funcs/elementwise_functor.h b/paddle/phi/kernels/funcs/elementwise_functor.h index 4cc12cf641ad9c..eaf527fbba9f6b 100644 --- a/paddle/phi/kernels/funcs/elementwise_functor.h +++ b/paddle/phi/kernels/funcs/elementwise_functor.h @@ -14,12 +14,12 @@ limitations under the License. 
*/ #pragma once +#include "paddle/common/macros.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/complex.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/hostdevice.h" -#include "paddle/phi/core/macros.h" #if defined(__xpu__) #include diff --git a/paddle/phi/kernels/funcs/elementwise_grad_base.h b/paddle/phi/kernels/funcs/elementwise_grad_base.h index 5ff70c86d5fe8b..8e5e45b861a3ae 100644 --- a/paddle/phi/kernels/funcs/elementwise_grad_base.h +++ b/paddle/phi/kernels/funcs/elementwise_grad_base.h @@ -244,8 +244,8 @@ void CommonElementwiseBroadcastBackward(const CPUContext &ctx, } VLOG(3) << "CommonElementwiseBroadcastBackward xdims:" - << phi::make_ddim(x_dims_array) - << " ydim:" << phi::make_ddim(y_dims_array); + << common::make_ddim(x_dims_array) + << " ydim:" << common::make_ddim(y_dims_array); CommonGradBroadcastCPU(x, y, @@ -393,7 +393,7 @@ void ElemwiseGradComputeNoBroadcast(const DeviceContext &dev_ctx, DenseTensor *dy, DX_OP dx_op, DY_OP dy_op) { - size_t N = static_cast(phi::product(x_dim)); + size_t N = static_cast(common::product(x_dim)); phi::funcs::ForRange for_range(dev_ctx, N); for_range(ElemwiseGradNoBroadcast{ x.data(), @@ -1677,8 +1677,8 @@ void CommonElementwiseBroadcastBackward(const GPUContext &ctx, } VLOG(3) << "CommonElementwiseBroadcastBackward xdims:" - << phi::make_ddim(x_dims_array) - << " ydim:" << phi::make_ddim(y_dims_array); + << common::make_ddim(x_dims_array) + << " ydim:" << common::make_ddim(y_dims_array); CommonGradBroadcastCUDA(x, y, diff --git a/paddle/phi/kernels/funcs/elementwise_utils.h b/paddle/phi/kernels/funcs/elementwise_utils.h index 3790044346dc42..b94a8fbd53a6d6 100644 --- a/paddle/phi/kernels/funcs/elementwise_utils.h +++ b/paddle/phi/kernels/funcs/elementwise_utils.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once +#include "paddle/common/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" @@ -75,6 +76,7 @@ inline void GetMidDims(const DDim &x_dims, inline DDim TrimTrailingSingularDims(const DDim &dims) { // Remove trailing dimensions of size 1 for y + using common::make_dim; auto actual_dims_size = dims.size(); for (; actual_dims_size != 0; --actual_dims_size) { if (dims[actual_dims_size - 1] != 1) break; @@ -86,9 +88,9 @@ inline DDim TrimTrailingSingularDims(const DDim &dims) { trim_dims[i] = dims[i]; } if (trim_dims.size() == 0) { - return DDim(phi::make_dim()); + return DDim(make_dim()); } - DDim actual_dims = phi::make_ddim(trim_dims); + DDim actual_dims = common::make_ddim(trim_dims); return actual_dims; } diff --git a/paddle/phi/kernels/funcs/fc_functor.cu b/paddle/phi/kernels/funcs/fc_functor.cu index 716d5c3979459b..84a8cc309516bb 100644 --- a/paddle/phi/kernels/funcs/fc_functor.cu +++ b/paddle/phi/kernels/funcs/fc_functor.cu @@ -397,8 +397,8 @@ void FCInt8Functor::operator()( const int8_t* W = w_tensor->data(); DenseTensor quant_x_tensor, quant_y_tensor; - quant_x_tensor.Resize(phi::make_ddim({M, K})); - quant_y_tensor.Resize(phi::make_ddim({M, N})); + quant_x_tensor.Resize(common::make_ddim({M, K})); + quant_y_tensor.Resize(common::make_ddim({M, N})); context.template Alloc(&quant_x_tensor, quant_x_tensor.numel() * sizeof(int8_t)); context.template Alloc(&quant_y_tensor, @@ -417,7 +417,7 @@ void FCInt8Functor::operator()( context, quant_x_tensor, *w_tensor, false, false, &quant_y_tensor); DenseTensor scale_weights_dev; - scale_weights_dev.Resize(phi::make_ddim({N})); + scale_weights_dev.Resize(common::make_ddim({N})); context.template Alloc(&scale_weights_dev, scale_weights_dev.numel() * sizeof(float)); float* scale_weights_dev_ptr = scale_weights_dev.data(); diff --git a/paddle/phi/kernels/funcs/fft.cc b/paddle/phi/kernels/funcs/fft.cc index 31ea37f5b5037e..97502787b69381 100644 --- a/paddle/phi/kernels/funcs/fft.cc +++ b/paddle/phi/kernels/funcs/fft.cc @@ -75,7 +75,7 @@ void exec_fft(const phi::CPUContext& ctx, for (int i = 0; i < signal_ndim; i++) { collapsed_input_shape_.push_back(in_sizes[axes[i]]); } - phi::DDim collapsed_input_shape = phi::make_ddim(collapsed_input_shape_); + phi::DDim collapsed_input_shape = common::make_ddim(collapsed_input_shape_); transposed_input.Resize(collapsed_input_shape); DenseTensor& collapsed_input = transposed_input; @@ -87,7 +87,7 @@ void exec_fft(const phi::CPUContext& ctx, for (int i = 0; i < signal_ndim; i++) { collapsed_output_shape_.push_back(out_sizes[axes[i]]); } - phi::DDim collapsed_output_shape = phi::make_ddim(collapsed_output_shape_); + phi::DDim collapsed_output_shape = common::make_ddim(collapsed_output_shape_); DenseTensor collapsed_output; collapsed_output.Resize(collapsed_output_shape); ctx.Alloc(&collapsed_output); @@ -99,8 +99,8 @@ void exec_fft(const phi::CPUContext& ctx, signal_sizes[1 + i] = std::max(collapsed_input_shape[1 + i], collapsed_output_shape[1 + i]); } - const phi::DDim input_stride = phi::stride(collapsed_input_shape); - const phi::DDim output_stride = phi::stride(collapsed_output_shape); + const phi::DDim input_stride = common::stride(collapsed_input_shape); + const phi::DDim output_stride = common::stride(collapsed_output_shape); DftiDescriptor desc = plan_mkl_fft(x.dtype(), out->dtype(), @@ -220,9 +220,9 @@ struct FFTC2CFunctor { using C = std::complex; const auto& input_dim = x.dims(); - const std::vector in_sizes = phi::vectorize(input_dim); + const std::vector 
in_sizes = common::vectorize(input_dim); std::vector in_strides = - phi::vectorize(phi::stride(input_dim)); + common::vectorize(common::stride(input_dim)); const int64_t data_size = sizeof(C); std::transform(in_strides.begin(), in_strides.end(), @@ -263,9 +263,9 @@ struct FFTR2CFunctor { using C = std::complex; const auto& input_dim = x.dims(); - const std::vector in_sizes = phi::vectorize(input_dim); + const std::vector in_sizes = common::vectorize(input_dim); std::vector in_strides = - phi::vectorize(phi::stride(input_dim)); + common::vectorize(common::stride(input_dim)); { const int64_t data_size = sizeof(R); std::transform(in_strides.begin(), @@ -275,9 +275,9 @@ struct FFTR2CFunctor { } const auto& output_dim = out->dims(); - const std::vector out_sizes = phi::vectorize(output_dim); + const std::vector out_sizes = common::vectorize(output_dim); std::vector out_strides = - phi::vectorize(phi::stride(output_dim)); + common::vectorize(common::stride(output_dim)); { const int64_t data_size = sizeof(C); std::transform(out_strides.begin(), @@ -320,9 +320,9 @@ struct FFTC2RFunctor { using C = std::complex; const auto& input_dim = x.dims(); - const std::vector in_sizes = phi::vectorize(input_dim); + const std::vector in_sizes = common::vectorize(input_dim); std::vector in_strides = - phi::vectorize(phi::stride(input_dim)); + common::vectorize(common::stride(input_dim)); { const int64_t data_size = sizeof(C); std::transform(in_strides.begin(), @@ -332,9 +332,9 @@ struct FFTC2RFunctor { } const auto& output_dim = out->dims(); - const std::vector out_sizes = phi::vectorize(output_dim); + const std::vector out_sizes = common::vectorize(output_dim); std::vector out_strides = - phi::vectorize(phi::stride(output_dim)); + common::vectorize(common::stride(output_dim)); { const int64_t data_size = sizeof(R); std::transform(out_strides.begin(), diff --git a/paddle/phi/kernels/funcs/fft.cu b/paddle/phi/kernels/funcs/fft.cu index e13a79b335ac0e..c70f615e80fa4d 100644 --- a/paddle/phi/kernels/funcs/fft.cu +++ b/paddle/phi/kernels/funcs/fft.cu @@ -17,8 +17,8 @@ #include "paddle/phi/kernels/funcs/fft.h" #include "paddle/phi/kernels/funcs/fft_cache.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/assign_kernel.h" #include "paddle/phi/kernels/complex_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" @@ -150,7 +150,7 @@ void exec_fft(const phi::GPUContext& ctx, for (int i = 0; i < signal_ndim; i++) { collapsed_input_shape_.push_back(in_sizes[axes[i]]); } - phi::DDim collapsed_input_shape = phi::make_ddim(collapsed_input_shape_); + phi::DDim collapsed_input_shape = common::make_ddim(collapsed_input_shape_); transposed_input.Resize(collapsed_input_shape); DenseTensor& collapsed_input = transposed_input; @@ -162,7 +162,7 @@ void exec_fft(const phi::GPUContext& ctx, for (int i = 0; i < signal_ndim; i++) { collapsed_output_shape_.push_back(out_sizes[axes[i]]); } - phi::DDim collapsed_output_shape = phi::make_ddim(collapsed_output_shape_); + phi::DDim collapsed_output_shape = common::make_ddim(collapsed_output_shape_); DenseTensor collapsed_output; collapsed_output.Resize(collapsed_output_shape); ctx.Alloc(&collapsed_output); @@ -267,7 +267,7 @@ struct FFTC2CFunctor { } } - std::vector out_dims = phi::vectorize(x.dims()); + std::vector out_dims = common::vectorize(x.dims()); detail::exec_normalization( ctx, *out, out, normalization, out_dims, axes); } @@ -281,7 +281,7 @@ struct FFTC2RFunctor { const std::vector& 
axes, FFTNormMode normalization, bool forward) { - std::vector out_dims = phi::vectorize(out->dims()); + std::vector out_dims = common::vectorize(out->dims()); if (detail::use_optimized_fft_path(axes)) { DenseTensor x_copy = Assign(ctx, x); @@ -325,7 +325,7 @@ struct FFTR2CFunctor { forward); } - const auto in_dims = phi::vectorize(x.dims()); + const auto in_dims = common::vectorize(x.dims()); detail::exec_normalization( ctx, *out, out, normalization, in_dims, axes); } diff --git a/paddle/phi/kernels/funcs/fft_fill_conj.h b/paddle/phi/kernels/funcs/fft_fill_conj.h index 91d859020f88b9..ab6d351986ecc2 100644 --- a/paddle/phi/kernels/funcs/fft_fill_conj.h +++ b/paddle/phi/kernels/funcs/fft_fill_conj.h @@ -142,10 +142,10 @@ void FFTFillConj(const DeviceContext& ctx, DenseTensor* dst, const std::vector& axes) { std::vector src_strides_v = - phi::vectorize(phi::stride(src->dims())); + common::vectorize(common::stride(src->dims())); std::vector dst_strides_v = - phi::vectorize(phi::stride(dst->dims())); - std::vector dst_shape_v = phi::vectorize(dst->dims()); + common::vectorize(common::stride(dst->dims())); + std::vector dst_shape_v = common::vectorize(dst->dims()); const auto src_data = src->data(); auto dst_data = dst->data(); const auto last_axis = axes.back(); diff --git a/paddle/phi/kernels/funcs/fft_key.h b/paddle/phi/kernels/funcs/fft_key.h index 5893cfc6ba019f..8a577754cf051e 100644 --- a/paddle/phi/kernels/funcs/fft_key.h +++ b/paddle/phi/kernels/funcs/fft_key.h @@ -102,8 +102,8 @@ static FFTConfigKey create_fft_configkey(const DenseTensor& input, auto out_size = output.dims()[i]; signal_size[i] = std::max(in_size, out_size); } - FFTConfigKey key(phi::vectorize(input.dims()), - phi::vectorize(output.dims()), + FFTConfigKey key(common::vectorize(input.dims()), + common::vectorize(output.dims()), signal_size, fft_type, value_type); diff --git a/paddle/phi/kernels/funcs/for_range.h b/paddle/phi/kernels/funcs/for_range.h index 484fbd21dc7709..7b6f672f47f1b5 100644 --- a/paddle/phi/kernels/funcs/for_range.h +++ b/paddle/phi/kernels/funcs/for_range.h @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once +#include "paddle/common/macros.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" -#include "paddle/phi/core/macros.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/fused_gemm_epilogue.h b/paddle/phi/kernels/funcs/fused_gemm_epilogue.h index eb5f0fa540f8d3..c07ff2e48864f1 100644 --- a/paddle/phi/kernels/funcs/fused_gemm_epilogue.h +++ b/paddle/phi/kernels/funcs/fused_gemm_epilogue.h @@ -394,11 +394,11 @@ void ComputeFusedGemmEpilogueForward(const phi::GPUContext& dev_ctx, if (activation == "relu") { phi::DataType rs_type = phi::DataType::BOOL; size_t reserve_space_size = - phi::product(reserve_space->dims()) * SizeOf(rs_type); + common::product(reserve_space->dims()) * SizeOf(rs_type); dev_ctx.Alloc(reserve_space, rs_type, reserve_space_size); } else { size_t reserve_space_size = - phi::product(reserve_space->dims()) * sizeof(T); + common::product(reserve_space->dims()) * sizeof(T); dev_ctx.Alloc(reserve_space, reserve_space_size); } diff --git a/paddle/phi/kernels/funcs/gather.cu.h b/paddle/phi/kernels/funcs/gather.cu.h index 2acb49446d7bbb..3d4a5256f5fa97 100644 --- a/paddle/phi/kernels/funcs/gather.cu.h +++ b/paddle/phi/kernels/funcs/gather.cu.h @@ -135,8 +135,8 @@ void GPUGatherNd(const phi::GPUContext& ctx, // final dim int64_t end_size = index_dims[index_dims_size - 1]; // remain dim - auto remain_ddim = phi::slice_ddim(index_dims, 0, index_dims_size - 1); - int64_t remain_numel = phi::product(remain_ddim); + auto remain_ddim = common::slice_ddim(index_dims, 0, index_dims_size - 1); + int64_t remain_numel = common::product(remain_ddim); // slice size int64_t slice_size = 1; for (int64_t i = end_size; i < input_dims_size; ++i) { @@ -250,7 +250,7 @@ void GatherV2CUDAFunction(const DenseTensor* input, outer_dim_size *= input_dim[i]; out_dim_vec.push_back(input_dim[i]); } - auto out_dim = phi::make_ddim(out_dim_vec); + auto out_dim = common::make_ddim(out_dim_vec); out->Resize(out_dim); auto* out_data = ctx.Alloc(out); diff --git a/paddle/phi/kernels/funcs/gather.h b/paddle/phi/kernels/funcs/gather.h index 50f7f4fa0322cb..519bc9fb962c9c 100644 --- a/paddle/phi/kernels/funcs/gather.h +++ b/paddle/phi/kernels/funcs/gather.h @@ -18,10 +18,10 @@ limitations under the License. 
*/ #include #include +#include "paddle/common/ddim.h" +#include "paddle/common/macros.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace phi { namespace funcs { @@ -111,8 +111,8 @@ void CPUGatherNd(const phi::CPUContext& ctx UNUSED, // final dim int64_t end_size = index_dims[index_dims_size - 1]; // remain dim - auto remain_ddim = phi::slice_ddim(index_dims, 0, index_dims_size - 1); - int64_t remain_numel = phi::product(remain_ddim); + auto remain_ddim = common::slice_ddim(index_dims, 0, index_dims_size - 1); + int64_t remain_numel = common::product(remain_ddim); // slice size int64_t slice_size = 1; for (int64_t i = end_size; i < input_dims_size; ++i) { @@ -195,7 +195,7 @@ void GatherV2Function(const phi::CPUContext& ctx, outer_dim_size *= input_dim[i]; out_dim_vec.push_back(input_dim[i]); } - auto out_dim = phi::make_ddim(out_dim_vec); + auto out_dim = common::make_ddim(out_dim_vec); out->Resize(out_dim); auto* out_data = ctx.Alloc(out); diff --git a/paddle/phi/kernels/funcs/gather_scatter_functor.cc b/paddle/phi/kernels/funcs/gather_scatter_functor.cc index be07c68b0fd338..7be86351c47ff6 100644 --- a/paddle/phi/kernels/funcs/gather_scatter_functor.cc +++ b/paddle/phi/kernels/funcs/gather_scatter_functor.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include "glog/logging.h" -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/im2col.h b/paddle/phi/kernels/funcs/im2col.h index 73b2866924d1e9..c6b8d22b8c60c4 100644 --- a/paddle/phi/kernels/funcs/im2col.h +++ b/paddle/phi/kernels/funcs/im2col.h @@ -16,10 +16,10 @@ limitations under the License. 
*/ #include -#include "paddle/phi/common/layout.h" +#include "paddle/common/errors.h" +#include "paddle/common/layout.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/index_calculator.h b/paddle/phi/kernels/funcs/index_calculator.h index 13697e443e16de..4e306cb87a480d 100644 --- a/paddle/phi/kernels/funcs/index_calculator.h +++ b/paddle/phi/kernels/funcs/index_calculator.h @@ -34,8 +34,7 @@ constexpr int kMaxRank = phi::DDim::kMaxRank; namespace details { // Convert dims from vector to array template -static inline phi::Array VectorToArray( - const VectorLikeType& vec) { +static inline Array VectorToArray(const VectorLikeType& vec) { PADDLE_ENFORCE_LE( vec.size(), ElementCount, @@ -44,7 +43,7 @@ static inline phi::Array VectorToArray( vec.size(), ElementCount)); size_t n = static_cast(vec.size()); - phi::Array ret; + Array ret; for (size_t i = 0; i < n; ++i) { ret[i] = vec[i]; } @@ -99,11 +98,11 @@ struct IndexCalculator { } int dim; - phi::Array dims; - phi::Array strides; - phi::Array reduce_strides; + Array dims; + Array strides; + Array reduce_strides; #ifndef PADDLE_WITH_XPU_KP - phi::Array divmoders; + Array divmoders; #endif }; diff --git a/paddle/phi/kernels/funcs/index_put_utils.h b/paddle/phi/kernels/funcs/index_put_utils.h index 09da00d7cca147..e6b70e8eb7305f 100644 --- a/paddle/phi/kernels/funcs/index_put_utils.h +++ b/paddle/phi/kernels/funcs/index_put_utils.h @@ -15,12 +15,12 @@ #pragma once #include +#include "paddle/common/array.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/common/int_array.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/utils/array.h" #include "paddle/phi/kernels/cast_kernel.h" #include "paddle/phi/kernels/expand_kernel.h" #include "paddle/phi/kernels/nonzero_kernel.h" @@ -46,7 +46,7 @@ phi::DenseTensor GetReshapeAndExpandTensor(const Context& dev_ctx, const phi::DDim& res_dim, const phi::DDim& bd_dim, int index) { - std::vector before_dims = phi::vectorize(tensor.dims()); + std::vector before_dims = common::vectorize(tensor.dims()); std::vector mid_dims(res_dim.size(), 1); if (index == 0) { @@ -58,13 +58,13 @@ phi::DenseTensor GetReshapeAndExpandTensor(const Context& dev_ctx, } phi::DenseTensor mid_tensor(tensor.dtype()); - mid_tensor.Resize(phi::make_ddim(mid_dims)); + mid_tensor.Resize(common::make_ddim(mid_dims)); ReshapeInferKernel(dev_ctx, tensor, IntArray(mid_dims), &mid_tensor); phi::DenseTensor res_tensor(tensor.dtype()); res_tensor.Resize(res_dim); ExpandKernel( - dev_ctx, mid_tensor, IntArray(phi::vectorize(res_dim)), &res_tensor); + dev_ctx, mid_tensor, IntArray(common::vectorize(res_dim)), &res_tensor); return res_tensor; } @@ -86,7 +86,7 @@ std::vector DealWithBoolIndices( phi::errors::InvalidArgument("the only bool tensor in indices should " "have number of dimension at least 1")); phi::DenseTensor nonzero_indices(phi::DataType::INT64); - nonzero_indices.Resize(phi::make_ddim({-1, rank})); + nonzero_indices.Resize(common::make_ddim({-1, rank})); NonZeroKernel(dev_ctx, *indices_v[i], &nonzero_indices); if (nonzero_indices.numel() == 0) { @@ -99,7 +99,7 @@ std::vector DealWithBoolIndices( for (int i = 0; i < rank; ++i) { tmp_indices_v->emplace_back( DenseTensor(phi::DataType::INT64) - .Resize(phi::make_ddim({nonzero_indices.dims()[0]}))); + 
.Resize(common::make_ddim({nonzero_indices.dims()[0]}))); } for (int i = 0; i < rank; ++i) { integer_indices[i] = &((*tmp_indices_v)[i + tmp_ix]); @@ -173,7 +173,7 @@ static phi::DDim BroadCastTensorsDims( } target_dims[target_rank - index - 1] = target_dim_size; } - return phi::make_ddim(target_dims); + return common::make_ddim(target_dims); } template @@ -207,7 +207,7 @@ void DealWithIndices(const Context& dev_ctx, std::vector* res_dim_v) { size_t total_dims = x.dims().size(); if (int_indices_v.size() < total_dims) { - std::vector tmp_x_dims = phi::vectorize(x.dims()); + std::vector tmp_x_dims = common::vectorize(x.dims()); int len_bd_dim = bd_dim.size(); res_dim_v->insert(res_dim_v->end(), tmp_x_dims.begin() + int_indices_v.size(), @@ -225,7 +225,7 @@ void DealWithIndices(const Context& dev_ctx, reshaped_indices_v.insert( reshaped_indices_v.end(), range_tensor_v.begin(), range_tensor_v.end()); - phi::DDim res_dim = phi::make_ddim(*res_dim_v); + phi::DDim res_dim = common::make_ddim(*res_dim_v); for (size_t i = 0; i < reshaped_indices_v.size(); ++i) { tmp_res_indices_v->emplace_back( @@ -261,7 +261,7 @@ void DealWithIndices(const Context& dev_ctx, ExpandKernel( dev_ctx, int_indices_v_tmp[i], - IntArray(phi::vectorize(bd_dim)), + IntArray(common::vectorize(bd_dim)), &(*tmp_res_indices_v)[i]); } else { tmp_res_indices_v->emplace_back(int_indices_v_tmp[i]); @@ -323,7 +323,7 @@ phi::DenseTensor GetRangeCudaTensor(const Context& dev_ctx, int64_t N, phi::DataType dtype) { phi::DenseTensor res(dtype); - res.Resize(phi::make_ddim({N})); + res.Resize(common::make_ddim({N})); DenseTensor* p_res = &res; T* out = dev_ctx.template Alloc(p_res); auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, N); @@ -346,7 +346,7 @@ phi::DenseTensor GetRangeTensor(const Context& dev_ctx, int64_t N, phi::DataType dtype) { phi::DenseTensor res(dtype); - res.Resize(phi::make_ddim({N})); + res.Resize(common::make_ddim({N})); DenseTensor* p_res = &res; T* out = dev_ctx.template Alloc(p_res); range_kernel(N, out); diff --git a/paddle/phi/kernels/funcs/interpolate_function.h b/paddle/phi/kernels/funcs/interpolate_function.h index 23731285926da4..bbfc54e5e2dc03 100644 --- a/paddle/phi/kernels/funcs/interpolate_function.h +++ b/paddle/phi/kernels/funcs/interpolate_function.h @@ -14,9 +14,9 @@ #pragma once +#include "paddle/common/ddim.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/context_pool.h" -#include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #if defined(__NVCC__) || defined(__HIPCC__) @@ -87,8 +87,8 @@ inline std::vector get_new_shape( for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) { auto tensor = list_new_shape_tensor[i]; phi::DeviceContext* dev_ctx = pool.Get(tensor->place()); - PADDLE_ENFORCE_EQ(tensor->dims() == phi::make_ddim({1}) || - tensor->dims() == phi::make_ddim({}), + PADDLE_ENFORCE_EQ(tensor->dims() == common::make_ddim({1}) || + tensor->dims() == common::make_ddim({}), true, errors::InvalidArgument( "The shape of dimension tensor should be [1] or []," diff --git a/paddle/phi/kernels/funcs/jit/CMakeLists.txt b/paddle/phi/kernels/funcs/jit/CMakeLists.txt index fd44ca308107cf..248bdf1c215c32 100644 --- a/paddle/phi/kernels/funcs/jit/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/jit/CMakeLists.txt @@ -29,7 +29,7 @@ endif() cc_test( jit_kernel_test SRCS test.cc - DEPS phi) + DEPS phi common) if(NOT WIN32) set(cuda_less12_and_gcc_greater12 false) @@ -40,7 +40,7 @@ if(NOT WIN32) endif() endif() 
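The CMake edits here follow from the code moves: once the DDim/enforce utilities live in the standalone common library, any target that names phi in DEPS must name common as well, or the moved symbols fail to resolve at link time. A hypothetical consumer illustrating the dependency (the file and target names below are illustrative, not part of this patch):

// ddim_consumer.cc -- would be built via cc_test(... DEPS phi common);
// dropping `common` from DEPS leaves common::make_ddim undefined at link.
#include "paddle/common/ddim.h"

int main() {
  phi::DDim d = common::make_ddim({2, 3});
  return d.size() == 2 ? 0 : 1;  // DDim::size() reports the rank
}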
if(NOT cuda_less12_and_gcc_greater12) - cc_binary(jit_kernel_benchmark SRCS benchmark.cc DEPS phi) + cc_binary(jit_kernel_benchmark SRCS benchmark.cc DEPS phi common) endif() endif() if(WITH_TESTING AND TEST jit_kernel_test) diff --git a/paddle/phi/kernels/funcs/jit/kernel_base.h b/paddle/phi/kernels/funcs/jit/kernel_base.h index 78bedf184975cd..b8a638b48fc8df 100644 --- a/paddle/phi/kernels/funcs/jit/kernel_base.h +++ b/paddle/phi/kernels/funcs/jit/kernel_base.h @@ -15,7 +15,7 @@ #pragma once #include -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" #include "paddle/phi/kernels/funcs/jit/macro.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/jit/registry.h b/paddle/phi/kernels/funcs/jit/registry.h index e9b371312548f6..26849a66097058 100644 --- a/paddle/phi/kernels/funcs/jit/registry.h +++ b/paddle/phi/kernels/funcs/jit/registry.h @@ -19,8 +19,8 @@ #include #include // for std::move +#include "paddle/common/macros.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/kernels/funcs/jit/kernel_base.h" #include "paddle/phi/kernels/funcs/jit/kernel_pool.h" diff --git a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h index 1a52e57e45f236..6a82875819161b 100644 --- a/paddle/phi/kernels/funcs/layer_norm_impl.cu.h +++ b/paddle/phi/kernels/funcs/layer_norm_impl.cu.h @@ -26,11 +26,11 @@ namespace cub = hipcub; #include "glog/logging.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/math_function.cc b/paddle/phi/kernels/funcs/math_function.cc index bfb2dc6d78e17f..8642d9d9e602e3 100644 --- a/paddle/phi/kernels/funcs/math_function.cc +++ b/paddle/phi/kernels/funcs/math_function.cc @@ -103,8 +103,8 @@ void TransposeNormal::operator()( phi::DenseTensor* out, const std::vector& axis) { const int rank = static_cast(axis.size()); - auto in_stride = phi::stride(in.dims()); - auto out_stride = phi::stride(out->dims()); + auto in_stride = common::stride(in.dims()); + auto out_stride = common::stride(out->dims()); const T* in_ptr = in.data(); T* out_ptr = out->data(); @@ -191,7 +191,7 @@ void set_constant_with_place( phi::DenseTensor*); auto* kernel_fn = kernel.GetVariadicKernelFn(); (*kernel_fn)(context, - phi::IntArray(phi::vectorize(tensor->dims())), + phi::IntArray(common::vectorize(tensor->dims())), phi::Scalar(value), tensor->dtype(), tensor); diff --git a/paddle/phi/kernels/funcs/math_function.cu b/paddle/phi/kernels/funcs/math_function.cu index bdd97616e0a660..16d4aed2f10618 100644 --- a/paddle/phi/kernels/funcs/math_function.cu +++ b/paddle/phi/kernels/funcs/math_function.cu @@ -220,8 +220,8 @@ void TransposeNormal::operator()( phi::DenseTensor* out, const std::vector& axis) { const int rank = axis.size(); - auto in_stride = phi::stride(in.dims()); - auto out_stride = phi::stride(out->dims()); + auto in_stride = common::stride(in.dims()); + auto out_stride = common::stride(out->dims()); auto* in_ptr = in.data(); auto* out_ptr = out->data(); diff --git a/paddle/phi/kernels/funcs/matrix_reduce.cc b/paddle/phi/kernels/funcs/matrix_reduce.cc index 34d84070497fc5..e20d98984eb5aa 100644 --- a/paddle/phi/kernels/funcs/matrix_reduce.cc +++ 
b/paddle/phi/kernels/funcs/matrix_reduce.cc @@ -28,9 +28,10 @@ class MatrixReduceSumFunctor { DenseTensor* out) { // For example: in's dim = [5, 3, 2, 7, 3] ; out's dim = [3, 1, 7, 3] // out_reduce_dim should be [0, 2] - const std::vector in_dims = phi::vectorize(in.dims()); + const std::vector in_dims = common::vectorize(in.dims()); auto in_size = in_dims.size(); - const std::vector out_dims = phi::vectorize(out->dims()); + const std::vector out_dims = + common::vectorize(out->dims()); auto out_size = out_dims.size(); std::vector out_bst_dims(in_size); @@ -39,7 +40,7 @@ class MatrixReduceSumFunctor { std::copy(out_dims.data(), out_dims.data() + out_size, out_bst_dims.data() + in_size - out_size); - out->Resize(phi::make_ddim(out_bst_dims)); + out->Resize(common::make_ddim(out_bst_dims)); std::vector out_reduce_dims; for (size_t idx = 0; idx <= in_size - 3; idx++) { diff --git a/paddle/phi/kernels/funcs/matrix_reduce.cu b/paddle/phi/kernels/funcs/matrix_reduce.cu index 5c3ebd6bb01671..f4305914c41713 100644 --- a/paddle/phi/kernels/funcs/matrix_reduce.cu +++ b/paddle/phi/kernels/funcs/matrix_reduce.cu @@ -26,9 +26,9 @@ class MatrixReduceSumFunctor { DenseTensor* out) { // For example: in's dim = [5, 3, 2, 7, 3] ; out's dim = [3, 1, 7, 3] // out_reduce_dim should be [0, 2] - const std::vector in_dims = phi::vectorize(in.dims()); + const std::vector in_dims = common::vectorize(in.dims()); auto in_size = in_dims.size(); - const std::vector out_dims = phi::vectorize(out->dims()); + const std::vector out_dims = common::vectorize(out->dims()); auto out_size = out_dims.size(); std::vector out_bst_dims(in_size); @@ -37,7 +37,7 @@ class MatrixReduceSumFunctor { std::copy(out_dims.data(), out_dims.data() + out_size, out_bst_dims.data() + in_size - out_size); - out->Resize(phi::make_ddim(out_bst_dims)); + out->Resize(common::make_ddim(out_bst_dims)); std::vector out_reduce_dims; for (size_t idx = 0; idx <= in_size - 3; idx++) { diff --git a/paddle/phi/kernels/funcs/matrix_solve.cu b/paddle/phi/kernels/funcs/matrix_solve.cu index 0655bb4d958969..0bd1522e9f58ee 100644 --- a/paddle/phi/kernels/funcs/matrix_solve.cu +++ b/paddle/phi/kernels/funcs/matrix_solve.cu @@ -64,7 +64,7 @@ void MatrixSolveFunctor::operator()(const Context& context, // because cuBlas assumes column-major while Paddle uses row-majar. DenseTensor tmp_b(b.type()); const auto& new_dims_vec = getNewDimsVec(b_dims); - tmp_b.Resize(phi::make_ddim(new_dims_vec)); + tmp_b.Resize(common::make_ddim(new_dims_vec)); context.template Alloc(&tmp_b); phi::funcs::TransposeNormal trans; std::vector new_axis = getNewAxis(b_rank); diff --git a/paddle/phi/kernels/funcs/matrix_solve.h b/paddle/phi/kernels/funcs/matrix_solve.h index 3856c06c1b25fc..f8225bd482385e 100644 --- a/paddle/phi/kernels/funcs/matrix_solve.h +++ b/paddle/phi/kernels/funcs/matrix_solve.h @@ -68,7 +68,7 @@ static std::vector getNewAxis(const int b_rank) { // for Resize static std::vector getNewDimsVec(const DDim& b_dims) { - std::vector b_dims_vec = phi::vectorize(b_dims); + std::vector b_dims_vec = common::vectorize(b_dims); int size = b_dims_vec.size(); if (size >= 2) { // swap the last 2 elements in b_dims_vec diff --git a/paddle/phi/kernels/funcs/maxouting.h b/paddle/phi/kernels/funcs/maxouting.h index c6242318a3c0dc..99b781ebba0aae 100644 --- a/paddle/phi/kernels/funcs/maxouting.h +++ b/paddle/phi/kernels/funcs/maxouting.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once +#include "paddle/common/macros.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/macros.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/mode.h b/paddle/phi/kernels/funcs/mode.h index 632b0ce7e15104..d6cf68c092317e 100644 --- a/paddle/phi/kernels/funcs/mode.h +++ b/paddle/phi/kernels/funcs/mode.h @@ -152,7 +152,7 @@ static void GetModebySort(const phi::GPUContext& dev_ctx, T* out_tensor, int64_t* indices_tensor) { DenseTensor input_tmp; - input_tmp.Resize(phi::make_ddim({num_rows, num_cols})); + input_tmp.Resize(common::make_ddim({num_rows, num_cols})); T* input_tmp_data = dev_ctx.Alloc(&input_tmp); phi::Copy(dev_ctx, *input_tensor, dev_ctx.GetPlace(), false, &input_tmp); diff --git a/paddle/phi/kernels/funcs/nanmedian_utils.h b/paddle/phi/kernels/funcs/nanmedian_utils.h index edcdc10b885956..6acbe25bf75bb2 100644 --- a/paddle/phi/kernels/funcs/nanmedian_utils.h +++ b/paddle/phi/kernels/funcs/nanmedian_utils.h @@ -56,7 +56,7 @@ void PostprocessMedianGradKernel(const Context& dev_ctx, } } - input->Resize(make_ddim(reshape_back)); + input->Resize(common::make_ddim(reshape_back)); funcs::TransCompute( static_cast(trans_back.size()), dev_ctx, *input, x, trans_back); } @@ -104,7 +104,7 @@ void PreprocessMedianKernel(const Context& dev_ctx, dev_ctx.template Alloc(x); funcs::TransCompute(ndims, dev_ctx, input, x, perm); - x->Resize(make_ddim(reshape)); + x->Resize(common::make_ddim(reshape)); } } // namespace funcs diff --git a/paddle/phi/kernels/funcs/norm_utils.cu.h b/paddle/phi/kernels/funcs/norm_utils.cu.h index ecd2ac50509f6b..0d8fa486cc065a 100644 --- a/paddle/phi/kernels/funcs/norm_utils.cu.h +++ b/paddle/phi/kernels/funcs/norm_utils.cu.h @@ -24,7 +24,7 @@ limitations under the License. */ #include namespace cub = hipcub; #endif -#include "paddle/phi/common/layout.h" +#include "paddle/common/layout.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/reduce_function.h" diff --git a/paddle/phi/kernels/funcs/norm_utils.h b/paddle/phi/kernels/funcs/norm_utils.h index 5c898549b353ea..c3a3b07ae08cca 100644 --- a/paddle/phi/kernels/funcs/norm_utils.h +++ b/paddle/phi/kernels/funcs/norm_utils.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" +#include "paddle/common/layout.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/pooling.h b/paddle/phi/kernels/funcs/pooling.h index bf2409d2e502b8..1ffd747735543c 100644 --- a/paddle/phi/kernels/funcs/pooling.h +++ b/paddle/phi/kernels/funcs/pooling.h @@ -18,10 +18,10 @@ limitations under the License. 
*/ #include #include +#include "paddle/common/macros.h" // import FLT_MAX #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/hostdevice.h" -#include "paddle/phi/core/macros.h" // import FLT_MAX #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include "paddle/phi/backends/gpu/gpu_decls.h" @@ -427,7 +427,7 @@ inline void UpdatePadding(std::vector* paddings, const std::vector& strides, const std::vector& kernel_size) { // set padding size == data_dims.size() * 2 - auto data_shape = vectorize(data_dims); + auto data_shape = common::vectorize(data_dims); if (static_cast(paddings->size()) == data_dims.size()) { for (int i = 0; i < data_dims.size(); ++i) { T copy_pad = *(paddings->begin() + 2 * i); diff --git a/paddle/phi/kernels/funcs/reduce_function.h b/paddle/phi/kernels/funcs/reduce_function.h index 1bbdd019a7c4bd..564c02c9f9f79b 100644 --- a/paddle/phi/kernels/funcs/reduce_function.h +++ b/paddle/phi/kernels/funcs/reduce_function.h @@ -57,10 +57,10 @@ using dim3 = phi::kps::dim3; #endif +#include "paddle/common/array.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_utils.h" -#include "paddle/phi/core/utils/array.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -288,8 +288,8 @@ struct ReduceConfig { const KPDevice& dev_ctx, phi::DenseTensor* tmp) { if (should_reduce_again) { - tmp->Resize( - phi::make_ddim({static_cast(left_num * grid.z * grid.y)})); + tmp->Resize(common::make_ddim( + {static_cast(left_num * grid.z * grid.y)})); tmp_data = dev_ctx.Alloc(tmp); } } @@ -1060,7 +1060,7 @@ void ReduceKernel(const KPDevice& dev_ctx, #endif dev_ctx.Alloc(y); - auto x_dim = phi::vectorize(x.dims()); + auto x_dim = common::vectorize(x.dims()); if (x_dim.size() == 0) { std::vector inputs = {&x}; @@ -1238,13 +1238,13 @@ void ReduceFunctor(const Context& context, DDim out_dims = output->dims(); if (keep_dim && x_rank > 1) { const int kDelFlag = -2; - auto dims_vector = phi::vectorize(out_dims); + auto dims_vector = common::vectorize(out_dims); for (size_t i = 0; i < dims_ref.size(); ++i) { dims_vector[dims_ref[i]] = kDelFlag; } dims_vector.erase(remove(dims_vector.begin(), dims_vector.end(), kDelFlag), dims_vector.end()); - out_dims = phi::make_ddim(dims_vector); + out_dims = common::make_ddim(dims_vector); } auto& place = *context.eigen_device(); Functor functor; diff --git a/paddle/phi/kernels/funcs/reduce_functor.h b/paddle/phi/kernels/funcs/reduce_functor.h index df36bee5f98ff3..ee319b060d0957 100644 --- a/paddle/phi/kernels/funcs/reduce_functor.h +++ b/paddle/phi/kernels/funcs/reduce_functor.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/reduce_grad_functions.h b/paddle/phi/kernels/funcs/reduce_grad_functions.h index e06ae3986b9ded..c6c13b5fac64ba 100644 --- a/paddle/phi/kernels/funcs/reduce_grad_functions.h +++ b/paddle/phi/kernels/funcs/reduce_grad_functions.h @@ -36,7 +36,7 @@ void ReduceGradFunctor(const Context& dev_ctx, auto x_grad = phi::EigenTensor::From(*output); auto x_rank = static_cast(x.dimensions().size()); auto x_dims = input0.dims(); - auto reduced_dims_v = phi::vectorize(x_dims); + auto reduced_dims_v = 
common::vectorize(x_dims); std::vector dims_ref = dims; Eigen::array broadcast_dim; for (size_t i = 0; i < D; ++i) broadcast_dim[i] = 1; @@ -50,7 +50,7 @@ void ReduceGradFunctor(const Context& dev_ctx, broadcast_dim[dims_ref[i]] = x_dims[dims_ref[i]]; broad_cast_times *= x_dims[dims_ref[i]]; } - auto reduced_dims = phi::make_ddim(reduced_dims_v); + auto reduced_dims = common::make_ddim(reduced_dims_v); auto x_reduce = EigenTensor::From(input1, reduced_dims); auto x_reduce_grad = EigenTensor::From(input2, reduced_dims); diff --git a/paddle/phi/kernels/funcs/repeat_tensor2index_tensor.h b/paddle/phi/kernels/funcs/repeat_tensor2index_tensor.h index b66bf39b99e98c..9d2600183651f4 100644 --- a/paddle/phi/kernels/funcs/repeat_tensor2index_tensor.h +++ b/paddle/phi/kernels/funcs/repeat_tensor2index_tensor.h @@ -45,7 +45,7 @@ void RepeatsTensor2IndexTensor(const Context& ctx, std::fill_n(index_vec.begin() + offset, repeats_data[i], i); offset += repeats_data[i]; } - index->Resize(phi::make_ddim({index_size})); + index->Resize(common::make_ddim({index_size})); phi::TensorFromVector(index_vec, ctx, index); } diff --git a/paddle/phi/kernels/funcs/scatter.cu.h b/paddle/phi/kernels/funcs/scatter.cu.h index 0f437db10b9332..8ef33b0ec49845 100644 --- a/paddle/phi/kernels/funcs/scatter.cu.h +++ b/paddle/phi/kernels/funcs/scatter.cu.h @@ -229,8 +229,8 @@ void GPUScatterNdAdd(const phi::GPUContext& ctx, // final dim int64_t end_size = index_dims[index_dims_size - 1]; // remain dim - auto remain_ddim = phi::slice_ddim(index_dims, 0, index_dims_size - 1); - int64_t remain_numel = phi::product(remain_ddim); + auto remain_ddim = common::slice_ddim(index_dims, 0, index_dims_size - 1); + int64_t remain_numel = common::product(remain_ddim); // slice size int64_t slice_size = 1; for (int64_t i = end_size; i < output_dims_size; ++i) { diff --git a/paddle/phi/kernels/funcs/scatter.h b/paddle/phi/kernels/funcs/scatter.h index 5934f57b47ddec..ab4af24b70c94a 100644 --- a/paddle/phi/kernels/funcs/scatter.h +++ b/paddle/phi/kernels/funcs/scatter.h @@ -17,8 +17,8 @@ limitations under the License. 
*/ #include #include +#include "paddle/common/ddim.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/eigen/common.h" @@ -276,8 +276,8 @@ void ScatterNdAdd(const phi::CPUContext& ctx, // final dim int64_t end_size = index_dims[index_dims_size - 1]; // remain dim - auto remain_ddim = phi::slice_ddim(index_dims, 0, index_dims_size - 1); - int64_t remain_numel = phi::product(remain_ddim); + auto remain_ddim = common::slice_ddim(index_dims, 0, index_dims_size - 1); + int64_t remain_numel = common::product(remain_ddim); // slice size int64_t slice_size = 1; for (int64_t i = end_size; i < output_dims_size; ++i) { diff --git a/paddle/phi/kernels/funcs/segment_pooling.cc b/paddle/phi/kernels/funcs/segment_pooling.cc index ad8c942e10b28a..9af1211b9a144f 100644 --- a/paddle/phi/kernels/funcs/segment_pooling.cc +++ b/paddle/phi/kernels/funcs/segment_pooling.cc @@ -56,7 +56,7 @@ class SegmentPoolFunctor { Tensor in_t = input.Slice(last_idx, idx); int64_t h = idx - last_idx; - auto in_e = EigenMatrix::From(in_t, phi::make_ddim({h, w})); + auto in_e = EigenMatrix::From(in_t, common::make_ddim({h, w})); auto out_e = EigenVector::Flatten(out_t); auto reduce_dim = Eigen::array({{0}}); diff --git a/paddle/phi/kernels/funcs/select_impl.cu.h b/paddle/phi/kernels/funcs/select_impl.cu.h index 96b7942cf27094..1afcad9f0f918c 100644 --- a/paddle/phi/kernels/funcs/select_impl.cu.h +++ b/paddle/phi/kernels/funcs/select_impl.cu.h @@ -25,9 +25,9 @@ namespace cub = hipcub; #endif #include +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/primitive/kernel_primitives.h" @@ -444,10 +444,10 @@ void SelectKernel(const KPDevice &dev_ctx, std::vector out_dim = {static_cast(total_true_num)}; if (SelectData == 1) { - out->Resize(phi::make_ddim(out_dim)); + out->Resize(common::make_ddim(out_dim)); } else if (SelectData == 0) { // == 0 where_index out_dim.push_back(static_cast(rank)); - out->Resize(phi::make_ddim(out_dim)); + out->Resize(common::make_ddim(out_dim)); } auto out_data = dev_ctx.template Alloc(out); // 3.2 get true data's index according to cond_data and cumsum_data diff --git a/paddle/phi/kernels/funcs/selected_rows_functor.cc b/paddle/phi/kernels/funcs/selected_rows_functor.cc index fef0b1dbff25d6..1233490acba539 100644 --- a/paddle/phi/kernels/funcs/selected_rows_functor.cc +++ b/paddle/phi/kernels/funcs/selected_rows_functor.cc @@ -19,7 +19,7 @@ limitations under the License. 
*/ #include #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/mixed_vector.h" #ifdef PADDLE_WITH_XPU @@ -561,7 +561,7 @@ struct MergeAddImpl { out.set_height(input_height); DenseTensor* out_tensor = out.mutable_value(); - out_tensor->Resize(phi::make_ddim( + out_tensor->Resize(common::make_ddim( {static_cast(merged_row_set.size()), input_width})); auto* out_data = context.template Alloc(out_tensor); @@ -677,8 +677,8 @@ struct MergeAdd { out.set_rows(merge_rows); out.set_height(input.height()); DenseTensor* out_tensor = out.mutable_value(); - out_tensor->Resize( - phi::make_ddim({static_cast(merge_rows.size()), input_width})); + out_tensor->Resize(common::make_ddim( + {static_cast(merge_rows.size()), input_width})); context.template Alloc(out_tensor); std::unordered_map rows_to_id; @@ -768,7 +768,7 @@ struct MergeAdd { out.set_height(input_height); DenseTensor* out_tensor = out.mutable_value(); - out_tensor->Resize(phi::make_ddim( + out_tensor->Resize(common::make_ddim( {static_cast(merged_row_set.size()), input_width})); context.template Alloc(out_tensor); @@ -877,7 +877,7 @@ struct MergeAverage { out.set_height(input_height); DenseTensor* out_tensor = out.mutable_value(); - out_tensor->Resize(phi::make_ddim( + out_tensor->Resize(common::make_ddim( {static_cast(merged_row_set.size()), input_width})); auto* out_data = context.template Alloc(out_tensor); diff --git a/paddle/phi/kernels/funcs/selected_rows_functor.cu b/paddle/phi/kernels/funcs/selected_rows_functor.cu index c74cda2e2bd443..b8617c2c9209ae 100644 --- a/paddle/phi/kernels/funcs/selected_rows_functor.cu +++ b/paddle/phi/kernels/funcs/selected_rows_functor.cu @@ -399,8 +399,8 @@ struct MergeAddImpl { out.set_rows(merge_rows); out.set_height(input.height()); DenseTensor* out_tensor = out.mutable_value(); - out_tensor->Resize( - phi::make_ddim({static_cast(merge_rows.size()), input_width})); + out_tensor->Resize(common::make_ddim( + {static_cast(merge_rows.size()), input_width})); context.template Alloc(out_tensor); phi::funcs::SetConstant constant_functor; @@ -471,8 +471,8 @@ struct MergeAddImpl { out.set_height(input_height); DenseTensor* out_tensor = out.mutable_value(); - out_tensor->Resize( - phi::make_ddim({static_cast(merge_rows.size()), input_width})); + out_tensor->Resize(common::make_ddim( + {static_cast(merge_rows.size()), input_width})); context.template Alloc(out_tensor); phi::funcs::SetConstant constant_functor; diff --git a/paddle/phi/kernels/funcs/sequence2batch.cc b/paddle/phi/kernels/funcs/sequence2batch.cc index a00af7177cc882..3e30bca02d8a4f 100644 --- a/paddle/phi/kernels/funcs/sequence2batch.cc +++ b/paddle/phi/kernels/funcs/sequence2batch.cc @@ -26,8 +26,8 @@ class CopyMatrixRowsFunctor { phi::DenseTensor* dst, bool is_src_index) { size_t* index = index_lod.data(); - const auto& src_dims = vectorize(src.dims()); - const auto& dst_dims = vectorize(dst->dims()); + const auto& src_dims = common::vectorize(src.dims()); + const auto& dst_dims = common::vectorize(dst->dims()); PADDLE_ENFORCE_EQ(src_dims.size(), 2UL, phi::errors::InvalidArgument( diff --git a/paddle/phi/kernels/funcs/sequence_pooling.cc b/paddle/phi/kernels/funcs/sequence_pooling.cc index 8d7d3b506136da..004bef522ab16a 100644 --- a/paddle/phi/kernels/funcs/sequence_pooling.cc +++ b/paddle/phi/kernels/funcs/sequence_pooling.cc @@ -415,7 +415,7 @@ class SequencePoolFunctor { phi::DenseTensor in_t = input.Slice(static_cast(lod[i]), static_cast(lod[i + 1])); int64_t h = static_cast(lod[i + 1] - 
lod[i]); - auto in_e = EigenMatrix::From(in_t, phi::make_ddim({h, w})); + auto in_e = EigenMatrix::From(in_t, common::make_ddim({h, w})); auto out_e = EigenVector::Flatten(out_t); if (pooltype == "AVERAGE") { out_e.device(place) = in_e.mean(Eigen::array({{0}})); diff --git a/paddle/phi/kernels/funcs/sequence_pooling.cu b/paddle/phi/kernels/funcs/sequence_pooling.cu index 4bc4b11692d5c4..551525a67bb7a5 100644 --- a/paddle/phi/kernels/funcs/sequence_pooling.cu +++ b/paddle/phi/kernels/funcs/sequence_pooling.cu @@ -15,8 +15,8 @@ limitations under the License. */ #include #include +#include "paddle/common/macros.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/core/mixed_vector.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/sequence_pooling.h" diff --git a/paddle/phi/kernels/funcs/slice.h b/paddle/phi/kernels/funcs/slice.h index 38b127541650be..ca438c5067849c 100644 --- a/paddle/phi/kernels/funcs/slice.h +++ b/paddle/phi/kernels/funcs/slice.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/eigen/common.h" @@ -74,7 +74,7 @@ DenseTensor Slice(const Context& dev_ctx, std::vector ends) { DenseTensor ret; std::vector new_axes = axes; - std::vector out_shape = phi::vectorize(x.dims()); + std::vector out_shape = common::vectorize(x.dims()); size_t rank = out_shape.size(); PADDLE_ENFORCE_EQ( axes.size(), @@ -105,7 +105,7 @@ DenseTensor Slice(const Context& dev_ctx, offset[new_axes[i]] = starts[i]; extends[new_axes[i]] = ends[i] - starts[i]; } - ret.Resize(phi::make_ddim(out_shape)); + ret.Resize(common::make_ddim(out_shape)); dev_ctx.template Alloc(&ret); switch (rank) { SLICE_RANK_CASE(1); @@ -140,14 +140,14 @@ static void Slice(const Context& ctx, extents[i] = in_dims[i]; } - std::vector out_shape_vec = vectorize(in_dims); + std::vector out_shape_vec = common::vectorize(in_dims); for (size_t i = 0; i < axes_vec.size(); ++i) { offsets[axes_vec[i]] = begin_vec[i]; extents[axes_vec[i]] = end_vec[i] - begin_vec[i]; out_shape_vec[axes_vec[i]] = end_vec[i] - begin_vec[i]; } - DDim out_dims(make_ddim(out_shape_vec)); + DDim out_dims(common::make_ddim(out_shape_vec)); out->Resize(out_dims); ctx.template Alloc(out); diff --git a/paddle/phi/kernels/funcs/slice_utils.h b/paddle/phi/kernels/funcs/slice_utils.h index 04e5c11aabeed3..a78dcf5419cd3f 100644 --- a/paddle/phi/kernels/funcs/slice_utils.h +++ b/paddle/phi/kernels/funcs/slice_utils.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once #include -#include +#include #include #include #include "paddle/phi/core/flags.h" @@ -210,7 +210,7 @@ inline DDim GetDecreasedDims(const DDim slice_dims, // slice. This will remove in release 2.6. 
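(For context: GetDecreasedDims in this hunk drops the slice axes marked for decrease and, for legacy semantics, keeps a rank-1 shape rather than a 0-D one. A simplified standalone sketch of that step, not the exact upstream signature:)

#include <cstdint>
#include <set>
#include <vector>
#include "paddle/common/ddim.h"

// Drop the axes listed in `decrease_axes` (each of extent 1) from
// `slice_dims`; fall back to shape {1} when every axis is dropped.
phi::DDim DropDecreasedAxes(const phi::DDim& slice_dims,
                            const std::set<int>& decrease_axes) {
  std::vector<int64_t> new_shape;
  for (int i = 0; i < slice_dims.size(); ++i) {
    if (decrease_axes.count(i) == 0) new_shape.push_back(slice_dims[i]);
  }
  if (new_shape.empty()) new_shape.push_back(1);  // legacy 1-D fallback
  return common::make_ddim(new_shape);
}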
new_shape.push_back(1); } - decreased_dims = phi::make_ddim(new_shape); + decreased_dims = common::make_ddim(new_shape); } return decreased_dims; } diff --git a/paddle/phi/kernels/funcs/softmax.cu b/paddle/phi/kernels/funcs/softmax.cu index 2ca97cd4ac2055..c7dfd0c0978c00 100644 --- a/paddle/phi/kernels/funcs/softmax.cu +++ b/paddle/phi/kernels/funcs/softmax.cu @@ -35,7 +35,7 @@ void SoftmaxCUDNNFunctor::operator()( // ------------------- cudnn descriptors --------------------- ScopedTensorDescriptor xDesc; ScopedTensorDescriptor yDesc; - std::vector cudnn_tensor_dims = phi::vectorize(X->dims()); + std::vector cudnn_tensor_dims = common::vectorize(X->dims()); DataLayout layout = DataLayout::kNCHW; if (cudnn_tensor_dims.size() == 5) { layout = DataLayout::kNCDHW; @@ -88,7 +88,7 @@ void SoftmaxGradCUDNNFunctor::operator()( ScopedTensorDescriptor yDesc; ScopedTensorDescriptor dyDesc; ScopedTensorDescriptor dxDesc; - std::vector cudnn_tensor_dims = phi::vectorize(Y->dims()); + std::vector cudnn_tensor_dims = common::vectorize(Y->dims()); DataLayout layout = DataLayout::kNCHW; if (cudnn_tensor_dims.size() == 5) { layout = DataLayout::kNCDHW; diff --git a/paddle/phi/kernels/funcs/sparse/common_shape.h b/paddle/phi/kernels/funcs/sparse/common_shape.h index e4c836d1162523..0f207d099e8e71 100644 --- a/paddle/phi/kernels/funcs/sparse/common_shape.h +++ b/paddle/phi/kernels/funcs/sparse/common_shape.h @@ -16,7 +16,7 @@ limitations under the License. */ #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" namespace phi { namespace funcs { @@ -33,9 +33,9 @@ inline const DDim InferDenseDims(const DDim& x_dims, memcpy(&dense_dim_vec[1], x_dims.Get() + sparse_dim, dense_dim * sizeof(x_dims[0])); - values_dims = phi::make_ddim(dense_dim_vec); + values_dims = common::make_ddim(dense_dim_vec); } else { - values_dims = phi::make_ddim({non_zero_num}); + values_dims = common::make_ddim({non_zero_num}); } return values_dims; } diff --git a/paddle/phi/kernels/funcs/sparse/convolution.h b/paddle/phi/kernels/funcs/sparse/convolution.h index e6f3a573088b28..7048ca1a127f5c 100644 --- a/paddle/phi/kernels/funcs/sparse/convolution.h +++ b/paddle/phi/kernels/funcs/sparse/convolution.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/phi/kernels/funcs/sparse/flatten_indices.h b/paddle/phi/kernels/funcs/sparse/flatten_indices.h index 9a031b8cc12ca4..4edcd839572dbb 100644 --- a/paddle/phi/kernels/funcs/sparse/flatten_indices.h +++ b/paddle/phi/kernels/funcs/sparse/flatten_indices.h @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/sparse/softmax.cu.h b/paddle/phi/kernels/funcs/sparse/softmax.cu.h index 72f99bd6331c4b..b75f870970a314 100644 --- a/paddle/phi/kernels/funcs/sparse/softmax.cu.h +++ b/paddle/phi/kernels/funcs/sparse/softmax.cu.h @@ -40,7 +40,7 @@ inline DenseTensor GetOffsets(const Context& dev_ctx, } } - const IntArray strides_shape(phi::vectorize(indices.dims())); + const IntArray strides_shape(common::vectorize(indices.dims())); DenseTensor strides = phi::Empty(dev_ctx, strides_shape); auto strides_ptr = strides.data(); memory_utils::Copy(dev_ctx.GetPlace(), @@ -125,10 +125,10 @@ std::tuple ComputePoolMax( }); auto new_sz = thrust::distance(thrust_ptr(pool_sizes.data()), new_end.second); - pool_sizes.Resize(phi::make_ddim({new_sz})); + pool_sizes.Resize(common::make_ddim({new_sz})); DenseTensor pool_offsets; - pool_offsets.Resize(phi::make_ddim({new_sz})); + pool_offsets.Resize(common::make_ddim({new_sz})); dev_ctx.template Alloc(&pool_offsets); phi::Copy(dev_ctx, pool_sizes, dev_ctx.GetPlace(), false, &pool_offsets); diff --git a/paddle/phi/kernels/funcs/sparse/softmax.h b/paddle/phi/kernels/funcs/sparse/softmax.h index fcb45def6c1fae..2a820461c4181b 100644 --- a/paddle/phi/kernels/funcs/sparse/softmax.h +++ b/paddle/phi/kernels/funcs/sparse/softmax.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/tensor_utils.h" namespace phi { diff --git a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h index fde5cb1768d47c..3502dbfc9ceda4 100644 --- a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h +++ b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h @@ -16,11 +16,11 @@ #include "glog/logging.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/dynload/cusparse.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/sparse_coo_tensor.h" @@ -65,7 +65,7 @@ template inline void CreateCsrDescriptor(const phi::SparseCsrTensor& x, const phi::GPUContext& dev_ctx, cusparseSpMatDescr_t* descriptor) { - std::vector xdim_vec = phi::vectorize(x.dims()); + std::vector xdim_vec = common::vectorize(x.dims()); auto x_ndims = xdim_vec.size(); PADDLE_ENFORCE_GE( x_ndims, @@ -120,7 +120,7 @@ template inline void CreateCooDescriptor(const phi::SparseCooTensor& x, const phi::GPUContext& dev_ctx, cusparseSpMatDescr_t* descriptor) { - std::vector xdim_vec = phi::vectorize(x.dims()); + std::vector xdim_vec = common::vectorize(x.dims()); auto x_ndims = xdim_vec.size(); PADDLE_ENFORCE_GE( x_ndims, @@ -214,7 +214,7 @@ class CuSparseDnMatDescriptor { explicit CuSparseDnMatDescriptor(const phi::DenseTensor& x, const phi::GPUContext& dev_ctx) : dev_ctx_(dev_ctx) { - std::vector xdim_vec = phi::vectorize(x.dims()); + std::vector xdim_vec = common::vectorize(x.dims()); auto x_ndims = xdim_vec.size(); PADDLE_ENFORCE_GE( x_ndims, @@ -278,7 +278,7 @@ class CuSparseDnVecDescriptor { explicit CuSparseDnVecDescriptor(const phi::DenseTensor& x, const phi::GPUContext& dev_ctx) : dev_ctx_(dev_ctx) { - std::vector xdim_vec = phi::vectorize(x.dims()); + std::vector xdim_vec = 
common::vectorize(x.dims()); auto x_ndims = xdim_vec.size(); PADDLE_ENFORCE_GE(x_ndims, 1, diff --git a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.hip.h b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.hip.h index cbd42be3cb6d49..6b6c8c58385cd8 100644 --- a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.hip.h +++ b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.hip.h @@ -14,11 +14,11 @@ #pragma once +#include "paddle/common/ddim.h" #include "paddle/phi/backends/dynload/rocsparse.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/sparse_coo_tensor.h" @@ -64,7 +64,7 @@ template inline void CreateCsrDescriptor(const phi::SparseCsrTensor& x, const phi::GPUContext& dev_ctx, rocsparse_spmat_descr* descriptor) { - std::vector xdim_vec = phi::vectorize(x.dims()); + std::vector xdim_vec = common::vectorize(x.dims()); auto x_ndims = xdim_vec.size(); PADDLE_ENFORCE_GE( x_ndims, @@ -115,7 +115,7 @@ template inline void CreateCooDescriptor(const phi::SparseCooTensor& x, const phi::GPUContext& dev_ctx, rocsparse_spmat_descr* descriptor) { - std::vector xdim_vec = phi::vectorize(x.dims()); + std::vector xdim_vec = common::vectorize(x.dims()); auto x_ndims = xdim_vec.size(); PADDLE_ENFORCE_GE( x_ndims, @@ -203,7 +203,7 @@ class RocSparseDnMatDescriptor { explicit RocSparseDnMatDescriptor(const phi::DenseTensor& x, const phi::GPUContext& dev_ctx) : dev_ctx_(dev_ctx) { - std::vector xdim_vec = phi::vectorize(x.dims()); + std::vector xdim_vec = common::vectorize(x.dims()); auto x_ndims = xdim_vec.size(); PADDLE_ENFORCE_GE( x_ndims, diff --git a/paddle/phi/kernels/funcs/strided_memcpy.h b/paddle/phi/kernels/funcs/strided_memcpy.h index de38e40d317e19..b91ab85c55b33c 100644 --- a/paddle/phi/kernels/funcs/strided_memcpy.h +++ b/paddle/phi/kernels/funcs/strided_memcpy.h @@ -12,8 +12,8 @@ limitations under the License. 
*/ #pragma once #include +#include "paddle/common/macros.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/kernels/funcs/detail/strided_memcpy.h" namespace phi { @@ -146,12 +146,12 @@ inline void StridedMemcpyWithAxis0( const phi::DenseTensor& input, const std::vector& shape_refer, std::vector* outputs) { - const phi::DDim in_stride = stride_numel(input.dims()); + const phi::DDim in_stride = common::stride_numel(input.dims()); const int axis = 0; size_t input_offset = 0; for (size_t i = 0; i < outputs->size(); ++i) { - auto out_stride = stride_numel(shape_refer[i]->dims()); + auto out_stride = common::stride_numel(shape_refer[i]->dims()); auto out = outputs->at(i); if (out != nullptr && out->initialized() && out->numel() > 0) { StridedNumelCopyWithAxis(dev_ctx, diff --git a/paddle/phi/kernels/funcs/strided_slice.h b/paddle/phi/kernels/funcs/strided_slice.h index 4a88c1e0660b79..06503f80342d76 100644 --- a/paddle/phi/kernels/funcs/strided_slice.h +++ b/paddle/phi/kernels/funcs/strided_slice.h @@ -17,7 +17,7 @@ #include #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/tensor_array.h" @@ -212,7 +212,7 @@ void StridedSliceCompute(const Context& dev_ctx, out_dims_vector.data(), axes.size(), false); - DDim out_dims(phi::make_ddim(out_dims_vector)); + DDim out_dims(common::make_ddim(out_dims_vector)); std::vector reverse_vector(starts_.size(), 0); StridedSliceFunctor(starts_.data(), @@ -260,7 +260,7 @@ void StridedSliceCompute(const Context& dev_ctx, if (new_out_shape.size() == 0) { new_out_shape.push_back(1); } - out_dims_origin = phi::make_ddim(new_out_shape); + out_dims_origin = common::make_ddim(new_out_shape); } bool need_reverse = false; @@ -307,7 +307,7 @@ void StridedSliceCompute(const Context& dev_ctx, const std::vector& decrease_axis, TensorArray* out) { const int64_t size = x.size(); - auto in_dims = phi::make_ddim({size}); + auto in_dims = common::make_ddim({size}); auto starts_ = starts.GetData(); auto ends_ = ends.GetData(); @@ -329,7 +329,7 @@ void StridedSliceCompute(const Context& dev_ctx, out_dims_vector.data(), axes.size(), false); - DDim out_dims(phi::make_ddim(out_dims_vector)); + DDim out_dims(common::make_ddim(out_dims_vector)); std::vector reverse_vector(starts_.size(), 0); StridedSliceFunctor(starts_.data(), @@ -377,7 +377,7 @@ void StridedSliceCompute(const Context& dev_ctx, if (new_out_shape.size() == 0) { new_out_shape.push_back(1); } - out_dims_origin = phi::make_ddim(new_out_shape); + out_dims_origin = common::make_ddim(new_out_shape); } bool need_reverse = false; @@ -547,7 +547,7 @@ void StridedSliceGradCompute(const Context& dev_ctx, // calculate the output shape. when set it to inplace OP, there may be // some problems. 
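// ---------------------------------------------------------------------------
// Editorial sketch (not part of the patch): common::stride_numel(), adopted
// by the strided_memcpy.h hunk above, builds a DDim of "numel strides" --
// entry i is the number of elements spanned by dimensions i..rank-1.
// StrideNumelDemo is an illustrative name; the values are worked by hand
// under that assumption.
#include "paddle/common/ddim.h"

void StrideNumelDemo() {
  phi::DDim dims = common::make_ddim({2, 3, 4});
  phi::DDim strides = common::stride_numel(dims);
  // strides == {24, 12, 4}: 24 elements from dim 0 onward, 12 from dim 1,
  // and 4 in the innermost dim; StridedNumelCopyWithAxis walks these.
}
// ---------------------------------------------------------------------------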
const int64_t size = x.size(); - DDim out_dims = phi::make_ddim({size}); + DDim out_dims = common::make_ddim({size}); auto starts_ = starts.GetData(); auto ends_ = ends.GetData(); diff --git a/paddle/phi/kernels/funcs/top_k_function_cuda.h b/paddle/phi/kernels/funcs/top_k_function_cuda.h index b6d6b0cffc667f..31502804f7f4e1 100644 --- a/paddle/phi/kernels/funcs/top_k_function_cuda.h +++ b/paddle/phi/kernels/funcs/top_k_function_cuda.h @@ -1043,7 +1043,7 @@ bool SortTopk(const phi::GPUContext& ctx, Tensor input_indices; const std::vector dims = {num_rows, num_cols}; - auto dim = phi::make_ddim(dims); + auto dim = common::make_ddim(dims); input_indices.Resize(dim); ctx.template Alloc(&input_indices); size_t temp_storage_bytes = -1; @@ -1255,7 +1255,7 @@ bool SortTopk(const phi::GPUContext& ctx, static_cast(temp_indices)); std::vector odims = {static_cast(num_rows), static_cast(k)}; - auto dim = phi::make_ddim(odims); + auto dim = common::make_ddim(odims); auto e_values = phi::EigenMatrix::From(*out_tensor, dim); auto e_tmp_values = phi::EigenMatrix::From(static_cast(temp_values)); diff --git a/paddle/phi/kernels/funcs/transpose_function.cu.h b/paddle/phi/kernels/funcs/transpose_function.cu.h index 5bc42a8b69f0e7..173bef120fb606 100644 --- a/paddle/phi/kernels/funcs/transpose_function.cu.h +++ b/paddle/phi/kernels/funcs/transpose_function.cu.h @@ -1454,8 +1454,8 @@ inline void PermuteWithEigen( phi::DenseTensor temp_in; temp_in.ShareBufferWith(in); - temp_in.Resize(phi::make_ddim(simplifier.GetSrcDims())); - out->Resize(phi::make_ddim(simplifier.GetDstDims())); + temp_in.Resize(common::make_ddim(simplifier.GetSrcDims())); + out->Resize(common::make_ddim(simplifier.GetDstDims())); TransCompute( simplifier.GetRank(), ctx, temp_in, out, simplifier.GetPerm()); @@ -1476,7 +1476,7 @@ void TransposeGPUKernelDriver(const phi::GPUContext& ctx, bool ret = TransposeSimple::Run(ctx, in, perm, out, numel); if (!ret) { auto simplifier = phi::funcs::PermuteDimsSimplifier( - rank, numel, perm, phi::vectorize(in.dims())); + rank, numel, perm, common::vectorize(in.dims())); auto* tuner = phi::autotune::MakeTransposeTuner(PermuteWithEigen); tuner->AddCallBack(PermuteAndTranspose); diff --git a/paddle/phi/kernels/funcs/unique_functor.h b/paddle/phi/kernels/funcs/unique_functor.h index 806d7cca84851d..ade7cf2d8a0daf 100644 --- a/paddle/phi/kernels/funcs/unique_functor.h +++ b/paddle/phi/kernels/funcs/unique_functor.h @@ -71,7 +71,7 @@ struct UniqueOpFunctor { if (count_ != nullptr) { // Resize the count tensor dims to allocate the memory - count_->Resize(phi::make_ddim({static_cast(uniq.size())})); + count_->Resize(common::make_ddim({static_cast(uniq.size())})); IndexT* count_data = context_.template Alloc(count_); // init count_data to 0 memset(count_data, 0, uniq.size() * sizeof(IndexT)); @@ -101,7 +101,7 @@ struct UniqueOpFunctor { } } - out_->Resize(phi::make_ddim({static_cast(uniq.size())})); + out_->Resize(common::make_ddim({static_cast(uniq.size())})); auto* out_data = context_.template Alloc(out_); std::memcpy(out_data, uniq.data(), uniq.size() * sizeof(InT)); } @@ -141,12 +141,12 @@ static void UniqueFlattendTensor(const Context& context, bool return_counts) { const InT* in_data = in.data(); std::set unique(in_data, in_data + in.numel()); - out->Resize(phi::make_ddim({static_cast(unique.size())})); + out->Resize(common::make_ddim({static_cast(unique.size())})); auto* out_data = context.template Alloc(out); std::copy(unique.begin(), unique.end(), out_data); if (return_index) { - 
indices->Resize(phi::make_ddim({out->numel()})); + indices->Resize(common::make_ddim({out->numel()})); auto indices_data = context.template Alloc(indices); std::unordered_map indices_map; indices_map.reserve(out->numel()); @@ -160,7 +160,7 @@ static void UniqueFlattendTensor(const Context& context, } if (return_inverse) { - index->Resize(phi::make_ddim({in.numel()})); + index->Resize(common::make_ddim({in.numel()})); auto inverse_data = context.template Alloc(index); std::unordered_map inverse_map; inverse_map.reserve(out->numel()); @@ -173,7 +173,7 @@ static void UniqueFlattendTensor(const Context& context, } if (return_counts) { - count->Resize(phi::make_ddim({out->numel()})); + count->Resize(common::make_ddim({out->numel()})); auto count_data = context.template Alloc(count); std::unordered_map counts_map; counts_map.reserve(out->numel()); @@ -240,16 +240,16 @@ static void UniqueDim(const Context& context, std::iota(permute.begin(), permute.end(), 0); permute[axis] = 0; permute[0] = axis; - std::vector in_trans_dims_vec(phi::vectorize(in.dims())); + std::vector in_trans_dims_vec(common::vectorize(in.dims())); in_trans_dims_vec[axis] = in.dims()[0]; in_trans_dims_vec[0] = in.dims()[axis]; DenseTensor in_trans; - phi::DDim in_trans_dims = phi::make_ddim(in_trans_dims_vec); + phi::DDim in_trans_dims = common::make_ddim(in_trans_dims_vec); in_trans.Resize(in_trans_dims); context.template Alloc(&in_trans); TransCompute(in.dims().size(), context, in, &in_trans, permute); // reshape tensor: eg. [dim1, dim0, dim2] -> [dim1, dim0*dim2] - phi::DDim in_trans_flat_dims = phi::flatten_to_2d(in_trans_dims, 1); + phi::DDim in_trans_flat_dims = common::flatten_to_2d(in_trans_dims, 1); in_trans.Resize(in_trans_flat_dims); // sort indices @@ -304,10 +304,10 @@ static void UniqueDim(const Context& context, DenseTensor out_trans; std::vector out_trans_dims_vec = in_trans_dims_vec; out_trans_dims_vec[0] = input_unbind.size(); - out_trans.Resize(phi::make_ddim(out_trans_dims_vec)); + out_trans.Resize(common::make_ddim(out_trans_dims_vec)); context.template Alloc(&out_trans); std::swap(out_trans_dims_vec[0], out_trans_dims_vec[axis]); - out->Resize(phi::make_ddim(out_trans_dims_vec)); + out->Resize(common::make_ddim(out_trans_dims_vec)); context.template Alloc(out); concat_functor(context, input_unbind, 0, &out_trans); TransCompute( diff --git a/paddle/phi/kernels/funcs/unsqueeze.h b/paddle/phi/kernels/funcs/unsqueeze.h index b15e781b25117b..a8fc8dc8495449 100644 --- a/paddle/phi/kernels/funcs/unsqueeze.h +++ b/paddle/phi/kernels/funcs/unsqueeze.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/dense_tensor.h" // TODO(paddle-dev): Remove this file when we can call related Kernel directly @@ -100,7 +100,7 @@ inline DDim GetOutputSqueezeShape(const std::vector squeeze_dims, output_shape.push_back(in_dims[i]); } } - return phi::make_ddim(output_shape); + return common::make_ddim(output_shape); } inline DDim GetUnsqueezeShape(const std::vector unsqz_dims, @@ -149,13 +149,13 @@ inline DDim GetUnsqueezeShape(const std::vector unsqz_dims, } } - return phi::make_ddim(output_shape); + return common::make_ddim(output_shape); } inline const DenseTensor Unsqueeze(const DenseTensor& x, int axis = 0) { // don't copy data, only change the dims DenseTensor out(x); - std::vector out_shape = phi::vectorize(x.dims()); + std::vector out_shape = common::vectorize(x.dims()); if (axis >= 0) { auto index = (out_shape.begin() + axis); out_shape.insert(index, 
1); @@ -163,7 +163,7 @@ inline const DenseTensor Unsqueeze(const DenseTensor& x, int axis = 0) { auto index = (out_shape.end() + axis + 1); out_shape.insert(index, 1); } - out.Resize(phi::make_ddim(out_shape)); + out.Resize(common::make_ddim(out_shape)); return out; } diff --git a/paddle/phi/kernels/funcs/values_vectors_functor.h b/paddle/phi/kernels/funcs/values_vectors_functor.h index 512155b94bfb39..0de31efaa19b7d 100644 --- a/paddle/phi/kernels/funcs/values_vectors_functor.h +++ b/paddle/phi/kernels/funcs/values_vectors_functor.h @@ -14,8 +14,8 @@ #pragma once #ifdef PADDLE_WITH_CUDA +#include "paddle/common/errors.h" #include "paddle/phi/backends/dynload/cusolver.h" -#include "paddle/phi/core/errors.h" #endif // PADDLE_WITH_CUDA #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h" @@ -281,16 +281,16 @@ struct MatrixEighFunctor { input.type() == phi::DataType::COMPLEX128) { lrwork = std::max(1, static_cast(rwork_opt)); - rwork_tensor.Resize(phi::make_ddim({lrwork})); + rwork_tensor.Resize(common::make_ddim({lrwork})); rwork_data = dev_ctx.template Alloc(&rwork_tensor); } DenseTensor iwork_tensor, work_tensor; - iwork_tensor.Resize(phi::make_ddim({liwork})); + iwork_tensor.Resize(common::make_ddim({liwork})); int *iwork_data = dev_ctx.template Alloc(&iwork_tensor); - work_tensor.Resize(phi::make_ddim({lwork})); + work_tensor.Resize(common::make_ddim({lwork})); T *work_data = dev_ctx.template Alloc(&work_tensor); for (auto i = 0; i < batch_size; i++) { diff --git a/paddle/phi/kernels/funcs/vol2col.cc b/paddle/phi/kernels/funcs/vol2col.cc index b5d6086feda770..b7c6a1fd6c1e83 100644 --- a/paddle/phi/kernels/funcs/vol2col.cc +++ b/paddle/phi/kernels/funcs/vol2col.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/vol2col.h" #include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/enforce.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/vol2col.h b/paddle/phi/kernels/funcs/vol2col.h index 283ab3ea065635..bd909927952d04 100644 --- a/paddle/phi/kernels/funcs/vol2col.h +++ b/paddle/phi/kernels/funcs/vol2col.h @@ -16,8 +16,8 @@ limitations under the License. */ #include +#include "paddle/common/errors.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/errors.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/fusion/cpu/distributed_fused_lamb_init_kernel.cc b/paddle/phi/kernels/fusion/cpu/distributed_fused_lamb_init_kernel.cc index 3cb37ccf2ed89d..bbcb61bd454765 100644 --- a/paddle/phi/kernels/fusion/cpu/distributed_fused_lamb_init_kernel.cc +++ b/paddle/phi/kernels/fusion/cpu/distributed_fused_lamb_init_kernel.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/distributed_fused_lamb_init_kernel.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/fusion/cpu/fused_softmax_mask_upper_triangle_kernel.cc b/paddle/phi/kernels/fusion/cpu/fused_softmax_mask_upper_triangle_kernel.cc index b9ded16d1b0958..6257e9c451aaa7 100644 --- a/paddle/phi/kernels/fusion/cpu/fused_softmax_mask_upper_triangle_kernel.cc +++ b/paddle/phi/kernels/fusion/cpu/fused_softmax_mask_upper_triangle_kernel.cc @@ -13,7 +13,7 @@ // limitations under the License. 
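// ---------------------------------------------------------------------------
// Editorial sketch (not part of the patch): common::flatten_to_2d(), used by
// the unique_functor.h hunk above and the fusion_gru hunks below, collapses a
// DDim into a 2-D matrix shape around a split axis. FlattenDemo is an
// illustrative name; the values are worked by hand under that assumption.
#include "paddle/common/ddim.h"

void FlattenDemo() {
  phi::DDim dims = common::make_ddim({2, 3, 4, 5});
  phi::DDim mat = common::flatten_to_2d(dims, /*num_col_dims=*/2);
  // mat == {6, 20}: dims before the split axis multiply into the row count
  // (2*3), the remaining dims into the column count (4*5).
}
// ---------------------------------------------------------------------------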
#include "paddle/phi/kernels/fused_softmax_mask_upper_triangle_kernel.h" -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/fusion/cpu/fusion_gru_kernel.cc b/paddle/phi/kernels/fusion/cpu/fusion_gru_kernel.cc index 3b140091fc69c4..deac38bf8dbab9 100644 --- a/paddle/phi/kernels/fusion/cpu/fusion_gru_kernel.cc +++ b/paddle/phi/kernels/fusion/cpu/fusion_gru_kernel.cc @@ -16,9 +16,9 @@ #include #include +#include "paddle/common/errors.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/blas/blas.h" @@ -33,7 +33,7 @@ namespace fusion { auto x_lod = x.lod(); \ auto x_dims = x.dims(); /* T x M*/ \ auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1) \ - ? phi::flatten_to_2d(x_dims, 1) \ + ? common::flatten_to_2d(x_dims, 1) \ : x_dims; \ auto wh_dims = weight_h.dims(); /* D x 3D*/ \ const int total_T = x_mat_dims[0]; \ diff --git a/paddle/phi/kernels/fusion/cpu/fusion_repeated_fc_relu_kernel.cc b/paddle/phi/kernels/fusion/cpu/fusion_repeated_fc_relu_kernel.cc index b65cf71bf93859..b52871620e30a2 100644 --- a/paddle/phi/kernels/fusion/cpu/fusion_repeated_fc_relu_kernel.cc +++ b/paddle/phi/kernels/fusion/cpu/fusion_repeated_fc_relu_kernel.cc @@ -51,7 +51,7 @@ void FusionRepeatedFCReluKernel(const Context& dev_ctx, DenseTensor* out) { int weight_sz = static_cast(w.size()); - auto i_dims = phi::vectorize(x.dims()); + auto i_dims = common::vectorize(x.dims()); const auto& w_dims = w[0]->dims(); phi::jit::matmul_attr_t attr; attr.m = i_dims[0]; diff --git a/paddle/phi/kernels/fusion/cpu/fusion_seqconv_eltadd_relu_kernel.cc b/paddle/phi/kernels/fusion/cpu/fusion_seqconv_eltadd_relu_kernel.cc index fbe2ea8d12bc27..4ff18849316d8a 100644 --- a/paddle/phi/kernels/fusion/cpu/fusion_seqconv_eltadd_relu_kernel.cc +++ b/paddle/phi/kernels/fusion/cpu/fusion_seqconv_eltadd_relu_kernel.cc @@ -15,9 +15,9 @@ #include // for min, max #include +#include "paddle/common/errors.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/blas/blas.h" @@ -37,8 +37,8 @@ void FusionSeqConvEltAddReluKernel(const Context& dev_ctx, DenseTensor* out, DenseTensor* col_mat) { auto x_lod = x.lod(); - auto x_dims = phi::vectorize(x.dims()); - auto w_dims = phi::vectorize(filter.dims()); + auto x_dims = common::vectorize(x.dims()); + auto w_dims = common::vectorize(filter.dims()); PADDLE_ENFORCE_EQ( bias.numel(), w_dims[1], diff --git a/paddle/phi/kernels/fusion/cpu/fusion_seqexpand_concat_fc_kernel.cc b/paddle/phi/kernels/fusion/cpu/fusion_seqexpand_concat_fc_kernel.cc index d5eb7894455f1d..d96940a8c1c2f6 100644 --- a/paddle/phi/kernels/fusion/cpu/fusion_seqexpand_concat_fc_kernel.cc +++ b/paddle/phi/kernels/fusion/cpu/fusion_seqexpand_concat_fc_kernel.cc @@ -14,10 +14,10 @@ #include +#include "paddle/common/errors.h" #include "paddle/phi/backends/cpu/cpu_info.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git 
a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention.cu b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention.cu index cc4fd467dfc20b..f9c3cb0e7c7610 100644 --- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention.cu +++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention.cu @@ -14,8 +14,8 @@ #include "glog/logging.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/autogen/memory_efficient_attention.h" #include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/gemm_kernel_utils.h" diff --git a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_variable_forward_kernels.py b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_variable_forward_kernels.py index 07e710e52d206a..ac68a611539d20 100644 --- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_variable_forward_kernels.py +++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_variable_forward_kernels.py @@ -214,7 +214,7 @@ def parse_args(): cutlass::Status status; size_t workspace_size = fmha.get_workspace_size(args); phi::DenseTensor workspace; - workspace.Resize(phi::make_ddim({{static_cast(workspace_size)}})); + workspace.Resize(common::make_ddim({{static_cast(workspace_size)}})); ctx.template Alloc(&workspace); status = fmha.initialize(args, workspace.data()); if (status != cutlass::Status::kSuccess) {{ diff --git a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h index 65dfb1bc8eced4..43afbdb55707dd 100644 --- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h +++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" namespace phi { namespace fusion { diff --git a/paddle/phi/kernels/fusion/gpu/cast_with_ptr.h b/paddle/phi/kernels/fusion/gpu/cast_with_ptr.h index 5ae8aed256ccdd..a4fdcb10e1b189 100644 --- a/paddle/phi/kernels/fusion/gpu/cast_with_ptr.h +++ b/paddle/phi/kernels/fusion/gpu/cast_with_ptr.h @@ -14,9 +14,9 @@ #pragma once +#include "paddle/common/ddim.h" #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" @@ -37,9 +37,9 @@ static void VecCastKernel(const phi::GPUContext &ctx, auto main_offset = n / (VecSize * thread) * VecSize * thread; auto stream = ctx.stream(); using FunctorT = CastFunctor; - phi::Array in_arr; + Array in_arr; in_arr[0] = reinterpret_cast(x); - phi::Array<_ptr_ OutT *, 1> out_arr; + Array<_ptr_ OutT *, 1> out_arr; out_arr[0] = y; phi::funcs::VectorizedElementwiseKernel <<>>( diff --git a/paddle/phi/kernels/fusion/gpu/conv_fusion_kernel.cu b/paddle/phi/kernels/fusion/gpu/conv_fusion_kernel.cu index 3fd94b8e3b46ee..e8127fbdae3993 100644 --- a/paddle/phi/kernels/fusion/gpu/conv_fusion_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/conv_fusion_kernel.cu @@ -23,12 +23,12 @@ #include "glog/logging.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/backends/dynload/cudnn.h" #include "paddle/phi/backends/gpu/cuda/cudnn_desc.h" #include 
"paddle/phi/common/backend.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/impl/conv_cudnn_impl.h" #include "paddle/utils/optional.h" @@ -221,7 +221,7 @@ class CudnnConvDescManager { phi::UpdatePaddingAndDilation(&paddings, &dilations, padding_algorithm, - make_ddim(in_data_dims), + common::make_ddim(in_data_dims), strides, ksize); @@ -400,8 +400,8 @@ void Conv2dFusionKernel(const Context& ctx, paddings_t, dilations_t, padding_algorithm, - phi::vectorize(input.dims()), - phi::vectorize(filter.dims()), + common::vectorize(input.dims()), + common::vectorize(filter.dims()), strides, compute_format); @@ -409,7 +409,7 @@ void Conv2dFusionKernel(const Context& ctx, const int input_rank = input.dims().size(); auto unsys_pad_process = [&](const std::vector& new_input_shape_vec, const std::vector& input_pad) { - DDim new_input_shape(make_ddim(new_input_shape_vec)); + DDim new_input_shape(common::make_ddim(new_input_shape_vec)); transformed_input.Resize(new_input_shape); ctx.template Alloc(&transformed_input); @@ -528,10 +528,10 @@ void Conv2dFusionKernel(const Context& ctx, }; auto cudnn_cache_info = CudnnConvDescManager::Instance()->GetCudnnCacheInfo( - phi::vectorize(transformed_input.dims()), - phi::vectorize(filter.dims()), + common::vectorize(transformed_input.dims()), + common::vectorize(filter.dims()), b_dims, - phi::vectorize(output->dims()), + common::vectorize(output->dims()), conv_attr_cache->paddings, strides, conv_attr_cache->dilations, diff --git a/paddle/phi/kernels/fusion/gpu/distributed_fused_lamb_init_kernel.cu b/paddle/phi/kernels/fusion/gpu/distributed_fused_lamb_init_kernel.cu index 3ae7f0682bc75b..7182a13bcf0fcd 100644 --- a/paddle/phi/kernels/fusion/gpu/distributed_fused_lamb_init_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/distributed_fused_lamb_init_kernel.cu @@ -275,7 +275,7 @@ static void ShareBufferForNonInitedTensor(DenseTensor *origin, DDim fused_out_dim = fused_out->dims(); auto fused_out_numel = fused_out->numel(); - auto numel = phi::product(dims); + auto numel = common::product(dims); *origin = fused_out->Resize({fused_out_numel}) .Slice(numel_offset, numel + numel_offset); origin->Resize(dims); diff --git a/paddle/phi/kernels/fusion/gpu/fused_embedding_eltwise_layernorm_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_embedding_eltwise_layernorm_kernel.cu index 71e778ca6574e4..1833788a6b8c97 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_embedding_eltwise_layernorm_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_embedding_eltwise_layernorm_kernel.cu @@ -15,11 +15,11 @@ #include #include +#include "paddle/common/errors.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/emb_eltwise_layer_norm_functor.h" diff --git a/paddle/phi/kernels/fusion/gpu/fused_fc_elementwise_layernorm_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_fc_elementwise_layernorm_kernel.cu index f7f8faa329d60f..2d3b2938a09a07 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_fc_elementwise_layernorm_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_fc_elementwise_layernorm_kernel.cu @@ -27,11 +27,11 @@ namespace cub = hipcub; #include #endif +#include "paddle/common/errors.h" #include 
"paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/blas/blas.h" @@ -420,7 +420,7 @@ void FusedFCElementwiseLayerNormKernel( auto w_dims = w.dims(); int N = w_dims[1]; int K = w_dims[0]; - int M = phi::product(x.dims()) / K; + int M = common::product(x.dims()) / K; const T* x_data = x.data(); const T* w_data = w.data(); diff --git a/paddle/phi/kernels/fusion/gpu/fused_scale_bias_add_relu_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_scale_bias_add_relu_kernel.cu index ff5edd689f7f3b..6bb7950a823aba 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_scale_bias_add_relu_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_scale_bias_add_relu_kernel.cu @@ -77,8 +77,8 @@ void FusedScaleBiasAddReluKernel(const Context& dev_ctx, auto tensor_format_math = CUDNN_DATA_FLOAT; auto compute_dtype = CUDNN_DATA_FLOAT; - auto dim_x = - phi::backends::gpu::TransformDimOrder(phi::vectorize(x1.dims())); + auto dim_x = phi::backends::gpu::TransformDimOrder( + common::vectorize(x1.dims())); std::vector dim_c(dim_x.size(), 1); dim_c[1] = dim_x[1]; // [1, C, 1, 1] diff --git a/paddle/phi/kernels/fusion/gpu/fused_scale_bias_relu_conv_bn_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_scale_bias_relu_conv_bn_kernel.cu index c0d35cbf718abc..04ed9ebaf146bf 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_scale_bias_relu_conv_bn_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_scale_bias_relu_conv_bn_kernel.cu @@ -85,7 +85,7 @@ void FusedScaleBiasReluConvBnstatsImpl( auto filter_dims = w_transformed.dims(); DDim in_data_dims = slice_ddim(in_dims, 1, in_dims.size() - 1); DDim filter_data_dims = slice_ddim(filter_dims, 1, filter_dims.size() - 1); - std::vector ksize = phi::vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); phi::UpdatePaddingAndDilation(&paddings_vec, &dilations_vec, padding_algorithm, @@ -122,12 +122,12 @@ void FusedScaleBiasReluConvBnstatsImpl( auto compute_dtype = CUDNN_DATA_FLOAT; // get dims in CUDNN manner: [N, C, H, W] - auto dim_x = - phi::backends::gpu::TransformDimOrder(phi::vectorize(in_dims)); + auto dim_x = phi::backends::gpu::TransformDimOrder( + common::vectorize(in_dims)); auto dim_filt = phi::backends::gpu::TransformDimOrder( - phi::vectorize(filter_dims)); + common::vectorize(filter_dims)); auto dim_y = phi::backends::gpu::TransformDimOrder( - phi::vectorize(output->dims())); + common::vectorize(output->dims())); std::vector dim_scale(dim_x.size(), 1); dim_scale[1] = dim_x[1]; // [1, C, 1, 1] std::vector dim_sum(dim_x.size(), 1); // [1, K, 1, 1] @@ -323,7 +323,7 @@ void BNFinalizeImpl(const Context& dev_ctx, auto tensor_format = phi::backends::gpu::ToCudnnDataType(eq_scale->dtype()); auto compute_dtype = CUDNN_DATA_FLOAT; // create tensor descriptors - auto dim_input = phi::vectorize(sum_tensor.dims()); + auto dim_input = common::vectorize(sum_tensor.dims()); std::vector dim_c = {1, dim_input[0], 1, 1}; // [1, C, 1, 1] std::vector dim_scalar = {1, 1, 1, 1}; std::vector stride_scalar = {1, 1, 1, 1}; @@ -555,7 +555,7 @@ void FusedScaleBiasReluConvBnKernel(const Context& dev_ctx, if (accumulation_count == 0) { // dim_out = [N, H, W, C] // accumulation_count = N * H * W - auto dim_out = phi::vectorize(out->dims()); + auto dim_out = common::vectorize(out->dims()); 
accumulation_count = dim_out[0] * dim_out[1] * dim_out[2]; } diff --git a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_grad_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_grad_kernel.cu index 6c7fe36d364576..a08af5a5b89581 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_grad_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_grad_kernel.cu @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/common/errors.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/generator.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_kernel.cu b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_kernel.cu index 30e5599aac2363..a0b7cf5b2689ce 100644 --- a/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_kernel.cu @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/common/errors.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/generator.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/fusion/gpu/fusion_transpose_flatten_concat_kernel.cu b/paddle/phi/kernels/fusion/gpu/fusion_transpose_flatten_concat_kernel.cu index b71f814fd4c985..422b1eade55769 100644 --- a/paddle/phi/kernels/fusion/gpu/fusion_transpose_flatten_concat_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/fusion_transpose_flatten_concat_kernel.cu @@ -15,10 +15,10 @@ #include #include +#include "paddle/common/errors.h" #include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/common_shape.h" diff --git a/paddle/phi/kernels/fusion/gpu/multihead_matmul_kernel.cu b/paddle/phi/kernels/fusion/gpu/multihead_matmul_kernel.cu index 06f28d387b3b33..c970f50eb117ad 100644 --- a/paddle/phi/kernels/fusion/gpu/multihead_matmul_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/multihead_matmul_kernel.cu @@ -15,9 +15,9 @@ #include #include +#include "paddle/common/errors.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/blas/blas.h" @@ -349,9 +349,9 @@ void MultiheadMatmulKernel(const Context &dev_ctx, phi::DenseTensor temp_out_tensor; auto temp_out_dims = - phi::make_ddim({batch, seq_len, 3, head_number, head_size}); + common::make_ddim({batch, seq_len, 3, head_number, head_size}); temp_out_tensor.Resize( - {batch * seq_len, phi::product(temp_out_dims) / (batch * seq_len)}); + {batch * seq_len, common::product(temp_out_dims) / (batch * seq_len)}); auto *temp_out_data = dev_ctx.template Alloc( &temp_out_tensor, temp_out_tensor.numel() * sizeof(T)); diff --git a/paddle/phi/kernels/fusion/gpu/skip_layernorm_kernel.cu b/paddle/phi/kernels/fusion/gpu/skip_layernorm_kernel.cu index c180311755cd97..1bb5c5dfb6301c 100644 --- 
a/paddle/phi/kernels/fusion/gpu/skip_layernorm_kernel.cu +++ b/paddle/phi/kernels/fusion/gpu/skip_layernorm_kernel.cu @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/core/errors.h" +#include "paddle/common/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/skip_layernorm_functor.h" diff --git a/paddle/phi/kernels/fusion/onednn/fc_kernel.cc b/paddle/phi/kernels/fusion/onednn/fc_kernel.cc index 368a1a616eab84..6eed95b9b1c9a4 100644 --- a/paddle/phi/kernels/fusion/onednn/fc_kernel.cc +++ b/paddle/phi/kernels/fusion/onednn/fc_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include +#include "paddle/common/errors.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/expect.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" @@ -84,8 +84,8 @@ class FCOneDNNHandler dev_ctx_(dev_ctx) { this->memory_key_ = dev_ctx.GetInputsName("W")[0]; - auto x_vec_dims = phi::vectorize(x->dims()); - auto weights_vec_dims = phi::vectorize(weights->dims()); + auto x_vec_dims = common::vectorize(x->dims()); + auto weights_vec_dims = common::vectorize(weights->dims()); int MB = 1; for (int i = 0; i < in_num_col_dims; ++i) { @@ -382,7 +382,7 @@ void RecomputeOutputDims(const int in_num_col_dims, output_dims, in_num_col_dims, padding_weights); - out->Resize(phi::make_ddim(output_dims)); + out->Resize(common::make_ddim(output_dims)); out->set_lod(x->lod()); } @@ -436,8 +436,8 @@ void RunKernel(const phi::OneDNNContext& dev_ctx, phi::funcs::CreateKey(dev_ctx, dev_ctx.GetInputsName("Input")[0], dev_ctx.GetInputsName("W")[0], - phi::vectorize(input.dims()), - phi::vectorize(w.dims()))); + common::vectorize(input.dims()), + common::vectorize(w.dims()))); auto inner_product_cache = std::static_pointer_cast(dev_ctx.GetBlob(cache_key)); @@ -547,7 +547,7 @@ void RunKernel(const phi::OneDNNContext& dev_ctx, } const auto out_md = - dst_memory_p->get_desc().reshape(phi::vectorize(out->dims())); + dst_memory_p->get_desc().reshape(common::vectorize(out->dims())); if (dev_ctx.HasDnnAttr("fused_reshape2_shape")) { phi::funcs::SetOutMemDescWithReshape2FuseSupport( diff --git a/paddle/phi/kernels/fusion/onednn/fused_conv_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_conv_kernel.cc index 6cbf2c2c05f7dc..9c19c9a202c161 100644 --- a/paddle/phi/kernels/fusion/onednn/fused_conv_kernel.cc +++ b/paddle/phi/kernels/fusion/onednn/fused_conv_kernel.cc @@ -145,7 +145,7 @@ KernelKey ConvGetKernelTypeForVar(const GetKernelTypeForVarContext* ctx) { (tensor.layout() != phi::DataLayout::ONEDNN)) { auto it = attrs.find("data_format"); const std::string data_format = PADDLE_GET_CONST(std::string, it->second); - auto dl = phi::StringToDataLayout(data_format); + auto dl = common::StringToDataLayout(data_format); // Some models may have intentionally set "AnyLayout" for conv // op. 
Treat this as NCHW (default data_format value) if (dl != phi::DataLayout::kAnyLayout) { diff --git a/paddle/phi/kernels/fusion/onednn/fused_matmul_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_matmul_kernel.cc index 86ef5b368476ad..1f2c0766f95e40 100644 --- a/paddle/phi/kernels/fusion/onednn/fused_matmul_kernel.cc +++ b/paddle/phi/kernels/fusion/onednn/fused_matmul_kernel.cc @@ -369,10 +369,11 @@ void ExecuteFusedMatmul(const OneDNNContext &dev_ctx, if (is_output_fused && !funcs::is_int8()) { auto permuted_md = dst_memory_p->get_desc().permute_axes(fused_transpose_Out); - out->set_mem_desc(permuted_md.reshape(vectorize(out->dims()))); - } else { out->set_mem_desc( - dst_memory_p->get_desc().reshape(vectorize(out->dims()))); + permuted_md.reshape(common::vectorize(out->dims()))); + } else { + out->set_mem_desc(dst_memory_p->get_desc().reshape( + common::vectorize(out->dims()))); } } @@ -380,9 +381,9 @@ std::vector GetInputShape(DDim input_dims, std::vector shape, std::vector axis) { if (!shape.empty() && !axis.empty()) { - return vectorize(input_dims.reshape(shape).transpose(axis)); + return common::vectorize(input_dims.reshape(shape).transpose(axis)); } - return vectorize(input_dims); + return common::vectorize(input_dims); } void CalculateMatrixDims(const std::vector &x_dims, @@ -413,7 +414,7 @@ void CalculateMatrixDims(const std::vector &x_dims, } if (!is_output_fused && x_dims.size() > 2 && y_dims.size() > 2) { - auto out_dims = vectorize(out->dims()); + auto out_dims = common::vectorize(out->dims()); for (size_t i = 0; i < (*x_bd_dims).size() - 2; ++i) { PADDLE_ENFORCE_EQ( (*x_bd_dims)[i] == (*y_bd_dims)[i] || (*x_bd_dims)[i] == 1 || @@ -429,7 +430,7 @@ void CalculateMatrixDims(const std::vector &x_dims, (*y_bd_dims)[i])); (out_dims)[i] = std::max((*x_bd_dims)[i], (*y_bd_dims)[i]); } - out->Resize(make_ddim((out_dims))); + out->Resize(common::make_ddim((out_dims))); } } diff --git a/paddle/phi/kernels/fusion/onednn/fused_transpose_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_transpose_kernel.cc index 964263424f0973..a7f9e49e325603 100644 --- a/paddle/phi/kernels/fusion/onednn/fused_transpose_kernel.cc +++ b/paddle/phi/kernels/fusion/onednn/fused_transpose_kernel.cc @@ -48,7 +48,7 @@ void SetInMemDescWithSqueeze2FuseSupport( } in->set_mem_desc(in_md.reshape(squeezed_op_tz)); - in->Resize(make_ddim(squeezed_op_tz)); + in->Resize(common::make_ddim(squeezed_op_tz)); } template @@ -76,7 +76,7 @@ void FusedTransposeKernel(const Context& dev_ctx, formated_axis[i] = axis[i] + axis_size; } } - auto dims = phi::vectorize(x_dims); + auto dims = common::vectorize(x_dims); std::rotate(dims.begin() + 1, dims.begin() + 2, dims.end()); x_dims = x_dims.reshape(dims); @@ -107,7 +107,7 @@ void FusedTransposeKernel(const Context& dev_ctx, return; } - auto x_vec_dims = vectorize(x.dims()); + auto x_vec_dims = common::vectorize(x.dims()); auto x_type = funcs::ToOneDNNDataType(x.dtype()); dnnl::primitive_attr attrs; @@ -188,7 +188,7 @@ void FusedTransposeKernel(const Context& dev_ctx, fused_reshape2_shape, out, out_md); } else if (!fused_squeeze2_axes.empty()) { out->set_mem_desc(out_md); - out->Resize(make_ddim(out_md.get_dims())); + out->Resize(common::make_ddim(out_md.get_dims())); } else { out->set_mem_desc(out_md); } diff --git a/paddle/phi/kernels/fusion/onednn/fusion_gru_kernel.cc b/paddle/phi/kernels/fusion/onednn/fusion_gru_kernel.cc index e3fa939aad7537..8e7fe89ec1f7f8 100644 --- a/paddle/phi/kernels/fusion/onednn/fusion_gru_kernel.cc +++ 
b/paddle/phi/kernels/fusion/onednn/fusion_gru_kernel.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/common/errors.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/expect.h" #include "paddle/phi/core/utils/data_type.h" @@ -451,12 +451,12 @@ void RunKernel(const phi::OneDNNContext& dev_ctx, auto x_dims = x.dims(); auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1) - ? phi::flatten_to_2d(x_dims, 1) + ? common::flatten_to_2d(x_dims, 1) : x_dims; // Get tensor dimensions - const auto x_mat_dims_vec = phi::vectorize(x_mat_dims); - const auto weight_h_dims = phi::vectorize(weight_h.dims()); + const auto x_mat_dims_vec = common::vectorize(x_mat_dims); + const auto weight_h_dims = common::vectorize(weight_h.dims()); const auto& input_lod = x.lod()[0]; // Calculate RNN dimensions diff --git a/paddle/phi/kernels/fusion/xpu/add_act_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/add_act_xpu_kernel.cc index ca221cbc7f4129..5a06ba1422fc03 100644 --- a/paddle/phi/kernels/fusion/xpu/add_act_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/add_act_xpu_kernel.cc @@ -37,8 +37,8 @@ void AddActXPUKernel(const Context& ctx, y_max.get_ptr() == nullptr ? nullptr : y_max.get_ptr()->data(); auto* out_data = reinterpret_cast(ctx.template Alloc(out)); - std::vector x_shape = phi::vectorize(x.dims()); - std::vector y_shape = phi::vectorize(y.dims()); + std::vector x_shape = common::vectorize(x.dims()); + std::vector y_shape = common::vectorize(y.dims()); xpu::Activation_t act(static_cast(act_type)); int r = xpu::add_activation_fusion( // TX/TY/TZ/TID diff --git a/paddle/phi/kernels/fusion/xpu/add_layernorm_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/add_layernorm_xpu_kernel.cc index e110abca728a08..3cb98906218abb 100644 --- a/paddle/phi/kernels/fusion/xpu/add_layernorm_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/add_layernorm_xpu_kernel.cc @@ -60,7 +60,7 @@ static phi::DDim BroadCastInferShape(const DDim x_dims, max_dim, axis); - return phi::make_ddim(out_dims_array); + return common::make_ddim(out_dims_array); } return x_dims; } @@ -84,7 +84,7 @@ void AddLayernormXPUKernel(const Context& ctx, auto x_dims = x.dims(); auto y_dims = y.dims(); auto out_dims = BroadCastInferShape(x_dims, y_dims, -1); - auto layer_norm_x_mat_dims = phi::flatten_to_2d(out_dims, begin_norm_axis); + auto layer_norm_x_mat_dims = common::flatten_to_2d(out_dims, begin_norm_axis); int64_t m = layer_norm_x_mat_dims[0]; int64_t n = layer_norm_x_mat_dims[1]; diff --git a/paddle/phi/kernels/fusion/xpu/bn_act_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/bn_act_xpu_kernel.cc index b14cc2e85fab21..81e6e670933628 100644 --- a/paddle/phi/kernels/fusion/xpu/bn_act_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/bn_act_xpu_kernel.cc @@ -33,7 +33,7 @@ void BNActXPUKernel(const Context& dev_ctx, int act_type, DenseTensor* y) { using XPUType = typename XPUTypeTrait::Type; - const auto data_layout = phi::StringToDataLayout(data_layout_str); + const auto data_layout = common::StringToDataLayout(data_layout_str); PADDLE_ENFORCE_EQ(data_layout_str == "NCHW" || data_layout_str == "NHWC", true, phi::errors::InvalidArgument( diff --git a/paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc index 6ba3d84b5eb0b8..aa5d4738aafa97 100644 --- 
a/paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/conv2d_xpu_kernel.cc @@ -54,9 +54,10 @@ void Conv2dXPUKernelImpl(const Context& ctx, // update paddings and dilations accoring to padding_algorithm std::vector paddings_vec = paddings; std::vector dilations_vec = dilations; - DDim in_data_dims = phi::slice_ddim(input_dims, 2, input_dims.size()); - DDim filter_data_dims = phi::slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = phi::vectorize(filter_data_dims); + DDim in_data_dims = common::slice_ddim(input_dims, 2, input_dims.size()); + DDim filter_data_dims = + common::slice_ddim(filter_dims, 2, filter_dims.size()); + std::vector ksize = common::vectorize(filter_data_dims); phi::UpdatePaddingAndDilation(&paddings_vec, &dilations_vec, padding_algorithm, diff --git a/paddle/phi/kernels/fusion/xpu/conv_transpose_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/conv_transpose_xpu_kernel.cc index c443e109dc2b6b..58f40f3040f74c 100644 --- a/paddle/phi/kernels/fusion/xpu/conv_transpose_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/conv_transpose_xpu_kernel.cc @@ -48,7 +48,7 @@ void Conv2dTransposeXPUKernel(const Context& ctx, DDim in_data_dims = slice_ddim(x.dims(), 2, x.dims().size()); // hw DDim filter_data_dims = slice_ddim(filter.dims(), 2, filter.dims().size()); - std::vector ksize = vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); std::vector paddings_ = paddings; std::vector dilations_ = dilations; UpdatePaddingAndDilation( diff --git a/paddle/phi/kernels/fusion/xpu/fast_layernorm_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/fast_layernorm_xpu_kernel.cc index 6170cd80faf9df..8012462e5c9bc8 100644 --- a/paddle/phi/kernels/fusion/xpu/fast_layernorm_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/fast_layernorm_xpu_kernel.cc @@ -28,7 +28,7 @@ void FastLayerNormXPUKernel(const Context& ctx, DenseTensor* out) { using XPUType = typename XPUTypeTrait::Type; const auto& x_dims = x.dims(); - auto matrix_dim = phi::flatten_to_2d(x_dims, begin_norm_axis); + auto matrix_dim = common::flatten_to_2d(x_dims, begin_norm_axis); int left = static_cast(matrix_dim[0]); int right = static_cast(matrix_dim[1]); const auto* x_data = x.data(); diff --git a/paddle/phi/kernels/fusion/xpu/fast_where_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/fast_where_xpu_kernel.cc index 8404bcb92015b0..3a2def32fefd26 100644 --- a/paddle/phi/kernels/fusion/xpu/fast_where_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/fast_where_xpu_kernel.cc @@ -30,9 +30,9 @@ void FastWhereXPUKernel(const Context& ctx, auto* x_data = reinterpret_cast(x.data()); auto* y_data = reinterpret_cast(y.data()); auto* out_data = reinterpret_cast(ctx.template Alloc(out)); - auto condition_dims = phi::vectorize(condition.dims()); - auto x_dims = phi::vectorize(x.dims()); - auto y_dims = phi::vectorize(y.dims()); + auto condition_dims = common::vectorize(condition.dims()); + auto x_dims = common::vectorize(x.dims()); + auto y_dims = common::vectorize(y.dims()); PADDLE_ENFORCE_EQ( x_dims, y_dims, diff --git a/paddle/phi/kernels/fusion/xpu/fused_feedforward_grad_kernel.cc b/paddle/phi/kernels/fusion/xpu/fused_feedforward_grad_kernel.cc index 3448efca7c3ab1..29f74e8e1fe237 100644 --- a/paddle/phi/kernels/fusion/xpu/fused_feedforward_grad_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/fused_feedforward_grad_kernel.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/common/ddim.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" #include "paddle/phi/backends/xpu/xpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/phi/kernels/fusion/xpu/fused_feedforward_kernel.cc b/paddle/phi/kernels/fusion/xpu/fused_feedforward_kernel.cc index 221305014190bd..dab55c1bbc10ae 100644 --- a/paddle/phi/kernels/fusion/xpu/fused_feedforward_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/fused_feedforward_kernel.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/common/ddim.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" #include "paddle/phi/backends/xpu/xpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/phi/kernels/fusion/xpu/fused_multi_transformer_int8_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/fused_multi_transformer_int8_xpu_kernel.cc index 87fb42c9e23b97..236e276cb937d3 100755 --- a/paddle/phi/kernels/fusion/xpu/fused_multi_transformer_int8_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/fused_multi_transformer_int8_xpu_kernel.cc @@ -294,7 +294,7 @@ void FusedMultiTransformerInt8XpuKernel( cache_kv_out[i]->ResizeAndAllocate(cache_kv_gather_dims); int64_t curr_index_len = gather_index_t->dims().size() == 0 ? 1 : gather_index_t->dims()[0]; - auto curr_xshape = phi::vectorize(cache_kv_dims); + auto curr_xshape = common::vectorize(cache_kv_dims); if (reinterpret_cast( ctx.template Alloc(cache_kv_out[i])) == cache_kv_data && curr_index_len < curr_xshape[gather_axis]) { @@ -339,7 +339,7 @@ void FusedMultiTransformerInt8XpuKernel( cache_kv_data, gather_index_t->data(), reinterpret_cast(cache_kv_gather_tensor.data()), - phi::vectorize(cache_kv_dims), + common::vectorize(cache_kv_dims), gather_index_t->dims().size() == 0 ? 1 : gather_index_t->dims()[0], gather_axis); @@ -349,7 +349,7 @@ void FusedMultiTransformerInt8XpuKernel( cache_kv_data, gather_index_t->data(), reinterpret_cast(cache_kv_gather_tensor.data()), - phi::vectorize(cache_kv_dims), + common::vectorize(cache_kv_dims), gather_index_t->dims().size() == 0 ? 1 : gather_index_t->dims()[0], gather_axis); @@ -376,7 +376,7 @@ void FusedMultiTransformerInt8XpuKernel( cache_kv_data, gather_index_t->data(), cache_kv_data, - phi::vectorize(cache_kv_dims), + common::vectorize(cache_kv_dims), gather_index_t->dims().size() == 0 ? 1 : gather_index_t->dims()[0], gather_axis, @@ -389,7 +389,7 @@ void FusedMultiTransformerInt8XpuKernel( cache_kv_data, gather_index_t->data(), cache_kv_data, - phi::vectorize(cache_kv_dims), + common::vectorize(cache_kv_dims), gather_index_t->dims().size() == 0 ? 
1 : gather_index_t->dims()[0], gather_axis, diff --git a/paddle/phi/kernels/fusion/xpu/fused_multi_transformer_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/fused_multi_transformer_xpu_kernel.cc index 879824668a5438..8c151e0257e0e0 100644 --- a/paddle/phi/kernels/fusion/xpu/fused_multi_transformer_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/fused_multi_transformer_xpu_kernel.cc @@ -262,7 +262,7 @@ void FusedMultiTransformerXpuKernel( cache_kv_data, gather_index_t->data(), reinterpret_cast(cache_kv_gather_tensor.data()), - phi::vectorize(cache_kv_dims), + common::vectorize(cache_kv_dims), gather_index_t->dims().size() == 0 ? 1 : gather_index_t->dims()[0], gather_axis); @@ -272,7 +272,7 @@ void FusedMultiTransformerXpuKernel( cache_kv_data, gather_index_t->data(), reinterpret_cast(cache_kv_gather_tensor.data()), - phi::vectorize(cache_kv_dims), + common::vectorize(cache_kv_dims), gather_index_t->dims().size() == 0 ? 1 : gather_index_t->dims()[0], gather_axis); @@ -292,7 +292,7 @@ void FusedMultiTransformerXpuKernel( cache_kv_data, gather_index_t->data(), cache_kv_data, - phi::vectorize(cache_kv_dims), + common::vectorize(cache_kv_dims), gather_index_t->dims().size() == 0 ? 1 : gather_index_t->dims()[0], gather_axis); @@ -302,7 +302,7 @@ void FusedMultiTransformerXpuKernel( cache_kv_data, gather_index_t->data(), cache_kv_data, - phi::vectorize(cache_kv_dims), + common::vectorize(cache_kv_dims), gather_index_t->dims().size() == 0 ? 1 : gather_index_t->dims()[0], gather_axis); diff --git a/paddle/phi/kernels/fusion/xpu/fused_softmax_mask_kernel.cc b/paddle/phi/kernels/fusion/xpu/fused_softmax_mask_kernel.cc index 3a8083b9945fb3..01a76f36557bcb 100644 --- a/paddle/phi/kernels/fusion/xpu/fused_softmax_mask_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/fused_softmax_mask_kernel.cc @@ -56,8 +56,8 @@ void FusedSoftmaxMaskKernel(const Context& dev_ctx, idx, mask_dim[idx])); } - std::vector x_shape = phi::vectorize(x.dims()); - std::vector mask_shape = phi::vectorize(mask.dims()); + std::vector x_shape = common::vectorize(x.dims()); + std::vector mask_shape = common::vectorize(mask.dims()); // int softmax_with_mask(Context* ctx, const T* x, const T* mask, T* y, const // std::vector& x_shape, const std::vector& mask_shape); diff --git a/paddle/phi/kernels/fusion/xpu/layer_norm_act_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/layer_norm_act_xpu_kernel.cc index ead6959ba6debc..ec0bec2fbbe383 100644 --- a/paddle/phi/kernels/fusion/xpu/layer_norm_act_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/layer_norm_act_xpu_kernel.cc @@ -31,7 +31,7 @@ void LayerNormActXPUKernel(const Context& ctx, DenseTensor* y) { using XPUType = typename XPUTypeTrait::Type; const auto& x_dims = x.dims(); - auto matrix_dim = phi::flatten_to_2d(x_dims, begin_norm_axis); + auto matrix_dim = common::flatten_to_2d(x_dims, begin_norm_axis); int left = static_cast(matrix_dim[0]); int right = static_cast(matrix_dim[1]); const auto* x_data = x.data(); diff --git a/paddle/phi/kernels/fusion/xpu/yolo_box_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/yolo_box_xpu_kernel.cc index 0164f8439bdae4..259726bf89094a 100644 --- a/paddle/phi/kernels/fusion/xpu/yolo_box_xpu_kernel.cc +++ b/paddle/phi/kernels/fusion/xpu/yolo_box_xpu_kernel.cc @@ -70,10 +70,11 @@ void YoloBoxXPUKernel(const Context& ctx, stride_data = stride.data(); anchor_grid_data = anchor_grid.data(); } - std::vector x_shape = phi::vectorize(x.dims()); - std::vector grid_shape = phi::vectorize(grid.dims()); - std::vector stride_shape = phi::vectorize(stride.dims()); - 
std::vector anchor_grid_shape = phi::vectorize(anchor_grid.dims()); + std::vector x_shape = common::vectorize(x.dims()); + std::vector grid_shape = common::vectorize(grid.dims()); + std::vector stride_shape = common::vectorize(stride.dims()); + std::vector anchor_grid_shape = + common::vectorize(anchor_grid.dims()); // yolo_box_coord only support fp32&&fp16 precision int r = xpu::yolo_box_coord( /* baidu::xpu::api::Context* ctx */ ctx.x_context(), diff --git a/paddle/phi/kernels/gpu/affine_grid_grad_kernel.cu b/paddle/phi/kernels/gpu/affine_grid_grad_kernel.cu index f42d13df86a7c0..d2833db851f77f 100644 --- a/paddle/phi/kernels/gpu/affine_grid_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/affine_grid_grad_kernel.cu @@ -40,7 +40,7 @@ struct Linspace { bool align_corners, DenseTensor* numbers, const phi::GPUContext& dev_ctx) { - numbers->Resize(phi::make_ddim({count})); + numbers->Resize(common::make_ddim({count})); T* number_data = dev_ctx.template Alloc(numbers); T slice = (end - start) / (T)(count - 1); if (!align_corners) { @@ -144,7 +144,7 @@ void AffineGridGrad4DCUDAKernel(const Context& dev_ctx, int w = 0; h = size_attr[2]; w = size_attr[3]; - theta_grad->Resize(phi::make_ddim({n, 2, 3})); + theta_grad->Resize(common::make_ddim({n, 2, 3})); T* theta_grad_data = dev_ctx.template Alloc(theta_grad); phi::funcs::SetConstant()( dev_ctx, theta_grad, static_cast(0)); @@ -199,7 +199,7 @@ void AffineGridGrad5DCUDAKernel(const Context& dev_ctx, d = size_attr[2]; h = size_attr[3]; w = size_attr[4]; - theta_grad->Resize(phi::make_ddim({n, 3, 4})); + theta_grad->Resize(common::make_ddim({n, 3, 4})); T* theta_grad_data = dev_ctx.template Alloc(theta_grad); phi::funcs::SetConstant()( dev_ctx, theta_grad, static_cast(0)); diff --git a/paddle/phi/kernels/gpu/affine_grid_kernel.cu b/paddle/phi/kernels/gpu/affine_grid_kernel.cu index d9d539ce28e23f..71220ba40700ea 100644 --- a/paddle/phi/kernels/gpu/affine_grid_kernel.cu +++ b/paddle/phi/kernels/gpu/affine_grid_kernel.cu @@ -39,7 +39,7 @@ struct Linspace { bool align_corners, DenseTensor* numbers, const phi::GPUContext& dev_ctx) { - numbers->Resize(phi::make_ddim({count})); + numbers->Resize(common::make_ddim({count})); T* number_data = dev_ctx.template Alloc(numbers); T slice = (end - start) / (T)(count - 1); if (!align_corners) { @@ -136,7 +136,7 @@ void AffineGrid4DCUDAKernel(const Context& dev_ctx, int w = 0; h = size_attr[2]; w = size_attr[3]; - output->Resize(phi::make_ddim({n, h, w, 2})); + output->Resize(common::make_ddim({n, h, w, 2})); T* out_data = dev_ctx.template Alloc(output); T h_step; @@ -186,7 +186,7 @@ void AffineGrid5DCUDAKernel(const Context& dev_ctx, d = size_attr[2]; h = size_attr[3]; w = size_attr[4]; - output->Resize(phi::make_ddim({n, d, h, w, 3})); + output->Resize(common::make_ddim({n, d, h, w, 3})); T* out_data = dev_ctx.template Alloc(output); T d_step; diff --git a/paddle/phi/kernels/gpu/arange_kernel.cu b/paddle/phi/kernels/gpu/arange_kernel.cu index 3c793e106f049b..10905ff89e18e9 100644 --- a/paddle/phi/kernels/gpu/arange_kernel.cu +++ b/paddle/phi/kernels/gpu/arange_kernel.cu @@ -14,12 +14,12 @@ #include "paddle/phi/kernels/arange_kernel.h" +#include "paddle/common/errors.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include 
"paddle/phi/kernels/funcs/range_function.h" @@ -47,7 +47,7 @@ void ArangeTensorKernel(const Context& dev_ctx, int64_t size = 0; phi::funcs::GetSize(start_value, end_value, step_value, &size); - out->Resize(phi::make_ddim({size})); + out->Resize(common::make_ddim({size})); T* out_data = dev_ctx.template Alloc(out); auto stream = dev_ctx.stream(); @@ -68,7 +68,7 @@ void ArangeNullaryKernel(const Context& dev_ctx, DenseTensor* out) { int64_t size = 0; phi::funcs::GetSize(start_value, end_value, step_value, &size); - out->Resize(phi::make_ddim({size})); + out->Resize(common::make_ddim({size})); T* out_data = dev_ctx.template Alloc(out); auto stream = dev_ctx.stream(); diff --git a/paddle/phi/kernels/gpu/arg_min_max_kernel.cu b/paddle/phi/kernels/gpu/arg_min_max_kernel.cu index caa635255b9878..8a628560f27adc 100644 --- a/paddle/phi/kernels/gpu/arg_min_max_kernel.cu +++ b/paddle/phi/kernels/gpu/arg_min_max_kernel.cu @@ -28,7 +28,7 @@ namespace cub = hipcub; #endif #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace phi { @@ -171,7 +171,7 @@ struct VisitDataCudaArgMinMaxFunctor { phi::DDim x_dims; int new_axis = axis; if (flatten) { - x_dims = phi::make_ddim({x.numel()}); + x_dims = common::make_ddim({x.numel()}); // if flatten, the axis just as 0 new_axis = 0; } else { diff --git a/paddle/phi/kernels/gpu/argsort_grad_kernel.cu b/paddle/phi/kernels/gpu/argsort_grad_kernel.cu index 4cc6b1dd3cb888..5d7dcc08e44c55 100644 --- a/paddle/phi/kernels/gpu/argsort_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/argsort_grad_kernel.cu @@ -175,7 +175,7 @@ void ArgsortGradKernel(const Context& dev_ctx, // Special case for full sort, speedup ~190x. if (axis == -1 || axis + 1 == in_dims.size()) { const int64_t input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t input_width = in_dims[in_dims.size() - 1]; ArgFullAssign( dev_ctx, &out_grad, &indices, in_grad, input_height, input_width); @@ -204,8 +204,8 @@ void ArgsortGradKernel(const Context& dev_ctx, TransposeKernel(dev_ctx, out_grad, trans, &trans_dO); TransposeKernel(dev_ctx, indices, trans, &trans_ind); - const int64_t input_height = - phi::product(phi::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); + const int64_t input_height = common::product( + common::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); const int64_t input_width = trans_dims[trans_dims.size() - 1]; DenseTensor tmp_out; diff --git a/paddle/phi/kernels/gpu/argsort_kernel.cu b/paddle/phi/kernels/gpu/argsort_kernel.cu index 4d16f826cca86a..48a5cfd4d09e9d 100644 --- a/paddle/phi/kernels/gpu/argsort_kernel.cu +++ b/paddle/phi/kernels/gpu/argsort_kernel.cu @@ -109,7 +109,7 @@ void ArgFullSort(const phi::GPUContext& ctx, auto cu_stream = ctx.stream(); DenseTensor input_indices; const std::vector dims = {num_rows, num_cols}; - auto dim = phi::make_ddim(dims); + auto dim = common::make_ddim(dims); input_indices.Resize(dim); ctx.template Alloc(&input_indices); size_t temp_storage_bytes = -1; @@ -264,7 +264,7 @@ void ArgsortKernel(const Context& dev_ctx, // Special case for full sort, speedup ~190x. 
if (axis == -1 || axis + 1 == in_dims.size()) { const int64_t input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t input_width = in_dims[in_dims.size() - 1]; ArgFullSort(dev_ctx, &input, @@ -295,8 +295,8 @@ void ArgsortKernel(const Context& dev_ctx, // Do transpose TransposeKernel(dev_ctx, input, trans, &trans_inp); - const int64_t input_height = - phi::product(phi::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); + const int64_t input_height = common::product( + common::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); const int64_t input_width = trans_dims[trans_dims.size() - 1]; DenseTensor tmp_out; diff --git a/paddle/phi/kernels/gpu/assign_pos_kernel.cu b/paddle/phi/kernels/gpu/assign_pos_kernel.cu index dc164a8bbe6d92..891a18fc413155 100644 --- a/paddle/phi/kernels/gpu/assign_pos_kernel.cu +++ b/paddle/phi/kernels/gpu/assign_pos_kernel.cu @@ -70,7 +70,7 @@ void AssignPosKernel(const Context& dev_ctx, cpu_eff_num_len_data = cpu_eff_num_len.data()[0]; } - phi::DDim out_dims = phi::make_ddim({cpu_eff_num_len_data}); + phi::DDim out_dims = common::make_ddim({cpu_eff_num_len_data}); out->Resize(out_dims); auto out_data = dev_ctx.template Alloc(out); diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu index c3c353859728b7..c275f58ff734b9 100644 --- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu @@ -14,9 +14,9 @@ #include "glog/logging.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_dnn.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/flags.h" #include "paddle/phi/core/kernel_registry.h" @@ -508,7 +508,7 @@ void BatchNormGradFunctor(const Context &ctx, DenseTensor *bias_grad) { double epsilon = static_cast(epsilon_f); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); const auto *d_y = &y_grad; @@ -1353,7 +1353,7 @@ void BatchNormDoubleGradKernel( "you want to use global status in pre_train model, " "please set `use_global_stats = True`")); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); const DenseTensor *running_mean = nullptr; const DenseTensor *running_variance = nullptr; diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu index 20aa02a5f24856..2158d0d1189f59 100644 --- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu @@ -22,9 +22,9 @@ namespace cub = hipcub; #include "glog/logging.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_dnn.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/flags.h" #include "paddle/phi/core/kernel_registry.h" @@ -532,7 +532,7 @@ void BatchNormKernel(const Context &ctx, DenseTensor *reserve_space) { double epsilon = epsilon_f; const bool trainable_stats = trainable_statistics; - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); bool test_mode = is_test && 
(!trainable_stats); // Get the size for each dimension. diff --git a/paddle/phi/kernels/gpu/c_split_kernel.cu b/paddle/phi/kernels/gpu/c_split_kernel.cu index 2fda7d3cf37f0d..f003e4a73f802c 100644 --- a/paddle/phi/kernels/gpu/c_split_kernel.cu +++ b/paddle/phi/kernels/gpu/c_split_kernel.cu @@ -86,8 +86,8 @@ void CSplitKernel(const Context& ctx, int64_t end_size = dims[dims_size - 1]; // remain dim - auto remain_ddim = phi::slice_ddim(dims, 0, dims_size - 1); - int64_t remain_numel = phi::product(remain_ddim); + auto remain_ddim = common::slice_ddim(dims, 0, dims_size - 1); + int64_t remain_numel = common::product(remain_ddim); int64_t limit = x.numel(); int64_t blocks = NumBlocks(limit); diff --git a/paddle/phi/kernels/gpu/concat_kernel.cu b/paddle/phi/kernels/gpu/concat_kernel.cu index f0dc0c9153430c..74ba93d05893d2 100644 --- a/paddle/phi/kernels/gpu/concat_kernel.cu +++ b/paddle/phi/kernels/gpu/concat_kernel.cu @@ -83,8 +83,8 @@ void ConcatKernel(const Context& dev_ctx, if (in->numel() == 0UL) { continue; } - auto in_stride = phi::stride_numel(in->dims()); - auto out_stride = phi::stride_numel(out->dims()); + auto in_stride = common::stride_numel(in->dims()); + auto out_stride = common::stride_numel(out->dims()); phi::funcs::StridedNumelCopyWithAxis( dev_ctx, axis, diff --git a/paddle/phi/kernels/gpu/contiguous_kernel.cu b/paddle/phi/kernels/gpu/contiguous_kernel.cu index ff53a9456182fb..6405b35599ed2c 100644 --- a/paddle/phi/kernels/gpu/contiguous_kernel.cu +++ b/paddle/phi/kernels/gpu/contiguous_kernel.cu @@ -30,7 +30,7 @@ template __global__ void ContiguousCaseZeroFunc( const T* input_data, T* out_data, - phi::Array input_stride) { + Array input_stride) { int64_t input_offset = 0; int64_t output_offset = (blockIdx.z * gridDim.y * gridDim.x + blockIdx.y * gridDim.x + blockIdx.x) * @@ -56,8 +56,8 @@ template __global__ void ContiguousCaseOneFunc( const T* input_data, T* out_data, - phi::Array input_stride, - phi::Array dims, + Array input_stride, + Array dims, const int64_t x_max) { int64_t x = blockIdx.x * blockDim.x + threadIdx.x; if (x < x_max) { @@ -511,8 +511,8 @@ void ContiguousKernel(const Context& dev_ctx, return; } - phi::Array input_stride; - phi::Array input_dims; + Array input_stride; + Array input_dims; for (int i = 0; i < input.dims().size(); i++) { input_dims[i] = input.dims()[i]; input_stride[i] = input.strides()[i]; diff --git a/paddle/phi/kernels/gpu/conv_transpose_grad_kernel.cu b/paddle/phi/kernels/gpu/conv_transpose_grad_kernel.cu index c64facc1e6879b..e96d53b1fdb311 100644 --- a/paddle/phi/kernels/gpu/conv_transpose_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/conv_transpose_grad_kernel.cu @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/conv_transpose_grad_kernel.h" -#include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" +#include "paddle/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/cpu/conv_util.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -71,7 +71,7 @@ void DepthwiseConv2dTransposeGradKernel(const Context& ctx, const std::string& data_format, DenseTensor* dx, DenseTensor* dfilter) { - const DataLayout data_layout = phi::StringToDataLayout(data_format); + const DataLayout data_layout = common::StringToDataLayout(data_format); DenseTensor filter_ = filter; if (!dx && !dfilter) { @@ -91,7 +91,7 @@ void DepthwiseConv2dTransposeGradKernel(const Context& ctx, in_data_dims = slice_ddim(x_dims, 1, x_dims.size() - 1); } DDim filter_data_dims = 
slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings_, &dilations_, padding_algorithm, in_data_dims, strides, ksize); diff --git a/paddle/phi/kernels/gpu/conv_transpose_kernel.cu b/paddle/phi/kernels/gpu/conv_transpose_kernel.cu index bee31450cbf70f..7d29f3503fd35c 100644 --- a/paddle/phi/kernels/gpu/conv_transpose_kernel.cu +++ b/paddle/phi/kernels/gpu/conv_transpose_kernel.cu @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/conv_transpose_kernel.h" -#include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" +#include "paddle/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/cpu/conv_util.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -37,7 +37,7 @@ void DepthwiseConv2dTransposeKernel(const Context& ctx, const std::vector& dilations, const std::string& data_format, DenseTensor* out) { - const DataLayout data_layout = phi::StringToDataLayout(data_format); + const DataLayout data_layout = common::StringToDataLayout(data_format); DenseTensor filter_ = filter; ctx.template Alloc(out); @@ -72,7 +72,7 @@ void DepthwiseConv2dTransposeKernel(const Context& ctx, in_data_dims = slice_ddim(x_dims, 1, x_dims.size() - 1); } DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings_, &dilations_, padding_algorithm, in_data_dims, strides, ksize); diff --git a/paddle/phi/kernels/gpu/cum_maxmin_kernel.cu b/paddle/phi/kernels/gpu/cum_maxmin_kernel.cu index 49903bde6ff99b..24ba48429e10ce 100644 --- a/paddle/phi/kernels/gpu/cum_maxmin_kernel.cu +++ b/paddle/phi/kernels/gpu/cum_maxmin_kernel.cu @@ -275,7 +275,7 @@ void ScanWithIndicesKernel(const Context& dev_ctx, x_data, values_data, indices_data, num_rows, row_size, init, op); } else { int64_t row_size = x.dims()[axis]; - auto sizes = phi::vectorize(x.dims()); + auto sizes = common::vectorize(x.dims()); const int64_t num_orows = std::accumulate(sizes.begin(), diff --git a/paddle/phi/kernels/gpu/decode_jpeg_kernel.cu b/paddle/phi/kernels/gpu/decode_jpeg_kernel.cu index 0b5a10b93d85a1..ef6ce5d159aeb6 100644 --- a/paddle/phi/kernels/gpu/decode_jpeg_kernel.cu +++ b/paddle/phi/kernels/gpu/decode_jpeg_kernel.cu @@ -118,7 +118,7 @@ void DecodeJpegKernel(const Context& dev_ctx, int sz = widths[0] * heights[0]; std::vector out_shape = {output_components, height, width}; - out->Resize(phi::make_ddim(out_shape)); + out->Resize(common::make_ddim(out_shape)); T* data = dev_ctx.template Alloc(out); diff --git a/paddle/phi/kernels/gpu/depthwise_conv.h b/paddle/phi/kernels/gpu/depthwise_conv.h index 2908a155cdedd3..278b219b453d3d 100644 --- a/paddle/phi/kernels/gpu/depthwise_conv.h +++ b/paddle/phi/kernels/gpu/depthwise_conv.h @@ -38,6 +38,7 @@ namespace math { * \brief Compute the depthwise convolution which include * forward process and backpropagation process */ +using DataLayout = phi::DataLayout; template diff --git a/paddle/phi/kernels/gpu/depthwise_conv_grad_kernel.cu b/paddle/phi/kernels/gpu/depthwise_conv_grad_kernel.cu index b2856080a7873e..a46eb02dc7c8a5 100644 --- a/paddle/phi/kernels/gpu/depthwise_conv_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/depthwise_conv_grad_kernel.cu @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // 
limitations under the License. +#include "paddle/common/layout.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/float16.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/cpu/conv_util.h" #include "paddle/phi/kernels/funcs/batch_norm_utils.h" @@ -57,14 +57,14 @@ void DepthwiseConvGradKernel(const Context& dev_ctx, auto filter_dims = filter.dims(); DDim in_data_dims; - const phi::DataLayout data_layout = phi::StringToDataLayout(data_format); + const phi::DataLayout data_layout = common::StringToDataLayout(data_format); if (data_layout != phi::DataLayout::kNHWC) { in_data_dims = slice_ddim(in_dims, 2, in_dims.size()); } else { in_data_dims = slice_ddim(in_dims, 1, in_dims.size() - 1); } DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); diff --git a/paddle/phi/kernels/gpu/depthwise_conv_kernel.cu b/paddle/phi/kernels/gpu/depthwise_conv_kernel.cu index cd4579ef16d58d..eb87c49a9de4b2 100644 --- a/paddle/phi/kernels/gpu/depthwise_conv_kernel.cu +++ b/paddle/phi/kernels/gpu/depthwise_conv_kernel.cu @@ -77,7 +77,7 @@ void DepthwiseConvKernel(const Context& dev_ctx, auto filter_dims = filter.dims(); DDim in_data_dims; - const phi::DataLayout data_layout = phi::StringToDataLayout(data_format); + const phi::DataLayout data_layout = common::StringToDataLayout(data_format); if (data_layout != phi::DataLayout::kNHWC) { in_data_dims = slice_ddim(in_dims, 2, in_dims.size()); } else { @@ -85,7 +85,7 @@ void DepthwiseConvKernel(const Context& dev_ctx, } DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); diff --git a/paddle/phi/kernels/gpu/diagonal_grad_kernel.cu b/paddle/phi/kernels/gpu/diagonal_grad_kernel.cu index bac9a297b580e9..681954317d51c4 100644 --- a/paddle/phi/kernels/gpu/diagonal_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/diagonal_grad_kernel.cu @@ -36,7 +36,8 @@ void DiagonalGradKernel(const Context& dev_ctx, auto dout_dim = dout->dims().Get(); auto dout_dim_size = dout->dims().size(); - std::vector res_dout = vectorize(phi::stride(dout->dims())); + std::vector res_dout = + common::vectorize(common::stride(dout->dims())); DenseTensor dout_stride_tensor; phi::TensorFromVector(res_dout, dev_ctx, &dout_stride_tensor); int64_t* dout_stride = dout_stride_tensor.data(); @@ -46,7 +47,7 @@ void DiagonalGradKernel(const Context& dev_ctx, auto dx_dim = dx->dims().Get(); auto dx_dim_size = dx->dims().size(); - std::vector res_dx = vectorize(phi::stride(dx->dims())); + std::vector res_dx = common::vectorize(common::stride(dx->dims())); DenseTensor dx_stride_tensor; phi::TensorFromVector(res_dx, dev_ctx, &dx_stride_tensor); int64_t* dx_stride = dx_stride_tensor.data(); diff --git a/paddle/phi/kernels/gpu/diagonal_kernel.cu b/paddle/phi/kernels/gpu/diagonal_kernel.cu index 2acc527e9b7c7f..6adcb9b28f5d81 100644 --- a/paddle/phi/kernels/gpu/diagonal_kernel.cu +++ b/paddle/phi/kernels/gpu/diagonal_kernel.cu @@ -33,7 +33,8 @@ void DiagonalKernel(const Context& dev_ctx, auto input_dim = input->dims().Get(); auto input_dim_size 
= input->dims().size(); - std::vector res_in = vectorize(phi::stride(input->dims())); + std::vector res_in = + common::vectorize(common::stride(input->dims())); DenseTensor input_stride_tensor; phi::TensorFromVector(res_in, dev_ctx, &input_stride_tensor); int64_t* input_stride = input_stride_tensor.data(); @@ -43,7 +44,8 @@ void DiagonalKernel(const Context& dev_ctx, auto output_dim = output->dims().Get(); auto output_dim_size = output->dims().size(); - std::vector res_out = vectorize(phi::stride(output->dims())); + std::vector res_out = + common::vectorize(common::stride(output->dims())); DenseTensor output_stride_tensor; phi::TensorFromVector(res_out, dev_ctx, &output_stride_tensor); int64_t* output_stride = output_stride_tensor.data(); diff --git a/paddle/phi/kernels/gpu/dist_kernel.cu b/paddle/phi/kernels/gpu/dist_kernel.cu index e146fb47cf66d4..a9cbf97b975f22 100644 --- a/paddle/phi/kernels/gpu/dist_kernel.cu +++ b/paddle/phi/kernels/gpu/dist_kernel.cu @@ -134,7 +134,7 @@ void DistKernel(const Context& dev_ctx, if (xdim == y.dims()) { // same shape auto n = x.numel(); auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, n); - intermediate.Resize(phi::make_ddim({config.block_per_grid.x})); + intermediate.Resize(common::make_ddim({config.block_per_grid.x})); T* i_ptr = dev_ctx.template Alloc(&intermediate); std::vector axis_dims = {static_cast(-1)}; diff --git a/paddle/phi/kernels/gpu/embedding_grad_kernel.cu b/paddle/phi/kernels/gpu/embedding_grad_kernel.cu index a7c75e64a462ad..8689f7fde8b3ba 100644 --- a/paddle/phi/kernels/gpu/embedding_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/embedding_grad_kernel.cu @@ -209,7 +209,7 @@ struct EmbeddingSparseGradCUDAFunctor { auto* d_output_data = d_output->template data(); auto d_output_dims = d_output->dims(); auto d_output_dims_2d = - phi::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); + common::flatten_to_2d(d_output_dims, d_output_dims.size() - 1); PADDLE_ENFORCE_EQ(d_table_value->dims(), d_output_dims_2d, phi::errors::InvalidArgument( diff --git a/paddle/phi/kernels/gpu/expand_as_kernel.cu b/paddle/phi/kernels/gpu/expand_as_kernel.cu index b296b5b7e2014d..6bd7cb80da28fe 100644 --- a/paddle/phi/kernels/gpu/expand_as_kernel.cu +++ b/paddle/phi/kernels/gpu/expand_as_kernel.cu @@ -31,7 +31,7 @@ void ExpandAsKernel(const Context& ctx, DenseTensor* out) { int rank = x.dims().size(); int target_rank = static_cast(target_shape.size()); - auto vec_in_dims = phi::vectorize(x.dims()); + auto vec_in_dims = common::vectorize(x.dims()); unsigned int diff = target_rank - rank; vec_in_dims.insert(vec_in_dims.begin(), diff, 1); diff --git a/paddle/phi/kernels/gpu/expand_kernel.cu b/paddle/phi/kernels/gpu/expand_kernel.cu index dc632ce4d4e63b..ef5643737f4007 100644 --- a/paddle/phi/kernels/gpu/expand_kernel.cu +++ b/paddle/phi/kernels/gpu/expand_kernel.cu @@ -29,7 +29,7 @@ void ExpandKernel(const Context& ctx, DenseTensor* out) { auto expand_shape = shape.GetData(); auto diff = expand_shape.size() - x.dims().size(); - auto out_shape = phi::vectorize(x.dims()); + auto out_shape = common::vectorize(x.dims()); out_shape.insert(out_shape.begin(), diff, 1); for (size_t i = 0; i < out_shape.size(); ++i) { PADDLE_ENFORCE_NE( @@ -69,7 +69,7 @@ void ExpandKernel(const Context& ctx, } } - out->Resize(phi::make_ddim(out_shape)); + out->Resize(common::make_ddim(out_shape)); ctx.template Alloc(out); std::vector ins = {&x}; std::vector outs = {out}; diff --git a/paddle/phi/kernels/gpu/fill_diagonal_tensor_grad_kernel.cu 
b/paddle/phi/kernels/gpu/fill_diagonal_tensor_grad_kernel.cu index eda1d3ba2225bf..f4eb8fcb8c059d 100644 --- a/paddle/phi/kernels/gpu/fill_diagonal_tensor_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/fill_diagonal_tensor_grad_kernel.cu @@ -73,7 +73,7 @@ void FillDiagonalTensorGradKernel(const Context &ctx, auto stream = ctx.stream(); DenseTensor tensor_tmp; - tensor_tmp.Resize(phi::make_ddim({2 + matrows})); + tensor_tmp.Resize(common::make_ddim({2 + matrows})); int64_t *memory_block_cu = ctx.template Alloc(&tensor_tmp); const auto gpu_place = ctx.GetPlace(); memory_utils::Copy(gpu_place, diff --git a/paddle/phi/kernels/gpu/fill_diagonal_tensor_kernel.cu b/paddle/phi/kernels/gpu/fill_diagonal_tensor_kernel.cu index 8e6841cf6bb5bb..0f43a57c54de6b 100644 --- a/paddle/phi/kernels/gpu/fill_diagonal_tensor_kernel.cu +++ b/paddle/phi/kernels/gpu/fill_diagonal_tensor_kernel.cu @@ -58,7 +58,7 @@ void FillDiagonalTensorKernel(const Context &ctx, auto out_dims = out->dims(); auto matdims = y.dims(); - auto fill_dims = phi::flatten_to_2d(matdims, matdims.size() - 1); + auto fill_dims = common::flatten_to_2d(matdims, matdims.size() - 1); int64_t new_dims[2]; std::vector memory_block; @@ -89,7 +89,7 @@ void FillDiagonalTensorKernel(const Context &ctx, auto stream = ctx.stream(); DenseTensor tensor_tmp; - tensor_tmp.Resize(phi::make_ddim({2 + fill_dims[0]})); + tensor_tmp.Resize(common::make_ddim({2 + fill_dims[0]})); int64_t *memory_block_cu = ctx.template Alloc(&tensor_tmp); const auto gpu_place = ctx.GetPlace(); memory_utils::Copy(gpu_place, diff --git a/paddle/phi/kernels/gpu/flip_kernel.cu b/paddle/phi/kernels/gpu/flip_kernel.cu index 71fdbcaaa68bb4..4b73fd48d95985 100644 --- a/paddle/phi/kernels/gpu/flip_kernel.cu +++ b/paddle/phi/kernels/gpu/flip_kernel.cu @@ -13,11 +13,11 @@ // limitations under the License. 
#include "paddle/phi/kernels/flip_kernel.h" +#include "paddle/common/array.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/array.h" namespace phi { @@ -74,7 +74,7 @@ void FlipKernel(const Context& dev_ctx, const int64_t numel = x.numel(); size_t flip_dims_size = axis.size(); - auto x_stride = phi::stride(x_dims); + auto x_stride = common::stride(x_dims); phi::Array stride_array; phi::Array shape_array; diff --git a/paddle/phi/kernels/gpu/full_kernel.cu b/paddle/phi/kernels/gpu/full_kernel.cu index bd1d7db96cfeca..fde2e33505f971 100644 --- a/paddle/phi/kernels/gpu/full_kernel.cu +++ b/paddle/phi/kernels/gpu/full_kernel.cu @@ -41,7 +41,7 @@ void FullKernel(const Context& dev_ctx, const Scalar& val, DataType dtype, DenseTensor* out) { - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); int numel = out->numel(); dev_ctx.template Alloc(out); diff --git a/paddle/phi/kernels/gpu/gaussian_inplace_grad_kernel.cu b/paddle/phi/kernels/gpu/gaussian_inplace_grad_kernel.cu index d2bb9c31fa67da..7846a596a6bd64 100644 --- a/paddle/phi/kernels/gpu/gaussian_inplace_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/gaussian_inplace_grad_kernel.cu @@ -27,7 +27,7 @@ void GaussianInplaceGradKernel(const Context& ctx, float std, int seed, DenseTensor* x_grad) { - auto dims = vectorize(x_grad->dims()); + auto dims = common::vectorize(x_grad->dims()); float value = static_cast(0.0f); phi::FullKernel(ctx, dims, value, phi::DataType::UNDEFINED, x_grad); } diff --git a/paddle/phi/kernels/gpu/gaussian_kernel.cu b/paddle/phi/kernels/gpu/gaussian_kernel.cu index 6e5c7ee63ce531..36fad8215d8261 100644 --- a/paddle/phi/kernels/gpu/gaussian_kernel.cu +++ b/paddle/phi/kernels/gpu/gaussian_kernel.cu @@ -59,7 +59,7 @@ void GaussianKernel(const Context& dev_ctx, int seed, DataType dtype, DenseTensor* out) { - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); dev_ctx.template Alloc(out); if (seed == 0) { // use global Generator seed diff --git a/paddle/phi/kernels/gpu/generate_proposals_kernel.cu b/paddle/phi/kernels/gpu/generate_proposals_kernel.cu index 38e0e27d99f14e..ce2f8dc2467ed0 100644 --- a/paddle/phi/kernels/gpu/generate_proposals_kernel.cu +++ b/paddle/phi/kernels/gpu/generate_proposals_kernel.cu @@ -47,16 +47,16 @@ static void SortDescending(const phi::GPUContext &ctx, DenseTensor *index_out) { int num = static_cast(value.numel()); DenseTensor index_in_t; - index_in_t.Resize(phi::make_ddim({num})); + index_in_t.Resize(common::make_ddim({num})); int *idx_in = ctx.template Alloc(&index_in_t); phi::funcs::ForRange for_range(ctx, num); for_range(funcs::RangeInitFunctor{0, 1, idx_in}); - index_out->Resize(phi::make_ddim({num})); + index_out->Resize(common::make_ddim({num})); int *idx_out = ctx.template Alloc(index_out); const T *keys_in = value.data(); - value_out->Resize(phi::make_ddim({num})); + value_out->Resize(common::make_ddim({num})); T *keys_out = ctx.template Alloc(value_out); // Determine temporary device storage requirements @@ -333,7 +333,7 @@ static void NMS(const phi::GPUContext &ctx, } } } - keep_out->Resize(phi::make_ddim({num_to_keep})); + keep_out->Resize(common::make_ddim({num_to_keep})); int *keep = ctx.template Alloc(keep_out); memory_utils::Copy(place, keep, @@ -364,12 +364,12 @@ static std::pair ProposalForOneImage( int num = scores.numel(); 
int pre_nms_num = (pre_nms_top_n <= 0 || pre_nms_top_n > num) ? scores.numel() : pre_nms_top_n; - scores_sort.Resize(phi::make_ddim({pre_nms_num, 1})); - index_sort.Resize(phi::make_ddim({pre_nms_num, 1})); + scores_sort.Resize(common::make_ddim({pre_nms_num, 1})); + index_sort.Resize(common::make_ddim({pre_nms_num, 1})); // 2. box decode and clipping DenseTensor proposals; - proposals.Resize(phi::make_ddim({pre_nms_num, 4})); + proposals.Resize(common::make_ddim({pre_nms_num, 4})); ctx.template Alloc(&proposals); { @@ -385,9 +385,9 @@ static std::pair ProposalForOneImage( // 3. filter DenseTensor keep_index, keep_num_t; - keep_index.Resize(phi::make_ddim({pre_nms_num})); + keep_index.Resize(common::make_ddim({pre_nms_num})); ctx.template Alloc(&keep_index); - keep_num_t.Resize(phi::make_ddim({1})); + keep_num_t.Resize(common::make_ddim({1})); ctx.template Alloc(&keep_num_t); min_size = std::max(min_size, 1.0f); auto stream = ctx.stream(); @@ -408,23 +408,23 @@ static std::pair ProposalForOneImage( sizeof(int), ctx.stream()); ctx.Wait(); - keep_index.Resize(phi::make_ddim({keep_num})); + keep_index.Resize(common::make_ddim({keep_num})); DenseTensor scores_filter, proposals_filter; // Handle the case when there is no keep index left if (keep_num == 0) { phi::funcs::SetConstant set_zero; - proposals_filter.Resize(phi::make_ddim({1, 4})); + proposals_filter.Resize(common::make_ddim({1, 4})); ctx.template Alloc(&proposals_filter); - scores_filter.Resize(phi::make_ddim({1, 1})); + scores_filter.Resize(common::make_ddim({1, 1})); ctx.template Alloc(&scores_filter); set_zero(ctx, &proposals_filter, static_cast(0)); set_zero(ctx, &scores_filter, static_cast(0)); return std::make_pair(proposals_filter, scores_filter); } - proposals_filter.Resize(phi::make_ddim({keep_num, 4})); + proposals_filter.Resize(common::make_ddim({keep_num, 4})); ctx.template Alloc(&proposals_filter); - scores_filter.Resize(phi::make_ddim({keep_num, 1})); + scores_filter.Resize(common::make_ddim({keep_num, 1})); ctx.template Alloc(&scores_filter); phi::funcs::GPUGather(ctx, proposals, keep_index, &proposals_filter); phi::funcs::GPUGather(ctx, scores_sort, keep_index, &scores_filter); @@ -438,13 +438,13 @@ static std::pair ProposalForOneImage( NMS( ctx, proposals_filter, keep_index, nms_thresh, &keep_nms, pixel_offset); if (post_nms_top_n > 0 && post_nms_top_n < keep_nms.numel()) { - keep_nms.Resize(phi::make_ddim({post_nms_top_n})); + keep_nms.Resize(common::make_ddim({post_nms_top_n})); } DenseTensor scores_nms, proposals_nms; - proposals_nms.Resize(phi::make_ddim({keep_nms.numel(), 4})); + proposals_nms.Resize(common::make_ddim({keep_nms.numel(), 4})); ctx.template Alloc(&proposals_nms); - scores_nms.Resize(phi::make_ddim({keep_nms.numel(), 1})); + scores_nms.Resize(common::make_ddim({keep_nms.numel(), 1})); ctx.template Alloc(&scores_nms); phi::funcs::GPUGather(ctx, proposals_filter, keep_nms, &proposals_nms); phi::funcs::GPUGather(ctx, scores_filter, keep_nms, &scores_nms); @@ -487,9 +487,9 @@ void GenerateProposalsKernel(const Context &ctx, int64_t w_bbox = bbox_dim[3]; DenseTensor bbox_deltas_swap, scores_swap; - bbox_deltas_swap.Resize(phi::make_ddim({num, h_bbox, w_bbox, c_bbox})); + bbox_deltas_swap.Resize(common::make_ddim({num, h_bbox, w_bbox, c_bbox})); ctx.template Alloc(&bbox_deltas_swap); - scores_swap.Resize(phi::make_ddim({num, h_score, w_score, c_score})); + scores_swap.Resize(common::make_ddim({num, h_score, w_score, c_score})); ctx.template Alloc(&scores_swap); phi::funcs::Transpose trans; @@ -499,12 
+499,12 @@ void GenerateProposalsKernel(const Context &ctx, DenseTensor tmp_anchors = anchors; DenseTensor tmp_variances = variances; - tmp_anchors.Resize(phi::make_ddim({tmp_anchors.numel() / 4, 4})); - tmp_variances.Resize(phi::make_ddim({tmp_variances.numel() / 4, 4})); + tmp_anchors.Resize(common::make_ddim({tmp_anchors.numel() / 4, 4})); + tmp_variances.Resize(common::make_ddim({tmp_variances.numel() / 4, 4})); - rpn_rois->Resize(phi::make_ddim({bbox_deltas.numel() / 4, 4})); + rpn_rois->Resize(common::make_ddim({bbox_deltas.numel() / 4, 4})); ctx.template Alloc(rpn_rois); - rpn_roi_probs->Resize(phi::make_ddim({scores.numel(), 1})); + rpn_roi_probs->Resize(common::make_ddim({scores.numel(), 1})); ctx.template Alloc(rpn_roi_probs); T *rpn_rois_data = rpn_rois->data(); @@ -522,8 +522,9 @@ void GenerateProposalsKernel(const Context &ctx, DenseTensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1); DenseTensor scores_slice = scores_swap.Slice(i, i + 1); - bbox_deltas_slice.Resize(phi::make_ddim({h_bbox * w_bbox * c_bbox / 4, 4})); - scores_slice.Resize(phi::make_ddim({h_score * w_score * c_score, 1})); + bbox_deltas_slice.Resize( + common::make_ddim({h_bbox * w_bbox * c_bbox / 4, 4})); + scores_slice.Resize(common::make_ddim({h_score * w_score * c_score, 1})); std::pair box_score_pair = ProposalForOneImage(ctx, @@ -560,7 +561,7 @@ void GenerateProposalsKernel(const Context &ctx, tmp_num.push_back(proposals.dims()[0]); } if (rpn_rois_num != nullptr) { - rpn_rois_num->Resize(phi::make_ddim({num})); + rpn_rois_num->Resize(common::make_ddim({num})); ctx.template Alloc(rpn_rois_num); int *num_data = rpn_rois_num->data(); memory_utils::Copy(place, @@ -569,12 +570,12 @@ void GenerateProposalsKernel(const Context &ctx, &tmp_num[0], sizeof(int) * num, ctx.stream()); - rpn_rois_num->Resize(phi::make_ddim({num})); + rpn_rois_num->Resize(common::make_ddim({num})); } phi::LoD lod; lod.emplace_back(offset); - rpn_rois->Resize(phi::make_ddim({num_proposals, 4})); - rpn_roi_probs->Resize(phi::make_ddim({num_proposals, 1})); + rpn_rois->Resize(common::make_ddim({num_proposals, 4})); + rpn_roi_probs->Resize(common::make_ddim({num_proposals, 1})); } } // namespace phi diff --git a/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu index ca933cd97c7fb5..b3e34429e9ccb9 100644 --- a/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/group_norm_grad_kernel.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/gpu/group_norm_utils.h" @@ -280,7 +280,7 @@ void GroupNormGradKernel(const Context& dev_ctx, DenseTensor* d_scale, DenseTensor* d_bias) { using AccT = typename phi::dtype::MPTypeTrait::Type; - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); const auto scale_ptr = scale.get_ptr(); const auto bias_ptr = bias.get_ptr(); diff --git a/paddle/phi/kernels/gpu/group_norm_kernel.cu b/paddle/phi/kernels/gpu/group_norm_kernel.cu index 503ba9feefec64..301701c61d34ea 100644 --- a/paddle/phi/kernels/gpu/group_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/group_norm_kernel.cu @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/group_norm_kernel.h" +#include 
"paddle/common/layout.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/gpu/group_norm_utils.h" @@ -801,7 +801,7 @@ void GroupNormDirectCUDAFunctor::operator()( AccT* mean, AccT* variance, const DataLayout data_layout) { - const auto input_ddim = phi::make_ddim(input_shape); + const auto input_ddim = common::make_ddim(input_shape); const int C = (data_layout == DataLayout::kNCHW ? input_ddim[1] : input_ddim[input_ddim.size() - 1]); @@ -898,7 +898,7 @@ void GroupNormGeneralCaseKernel(const Context& dev_ctx, DenseTensor* mean, DenseTensor* var) { using AccT = typename phi::dtype::MPTypeTrait::Type; - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); const auto scale_ptr = scale.get_ptr(); const auto bias_ptr = bias.get_ptr(); const auto x_dims = x.dims(); diff --git a/paddle/phi/kernels/gpu/gumbel_softmax_kernel.cu b/paddle/phi/kernels/gpu/gumbel_softmax_kernel.cu index aee591894cc811..124629f580457c 100644 --- a/paddle/phi/kernels/gpu/gumbel_softmax_kernel.cu +++ b/paddle/phi/kernels/gpu/gumbel_softmax_kernel.cu @@ -141,7 +141,7 @@ struct GumbleNoiseGenerator { const float temperature) { DenseTensor random_tensor; int64_t size = size_to_axis * size_from_axis; - random_tensor.Resize(make_ddim({size})); + random_tensor.Resize(common::make_ddim({size})); using MPType = typename phi::dtype::MPTypeTrait::Type; MPType* random_data = ctx.template Alloc(&random_tensor); diff --git a/paddle/phi/kernels/gpu/index_add_grad_kernel.cu b/paddle/phi/kernels/gpu/index_add_grad_kernel.cu index c0d5b737c5fbbf..394600a1eb9a6e 100644 --- a/paddle/phi/kernels/gpu/index_add_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/index_add_grad_kernel.cu @@ -42,7 +42,7 @@ void IndexAddGradKernel(const Context& ctx, auto output_dim = out_grad.dims(); auto add_value_dim = add_value_grad->dims(); dim = dim >= 0 ? dim : dim + input_dim.size(); - auto stride_dim = phi::stride(input_dim); + auto stride_dim = common::stride(input_dim); int64_t stride = stride_dim[dim]; int64_t size = add_value_dim[dim]; int64_t delta = input_dim[dim] - size; diff --git a/paddle/phi/kernels/gpu/index_add_kernel.cu b/paddle/phi/kernels/gpu/index_add_kernel.cu index 8fd15d5435f98b..0e8546e88d54a1 100644 --- a/paddle/phi/kernels/gpu/index_add_kernel.cu +++ b/paddle/phi/kernels/gpu/index_add_kernel.cu @@ -60,7 +60,7 @@ void IndexAddKernel(const Context& ctx, const auto& index_type = index.dtype(); int dim = axis; dim = dim >= 0 ? 
dim : dim + input_dim.size(); - auto stride_dim = phi::stride(input_dim); + auto stride_dim = common::stride(input_dim); int64_t stride = stride_dim[dim]; int64_t size = add_value_dim[dim]; int64_t delta = input_dim[dim] - size; diff --git a/paddle/phi/kernels/gpu/index_put_grad_kernel.cu b/paddle/phi/kernels/gpu/index_put_grad_kernel.cu index d63d670945fba3..b0e2865d75840c 100644 --- a/paddle/phi/kernels/gpu/index_put_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/index_put_grad_kernel.cu @@ -26,8 +26,8 @@ namespace phi { template __global__ void SetZeroCudaKernel(int64_t** indices, - phi::Array stride, - phi::Array shape, + Array stride, + Array shape, const int rank, const int64_t numel, T* out) { @@ -56,14 +56,13 @@ __global__ void SetZeroCudaKernel(int64_t** indices, } template -__global__ void IndexPutGradCudaKernel( - const T* out_grad, - int64_t** indices, - phi::Array stride, - phi::Array shape, - const int rank, - const int64_t numel, - T* value_grad) { +__global__ void IndexPutGradCudaKernel(const T* out_grad, + int64_t** indices, + Array stride, + Array shape, + const int rank, + const int64_t numel, + T* value_grad) { int64_t idx = static_cast(threadIdx.x) + static_cast(blockDim.x) * static_cast(blockIdx.x); @@ -103,10 +102,10 @@ void LaunchIndexPutGradCudaKernel( T* x_grad_data = x_grad->data(); auto x_grad_dims = x_grad->dims(); - auto x_grad_stride = phi::stride(x_grad_dims); + auto x_grad_stride = common::stride(x_grad_dims); - phi::Array stride_array; - phi::Array shape_array; + Array stride_array; + Array shape_array; for (int i = 0; i < rank; ++i) { stride_array[i] = x_grad_stride[i]; shape_array[i] = x_grad_dims[i]; @@ -125,10 +124,10 @@ void LaunchIndexPutGradCudaKernel( } auto out_grad_dims = out_grad.dims(); - auto out_grad_stride = phi::stride(out_grad_dims); + auto out_grad_stride = common::stride(out_grad_dims); - phi::Array stride_array; - phi::Array shape_array; + Array stride_array; + Array shape_array; for (int i = 0; i < rank; ++i) { stride_array[i] = out_grad_stride[i]; shape_array[i] = out_grad_dims[i]; @@ -199,8 +198,9 @@ void LaunchIndexPutGradCudaKernel( numel, tmp_value_grad_data); - std::vector after_dims = phi::vectorize(tmp_value_grad.dims()); - std::vector before_dims = phi::vectorize(value_grad->dims()); + std::vector after_dims = + common::vectorize(tmp_value_grad.dims()); + std::vector before_dims = common::vectorize(value_grad->dims()); std::vector compress_dims; std::vector dims_without_1; @@ -208,7 +208,7 @@ void LaunchIndexPutGradCudaKernel( &after_dims, &before_dims, &compress_dims, &dims_without_1); auto pre_dims = value_grad->dims(); - value_grad->Resize(phi::make_ddim(dims_without_1)); + value_grad->Resize(common::make_ddim(dims_without_1)); IntArray v_axis(compress_dims); SumKernel(dev_ctx, tmp_value_grad, @@ -245,7 +245,7 @@ void IndexPutGradKernel(const Context& dev_ctx, } if (value_grad) { FullKernel(dev_ctx, - phi::vectorize(value_grad->dims()), + common::vectorize(value_grad->dims()), 0.0f, value_grad->dtype(), value_grad); @@ -255,7 +255,7 @@ void IndexPutGradKernel(const Context& dev_ctx, auto bd_dim = funcs::BroadCastTensorsDims(int_indices_v); - std::vector res_dim_v(phi::vectorize(bd_dim)); + std::vector res_dim_v(common::vectorize(bd_dim)); std::vector res_indices_v(x.dims().size(), nullptr); std::vector tmp_res_indices_v; std::vector range_tensor_v; diff --git a/paddle/phi/kernels/gpu/index_put_kernel.cu b/paddle/phi/kernels/gpu/index_put_kernel.cu index ee58eab21c53df..ffd4ee7572d562 100644 --- 
a/paddle/phi/kernels/gpu/index_put_kernel.cu +++ b/paddle/phi/kernels/gpu/index_put_kernel.cu @@ -25,8 +25,8 @@ template __global__ void IndexPutCudaKernel(const T* x, const T* vals, int64_t** indices, - phi::Array stride, - phi::Array shape, + Array stride, + Array shape, const int rank, const int64_t numel, const int64_t is_single_val_tensor, @@ -78,10 +78,10 @@ void LaunchIndexPutCudaKernel(const Context& dev_ctx, auto x_dims = x.dims(); const int rank = x_dims.size(); - auto x_stride = phi::stride(x_dims); + auto x_stride = common::stride(x_dims); - phi::Array stride_array; - phi::Array shape_array; + Array stride_array; + Array shape_array; for (int i = 0; i < rank; ++i) { stride_array[i] = x_stride[i]; shape_array[i] = x_dims[i]; @@ -134,7 +134,7 @@ void IndexPutKernel(const Context& dev_ctx, } auto bd_dim = funcs::BroadCastTensorsDims(int_indices_v); - std::vector res_dim_v(phi::vectorize(bd_dim)); + std::vector res_dim_v(common::vectorize(bd_dim)); std::vector res_indices_v(x.dims().size(), nullptr); std::vector tmp_res_indices_v; std::vector tmp_value_v; @@ -157,7 +157,7 @@ void IndexPutKernel(const Context& dev_ctx, if (value.numel() != 1) { tmp_value_v.emplace_back( - DenseTensor(value.dtype()).Resize(phi::make_ddim(res_dim_v))); + DenseTensor(value.dtype()).Resize(common::make_ddim(res_dim_v))); ExpandKernel( dev_ctx, value, IntArray(res_dim_v), &tmp_value_v[0]); ptr_value = &tmp_value_v[0]; diff --git a/paddle/phi/kernels/gpu/index_select_grad_kernel.cu b/paddle/phi/kernels/gpu/index_select_grad_kernel.cu index 6d0ba9e5bd4ef9..94f94a319c97a2 100644 --- a/paddle/phi/kernels/gpu/index_select_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/index_select_grad_kernel.cu @@ -60,7 +60,7 @@ void IndexSelectGradKernel(const Context& ctx, auto input_dim = x_grad->dims(); auto output_dim = out_grad.dims(); dim = dim >= 0 ? dim : dim + input_dim.size(); - auto stride_dim = phi::stride(input_dim); + auto stride_dim = common::stride(input_dim); int64_t stride = stride_dim[dim]; int64_t size = output_dim[dim]; int64_t delta = input_dim[dim] - size; diff --git a/paddle/phi/kernels/gpu/index_select_kernel.cu b/paddle/phi/kernels/gpu/index_select_kernel.cu index 910015e00f0103..b81fb22cb96e8a 100644 --- a/paddle/phi/kernels/gpu/index_select_kernel.cu +++ b/paddle/phi/kernels/gpu/index_select_kernel.cu @@ -34,7 +34,7 @@ void IndexSelectKernel(const Context& ctx, auto input_dim = x.dims(); auto output_dim = output->dims(); dim = dim >= 0 ? 
dim : dim + input_dim.size(); - auto stride_dim = phi::stride(input_dim); + auto stride_dim = common::stride(input_dim); int64_t stride = stride_dim[dim]; int64_t size = output_dim[dim]; int64_t delta = input_dim[dim] - size; diff --git a/paddle/phi/kernels/gpu/instance_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/instance_norm_grad_kernel.cu index 0f17a1bcc318a7..4bdd78e2d6e2c1 100644 --- a/paddle/phi/kernels/gpu/instance_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/instance_norm_grad_kernel.cu @@ -16,8 +16,8 @@ #include "glog/logging.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/gpu/instance_norm_kernel.cu b/paddle/phi/kernels/gpu/instance_norm_kernel.cu index b46584798b1ead..6358611e9e958c 100644 --- a/paddle/phi/kernels/gpu/instance_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/instance_norm_kernel.cu @@ -16,8 +16,8 @@ #include "glog/logging.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu b/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu index da633b73bf6e43..f596859fd2d575 100644 --- a/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu @@ -14,11 +14,11 @@ #include "paddle/phi/kernels/interpolate_grad_kernel.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/common/amp_type_traits.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/interpolate_function.h" #include "paddle/phi/kernels/funcs/math_cuda_utils.h" @@ -761,7 +761,7 @@ static void Interpolate1DCUDABwd( bool align_corners, int align_mode, DenseTensor* input_grad) { - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; funcs::ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -875,7 +875,7 @@ static void Interpolate2DCUDABwd( bool align_corners, int align_mode, DenseTensor* input_grad) { - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; funcs::ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -1134,7 +1134,7 @@ static void Interpolate3DCUDABwd( bool align_corners, int align_mode, DenseTensor* input_grad) { - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; funcs::ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); diff --git a/paddle/phi/kernels/gpu/interpolate_kernel.cu b/paddle/phi/kernels/gpu/interpolate_kernel.cu index 7d2211f2758488..a87d235971d2be 100644 --- a/paddle/phi/kernels/gpu/interpolate_kernel.cu +++ b/paddle/phi/kernels/gpu/interpolate_kernel.cu @@ -14,13 +14,13 
@@ #include "paddle/phi/kernels/interpolate_kernel.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_device_function.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/common/float16.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/interpolate_function.h" #include "paddle/phi/kernels/primitive/datamover_primitives.h" @@ -658,7 +658,7 @@ static void Interpolate1DCUDAFwd( DenseTensor* output) { auto* input_data = input.data(); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; funcs::ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -772,7 +772,7 @@ static void Interpolate2DCUDAFwd( DenseTensor* output) { auto* input_data = input.data(); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; funcs::ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); @@ -1024,7 +1024,7 @@ static void Interpolate3DCUDAFwd( DenseTensor* output) { auto* input_data = input.data(); - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; funcs::ExtractNCDWH(input.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w); diff --git a/paddle/phi/kernels/gpu/kthvalue_kernel.cu b/paddle/phi/kernels/gpu/kthvalue_kernel.cu index 2ecec80c27b242..b79ee58d38975f 100644 --- a/paddle/phi/kernels/gpu/kthvalue_kernel.cu +++ b/paddle/phi/kernels/gpu/kthvalue_kernel.cu @@ -46,7 +46,7 @@ bool SortKthvalue(const phi::GPUContext& dev_ctx, auto cu_stream = dev_ctx.stream(); DenseTensor input_indices; const std::vector dims = {num_rows, num_cols}; - auto dim = phi::make_ddim(dims); + auto dim = common::make_ddim(dims); input_indices.Resize(dim); dev_ctx.template Alloc(&input_indices); size_t temp_storage_bytes = -1; @@ -140,7 +140,7 @@ bool SortKthvalue(const phi::GPUContext& dev_ctx, auto e_tmp_indices = EigenMatrix::From(static_cast(temp_indices)); std::vector odims = {static_cast(num_rows), static_cast(1)}; - dim = phi::make_ddim(odims); + dim = common::make_ddim(odims); auto e_values = EigenMatrix::From(*out_tensor, dim); auto e_tmp_values = EigenMatrix::From(static_cast(temp_values)); @@ -182,7 +182,7 @@ void KthvalueKernel(const Context& dev_ctx, if (axis == in_dims.size() - 1) { const int64_t& input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t& input_width = in_dims[in_dims.size() - 1]; #if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 9000 const T* input_data = x.data(); @@ -221,7 +221,7 @@ void KthvalueKernel(const Context& dev_ctx, for (int i = axis + 1; i < in_dims.size(); i++) { tmp_out_shape.emplace_back(in_dims[i]); } - DDim tmp_out_dims = phi::make_ddim(tmp_out_shape); + DDim tmp_out_dims = common::make_ddim(tmp_out_shape); output->Resize(tmp_out_dims); indices->Resize(tmp_out_dims); } @@ -243,8 +243,8 @@ void KthvalueKernel(const Context& dev_ctx, trans_out.Resize(trans_out_dims); int64_t* tran_indices_data = dev_ctx.template 
Alloc(&trans_ind); T* tran_output_data = dev_ctx.template Alloc(&trans_out); - const int64_t input_height = - phi::product(phi::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); + const int64_t input_height = common::product( + common::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); const int64_t input_width = trans_dims[trans_dims.size() - 1]; #if defined(PADDLE_WITH_CUDA) && CUDA_VERSION >= 9000 diff --git a/paddle/phi/kernels/gpu/layer_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/layer_norm_grad_kernel.cu index e8fc640cdd508e..1e10da8967c21a 100644 --- a/paddle/phi/kernels/gpu/layer_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/layer_norm_grad_kernel.cu @@ -45,7 +45,7 @@ void LayerNormGradKernel(const Context &dev_ctx, auto *d_y = &out_grad; const auto &x_dims = x.dims(); - auto matrix_dim = phi::flatten_to_2d(x_dims, begin_norm_axis); + auto matrix_dim = common::flatten_to_2d(x_dims, begin_norm_axis); int64_t batch_size = static_cast(matrix_dim[0]); int64_t feature_size = static_cast(matrix_dim[1]); diff --git a/paddle/phi/kernels/gpu/layer_norm_kernel.cu b/paddle/phi/kernels/gpu/layer_norm_kernel.cu index eb85d9ac826d0a..d9757183b289c8 100644 --- a/paddle/phi/kernels/gpu/layer_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/layer_norm_kernel.cu @@ -463,8 +463,8 @@ void LayerNormDirectCUDAFunctor::operator()(gpuStream_t stream, U *variance, int begin_norm_axis, float eps) { - const auto x_dims = phi::make_ddim(input_shape); - auto matrix_dim = phi::flatten_to_2d(x_dims, begin_norm_axis); + const auto x_dims = common::make_ddim(input_shape); + auto matrix_dim = common::flatten_to_2d(x_dims, begin_norm_axis); int64_t batch_size = static_cast(matrix_dim[0]); int64_t feature_size = static_cast(matrix_dim[1]); switch (phi::funcs::GetDesiredBlockDim(feature_size)) { @@ -534,7 +534,7 @@ void LayerNormKernel(const Context &dev_ctx, "Unsupported data type of Scale and Bias")); } - auto matrix_dim = phi::flatten_to_2d(x_dims, begin_norm_axis); + auto matrix_dim = common::flatten_to_2d(x_dims, begin_norm_axis); int64_t batch_size = static_cast(matrix_dim[0]); int64_t feature_size = static_cast(matrix_dim[1]); auto stream = dev_ctx.stream(); diff --git a/paddle/phi/kernels/gpu/lerp_grad_kernel.cu b/paddle/phi/kernels/gpu/lerp_grad_kernel.cu index d18c769b5117d0..6f41545e4d8e31 100644 --- a/paddle/phi/kernels/gpu/lerp_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/lerp_grad_kernel.cu @@ -82,10 +82,10 @@ __global__ void LerpGradScalarKernelImpl(const T* weight, bool XYNeedReduce(const DenseTensor& x, const DenseTensor& y, const DenseTensor& out) { - auto x_dims = - x.dims().size() ? x.dims() : make_ddim(std::vector(1, 1)); - auto y_dims = - y.dims().size() ? y.dims() : make_ddim(std::vector(1, 1)); + auto x_dims = x.dims().size() ? x.dims() + : common::make_ddim(std::vector(1, 1)); + auto y_dims = y.dims().size() ? y.dims() + : common::make_ddim(std::vector(1, 1)); auto out_dims = out.dims(); if (out_dims.size() == 0) { @@ -242,7 +242,7 @@ void LerpGradKernel(const Context& ctx, x_grad_data, y_grad_data); - auto zero_dim = make_ddim(std::vector(1, 1)); + auto zero_dim = common::make_ddim(std::vector(1, 1)); if (x_grad) { std::vector reduce_axis_x = funcs::GetReduceDim(x_grad->dims().size() ? 
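Every hunk above makes the same mechanical substitution: the DDim helpers that used to be declared in paddle/phi/core/ddim.h (make_ddim, vectorize, slice_ddim, product, stride, flatten_to_2d) now come from paddle/common/ddim.h and are qualified with common:: at the call site. A minimal sketch of the new spelling, assuming a Paddle source tree where paddle/common/ddim.h resolves; the function below is illustrative only and is not part of this patch:

#include <cstdint>
#include <vector>

#include "paddle/common/ddim.h"

// Build a shape, flatten it to 2-D at axis 1, and recover the extents --
// the same pattern the layer_norm and lerp call sites above now use.
void DDimMigrationSketch() {
  common::DDim dims = common::make_ddim({8, 16, 32});    // was phi::make_ddim
  common::DDim matrix = common::flatten_to_2d(dims, 1);  // was phi::flatten_to_2d
  std::vector<int64_t> shape = common::vectorize(dims);  // was phi::vectorize
  int64_t rows = matrix[0];  // 8
  int64_t cols = matrix[1];  // 16 * 32 = 512
  (void)shape;
  (void)rows;
  (void)cols;
}

Only the namespace qualification changes; argument lists and return types are untouched, which is why nearly every hunk in this section is a one-line substitution.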
x_grad->dims() : zero_dim, diff --git a/paddle/phi/kernels/gpu/lerp_kernel.cu b/paddle/phi/kernels/gpu/lerp_kernel.cu index f9d8514a54ca22..9ecb6b14379e2d 100644 --- a/paddle/phi/kernels/gpu/lerp_kernel.cu +++ b/paddle/phi/kernels/gpu/lerp_kernel.cu @@ -86,20 +86,22 @@ void LerpKernel(const Context &ctx, if (x.dims().size() < y.dims().size() && x.dims().size() < weight.dims().size()) { // x broadcast to b_min - ExpandKernel(ctx, x, phi::vectorize(b_min.dims()), &b_min); + ExpandKernel( + ctx, x, common::vectorize(b_min.dims()), &b_min); inputs.emplace_back(&b_min); inputs.emplace_back(&y); inputs.emplace_back(&weight); } else if (y.dims().size() < weight.dims().size()) { // y broadcast to b_min - ExpandKernel(ctx, y, phi::vectorize(b_min.dims()), &b_min); + ExpandKernel( + ctx, y, common::vectorize(b_min.dims()), &b_min); inputs.emplace_back(&x); inputs.emplace_back(&b_min); inputs.emplace_back(&weight); } else { // weight broadcast to b_min ExpandKernel( - ctx, weight, phi::vectorize(b_min.dims()), &b_min); + ctx, weight, common::vectorize(b_min.dims()), &b_min); inputs.emplace_back(&x); inputs.emplace_back(&y); inputs.emplace_back(&b_min); diff --git a/paddle/phi/kernels/gpu/linspace_kernel.cu b/paddle/phi/kernels/gpu/linspace_kernel.cu index 68ff7c3b0a93de..3a54561c9a1444 100644 --- a/paddle/phi/kernels/gpu/linspace_kernel.cu +++ b/paddle/phi/kernels/gpu/linspace_kernel.cu @@ -88,7 +88,7 @@ void LinspaceKernel(const Context& ctx, "than 0, but received num is %d", num)); - out->Resize(phi::make_ddim({num})); + out->Resize(common::make_ddim({num})); T* out_data = ctx.template Alloc(out); auto stream = ctx.stream(); diff --git a/paddle/phi/kernels/gpu/logspace_kernel.cu b/paddle/phi/kernels/gpu/logspace_kernel.cu index 458600e2f29e14..250e440170d7d1 100644 --- a/paddle/phi/kernels/gpu/logspace_kernel.cu +++ b/paddle/phi/kernels/gpu/logspace_kernel.cu @@ -93,7 +93,7 @@ void LogspaceKernel(const Context& ctx, "than 0, but received num is %d", num)); - out->Resize(phi::make_ddim({num})); + out->Resize(common::make_ddim({num})); T* out_data = ctx.template Alloc(out); double step = 0; diff --git a/paddle/phi/kernels/gpu/logsumexp_kernel.cu b/paddle/phi/kernels/gpu/logsumexp_kernel.cu index ef2c29bbb2da0d..d780f2258651f0 100644 --- a/paddle/phi/kernels/gpu/logsumexp_kernel.cu +++ b/paddle/phi/kernels/gpu/logsumexp_kernel.cu @@ -57,8 +57,8 @@ void LogsumexpFallbackKernel(const Context& dev_ctx, auto* in_x = &x; auto* out_y = out; - auto outdim = phi::make_ddim(outdim_vec); - auto keeped_outdim = phi::make_ddim(keeped_outdim_vec); + auto outdim = common::make_ddim(outdim_vec); + auto keeped_outdim = common::make_ddim(keeped_outdim_vec); out->Resize(outdim); dev_ctx.template Alloc(out_y); @@ -128,7 +128,7 @@ void LogsumexpKernel(const Context& dev_ctx, } } - auto outdim = phi::make_ddim(outdim_vec); + auto outdim = common::make_ddim(outdim_vec); if (compute_size <= 1024) { if (perm.size() != xdim.size()) perm.insert(perm.end(), axis_vec.begin(), axis_vec.end()); @@ -138,7 +138,7 @@ void LogsumexpKernel(const Context& dev_ctx, (axis_vec.size() == 1 && axis_vec[0] == xdim.size())) { transpose_x = x; } else { - transpose_x.Resize(make_ddim(transpose_shape)); + transpose_x.Resize(common::make_ddim(transpose_shape)); dev_ctx.template Alloc(&transpose_x); phi::funcs::TransposeGPUKernelDriver(dev_ctx, x, perm, &transpose_x); } diff --git a/paddle/phi/kernels/gpu/lstsq_kernel.cu b/paddle/phi/kernels/gpu/lstsq_kernel.cu index adb0ca09d89386..85db2de74e6fdd 100644 --- a/paddle/phi/kernels/gpu/lstsq_kernel.cu 
+++ b/paddle/phi/kernels/gpu/lstsq_kernel.cu @@ -61,22 +61,22 @@ void LstsqKernel(const Context& dev_ctx, T rcond = rcond_scalar.to(); DenseTensor* new_x = new DenseTensor(); - new_x->Resize(phi::make_ddim({batch_count, m, n})); + new_x->Resize(common::make_ddim({batch_count, m, n})); dev_ctx.template Alloc(new_x); phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), true, new_x); DenseTensor* new_y = new DenseTensor(); - new_y->Resize(phi::make_ddim({batch_count, m, nrhs})); + new_y->Resize(common::make_ddim({batch_count, m, nrhs})); dev_ctx.template Alloc(new_y); phi::Copy(dev_ctx, y, dev_ctx.GetPlace(), true, new_y); // Prepare tau - auto tau_dims_vec = phi::vectorize(x_dims); + auto tau_dims_vec = common::vectorize(x_dims); tau_dims_vec.pop_back(); tau_dims_vec[tau_dims_vec.size() - 1] = min_mn; DenseTensor* tau = new DenseTensor(); - tau->Resize(phi::make_ddim(tau_dims_vec)); + tau->Resize(common::make_ddim(tau_dims_vec)); auto tau_data = dev_ctx.template Alloc(tau); if (m >= n) { @@ -108,7 +108,7 @@ void LstsqKernel(const Context& dev_ctx, DenseTensor slice_r = phi::funcs::Slice(dev_ctx, trans_r, {-2}, {0}, {min_mn}); DenseTensor* res_r = new DenseTensor(); - res_r->Resize(phi::make_ddim({batch_count, min_mn, min_mn})); + res_r->Resize(common::make_ddim({batch_count, min_mn, min_mn})); dev_ctx.template Alloc(res_r); phi::TrilTriuKernel(dev_ctx, slice_r, 0, false, res_r); @@ -133,7 +133,7 @@ void LstsqKernel(const Context& dev_ctx, DenseTensor slice_r = phi::funcs::Slice(dev_ctx, trans_r, {-2}, {0}, {min_mn}); DenseTensor* res_r = new DenseTensor(); - res_r->Resize(phi::make_ddim({batch_count, min_mn, min_mn})); + res_r->Resize(common::make_ddim({batch_count, min_mn, min_mn})); dev_ctx.template Alloc(res_r); phi::TrilTriuKernel(dev_ctx, slice_r, 0, false, res_r); @@ -161,7 +161,7 @@ void LstsqKernel(const Context& dev_ctx, dev_ctx, solu_tensor, dev_ctx.GetPlace(), true, solution); } - if (batch_count == 1) solution->Resize(phi::make_ddim({n, nrhs})); + if (batch_count == 1) solution->Resize(common::make_ddim({n, nrhs})); GetResidualsTensor(dev_ctx, x, y, solution, residuals); } diff --git a/paddle/phi/kernels/gpu/lu_kernel.cu b/paddle/phi/kernels/gpu/lu_kernel.cu index d5646d546b67d9..f509e0a173161b 100644 --- a/paddle/phi/kernels/gpu/lu_kernel.cu +++ b/paddle/phi/kernels/gpu/lu_kernel.cu @@ -138,16 +138,16 @@ void LUKernel(const Context& dev_ctx, int n = static_cast(outdims[outrank - 2]); int lda = std::max(1, m); if (pivot) { - auto ipiv_dims = phi::slice_ddim(outdims, 0, outrank - 1); + auto ipiv_dims = common::slice_ddim(outdims, 0, outrank - 1); ipiv_dims[outrank - 2] = std::min(m, n); pivots->Resize(ipiv_dims); } dev_ctx.template Alloc(pivots); auto ipiv_data = pivots->data(); - auto info_dims = phi::slice_ddim(outdims, 0, outrank - 2); + auto info_dims = common::slice_ddim(outdims, 0, outrank - 2); if (info_dims.size() == 0) { - info_dims = phi::make_ddim({1}); + info_dims = common::make_ddim({1}); } infos->Resize(info_dims); dev_ctx.template Alloc(infos); diff --git a/paddle/phi/kernels/gpu/masked_select_grad_kernel.cu b/paddle/phi/kernels/gpu/masked_select_grad_kernel.cu index 4feadcf899a443..4bf5949f084fe5 100644 --- a/paddle/phi/kernels/gpu/masked_select_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/masked_select_grad_kernel.cu @@ -65,8 +65,8 @@ void MaskedSelectGradKernel(const Context& dev_ctx, bool expand_x = false; auto expanded_size = funcs::MatrixGetBroadcastBatchPortion( - vectorize(x_grad->dims()), vectorize(mask.dims())); - auto expaned_dims = make_ddim(expanded_size); + 
common::vectorize(x_grad->dims()), common::vectorize(mask.dims())); + auto expaned_dims = common::make_ddim(expanded_size); if (mask.dims() != expaned_dims) { ExpandKernel( diff --git a/paddle/phi/kernels/gpu/masked_select_kernel.cu b/paddle/phi/kernels/gpu/masked_select_kernel.cu index cd92d7f03e7dfd..9739f9799a4ec1 100644 --- a/paddle/phi/kernels/gpu/masked_select_kernel.cu +++ b/paddle/phi/kernels/gpu/masked_select_kernel.cu @@ -54,9 +54,9 @@ void MaskedSelectKernel(const Context& dev_ctx, DenseTensor x_expand; auto expanded_size = funcs::MatrixGetBroadcastBatchPortion( - vectorize(x.dims()), vectorize(mask.dims())); + common::vectorize(x.dims()), common::vectorize(mask.dims())); - DDim epxand_dims = make_ddim(expanded_size); + DDim epxand_dims = common::make_ddim(expanded_size); if (mask.dims() != epxand_dims) { phi::ExpandKernel( dev_ctx, mask, IntArray(expanded_size), &mask_expand); diff --git a/paddle/phi/kernels/gpu/mode_kernel.cu b/paddle/phi/kernels/gpu/mode_kernel.cu index ed598b2e75d849..793928177000e6 100644 --- a/paddle/phi/kernels/gpu/mode_kernel.cu +++ b/paddle/phi/kernels/gpu/mode_kernel.cu @@ -56,7 +56,7 @@ void ModeKernel(const Context& dev_ctx, if (axis == in_dims.size() - 1) { const int64_t& input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t& input_width = in_dims[in_dims.size() - 1]; funcs::GetModebySort( dev_ctx, &x, input_width, input_height, output_data, indices_data); @@ -80,7 +80,7 @@ void ModeKernel(const Context& dev_ctx, for (int i = axis + 1; i < in_dims.size(); i++) { tmp_out_shape.emplace_back(in_dims[i]); } - DDim tmp_out_dim = phi::make_ddim(tmp_out_shape); + DDim tmp_out_dim = common::make_ddim(tmp_out_shape); out->Resize(tmp_out_dim); indices->Resize(tmp_out_dim); } @@ -109,8 +109,8 @@ void ModeKernel(const Context& dev_ctx, trans_out.Resize(trans_out_shape); T* trans_out_data = dev_ctx.template Alloc(&trans_out); - const int64_t input_height = - phi::product(phi::slice_ddim(trans_shape, 0, trans_shape.size() - 1)); + const int64_t input_height = common::product( + common::slice_ddim(trans_shape, 0, trans_shape.size() - 1)); const int64_t input_width = trans_shape[trans_shape.size() - 1]; funcs::GetModebySort(dev_ctx, &trans_input, diff --git a/paddle/phi/kernels/gpu/multinomial_kernel.cu b/paddle/phi/kernels/gpu/multinomial_kernel.cu index 96fc3d1ac2b2e5..635e9189b7d89a 100644 --- a/paddle/phi/kernels/gpu/multinomial_kernel.cu +++ b/paddle/phi/kernels/gpu/multinomial_kernel.cu @@ -22,10 +22,10 @@ limitations under the License. 
*/ namespace cub = hipcub; #endif +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/scalar.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/arg_min_max_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" @@ -193,7 +193,8 @@ void MultinomialKernel(const Context& dev_ctx, ArgMaxKernel( dev_ctx, rand, -1, true, false, DataType::INT64, out); } else { - std::vector out_dim_vec = vectorize(out->dims()); + std::vector out_dim_vec = + common::vectorize(out->dims()); DenseTensor value = Empty(dev_ctx, IntArray(out_dim_vec)); TopkKernel( dev_ctx, rand, num_samples, -1, true, true, &value, out); diff --git a/paddle/phi/kernels/gpu/nanmedian_kernel.cu b/paddle/phi/kernels/gpu/nanmedian_kernel.cu index ccd1b5561f081d..01144442f3904b 100644 --- a/paddle/phi/kernels/gpu/nanmedian_kernel.cu +++ b/paddle/phi/kernels/gpu/nanmedian_kernel.cu @@ -166,10 +166,10 @@ void ProcessMedianKernel(const Context& dev_ctx, bool ignore_nan = true; if (ignore_nan) { - nan_counts.Resize(phi::make_ddim({pre_dim})); + nan_counts.Resize(common::make_ddim({pre_dim})); dev_ctx.template Alloc(&nan_counts); nan_counts_ptr = nan_counts.data(); - nan_stat.Resize(phi::make_ddim({2})); + nan_stat.Resize(common::make_ddim({2})); int64_t* nan_stat_mem = dev_ctx.template Alloc(&nan_stat); int64_t* nan_stat_ptr = nan_stat.data(); diff --git a/paddle/phi/kernels/gpu/nms_kernel.cu b/paddle/phi/kernels/gpu/nms_kernel.cu index 8eab5c261f5863..3de36f7bd68b38 100644 --- a/paddle/phi/kernels/gpu/nms_kernel.cu +++ b/paddle/phi/kernels/gpu/nms_kernel.cu @@ -103,7 +103,7 @@ void NMSKernel(const Context& dev_ctx, } } } - output->Resize(phi::make_ddim({last_box_num})); + output->Resize(common::make_ddim({last_box_num})); auto* output_data = dev_ctx.template Alloc(output); memory_utils::Copy(dev_ctx.GetPlace(), output_data, diff --git a/paddle/phi/kernels/gpu/nonzero_kernel.cu b/paddle/phi/kernels/gpu/nonzero_kernel.cu index bc44f4f033c458..65cdcd3d6a058d 100644 --- a/paddle/phi/kernels/gpu/nonzero_kernel.cu +++ b/paddle/phi/kernels/gpu/nonzero_kernel.cu @@ -20,7 +20,7 @@ namespace cub = hipcub; #endif -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/gpu/number_count_kernel.cu b/paddle/phi/kernels/gpu/number_count_kernel.cu index e17727751f4bf1..6fdfb71724aef4 100644 --- a/paddle/phi/kernels/gpu/number_count_kernel.cu +++ b/paddle/phi/kernels/gpu/number_count_kernel.cu @@ -77,7 +77,7 @@ void NumberCountKernel(const Context& ctx, DenseTensor* out) { int64_t batch_size = numbers.numel(); - DDim out_dims = phi::make_ddim({upper_range}); + DDim out_dims = common::make_ddim({upper_range}); out->Resize(out_dims); auto out_data = ctx.template Alloc(out); const T* gate_data = numbers.data(); diff --git a/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu b/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu index a2ec60109d6404..337620a556db5e 100644 --- a/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu @@ -50,15 +50,16 @@ void OverlapAddGradKernel(const Context& dev_ctx, phi::DDim x_grad_resized_dims; phi::DDim out_grad_resized_dims; if (axis == 0) { - preserved_dims = phi::slice_ddim(out_grad_.dims(), 1, out_grad_rank); + preserved_dims = 
common::slice_ddim(out_grad_.dims(), 1, out_grad_rank); x_grad_resized_dims = { - n_frames, frame_length, phi::product(preserved_dims)}; - out_grad_resized_dims = {seq_length, phi::product(preserved_dims)}; + n_frames, frame_length, common::product(preserved_dims)}; + out_grad_resized_dims = {seq_length, common::product(preserved_dims)}; } else { - preserved_dims = phi::slice_ddim(out_grad_.dims(), 0, out_grad_rank - 1); + preserved_dims = + common::slice_ddim(out_grad_.dims(), 0, out_grad_rank - 1); x_grad_resized_dims = { - phi::product(preserved_dims), frame_length, n_frames}; - out_grad_resized_dims = {phi::product(preserved_dims), seq_length}; + common::product(preserved_dims), frame_length, n_frames}; + out_grad_resized_dims = {common::product(preserved_dims), seq_length}; } x_grad->Resize(x_grad_resized_dims); out_grad_.Resize(out_grad_resized_dims); @@ -73,31 +74,31 @@ void OverlapAddGradKernel(const Context& dev_ctx, trans_out_grad = out_grad_; std::vector perm_x_grad{1, 0}; - auto x_grad_dims_vec = phi::vectorize(x_grad->dims()); + auto x_grad_dims_vec = common::vectorize(x_grad->dims()); for (int i = 0; i < x_grad->dims().size(); ++i) { x_grad_dims_vec[i] = x_grad->dims()[perm_x_grad[i]]; } - trans_x_grad.Resize(phi::make_ddim(x_grad_dims_vec)); + trans_x_grad.Resize(common::make_ddim(x_grad_dims_vec)); dev_ctx.template Alloc(&trans_x_grad); phi::funcs::TransCompute( perm_x_grad.size(), dev_ctx, *x_grad, &trans_x_grad, perm_x_grad); } else { std::vector perm_d_out{1, 0}; - auto out_grad_dims_vec = phi::vectorize(out_grad_.dims()); + auto out_grad_dims_vec = common::vectorize(out_grad_.dims()); for (int i = 0; i < out_grad_.dims().size(); ++i) { out_grad_dims_vec[i] = out_grad_.dims()[perm_d_out[i]]; } - trans_out_grad.Resize(phi::make_ddim(out_grad_dims_vec)); + trans_out_grad.Resize(common::make_ddim(out_grad_dims_vec)); dev_ctx.template Alloc(&trans_out_grad); phi::funcs::TransCompute( perm_d_out.size(), dev_ctx, out_grad_, &trans_out_grad, perm_d_out); std::vector perm_x_grad{2, 1, 0}; - auto x_grad_dims_vec = phi::vectorize(x_grad->dims()); + auto x_grad_dims_vec = common::vectorize(x_grad->dims()); for (int i = 0; i < x_grad->dims().size(); ++i) { x_grad_dims_vec[i] = x_grad->dims()[perm_x_grad[i]]; } - trans_x_grad.Resize(phi::make_ddim(x_grad_dims_vec)); + trans_x_grad.Resize(common::make_ddim(x_grad_dims_vec)); dev_ctx.template Alloc(&trans_x_grad); phi::funcs::TransCompute( perm_x_grad.size(), dev_ctx, *x_grad, &trans_x_grad, perm_x_grad); @@ -146,7 +147,7 @@ void OverlapAddGradKernel(const Context& dev_ctx, restored_x_grad_shape.push_back(n_frames); } - x_grad->Resize(phi::make_ddim(restored_x_grad_shape)); + x_grad->Resize(common::make_ddim(restored_x_grad_shape)); } } diff --git a/paddle/phi/kernels/gpu/overlap_add_kernel.cu b/paddle/phi/kernels/gpu/overlap_add_kernel.cu index b8726b8d8e15ad..71668e9e10b43a 100644 --- a/paddle/phi/kernels/gpu/overlap_add_kernel.cu +++ b/paddle/phi/kernels/gpu/overlap_add_kernel.cu @@ -46,13 +46,15 @@ void OverlapAddKernel(const Context& dev_ctx, phi::DDim x_resized_dims; phi::DDim out_resized_dims; if (axis == 0) { - preserved_dims = phi::slice_ddim(out->dims(), 1, out_rank); - x_resized_dims = {n_frames, frame_length, phi::product(preserved_dims)}; - out_resized_dims = {seq_length, phi::product(preserved_dims)}; + preserved_dims = common::slice_ddim(out->dims(), 1, out_rank); + x_resized_dims = { + n_frames, frame_length, common::product(preserved_dims)}; + out_resized_dims = {seq_length, common::product(preserved_dims)}; } else { - 
preserved_dims = phi::slice_ddim(out->dims(), 0, out_rank - 1); - x_resized_dims = {phi::product(preserved_dims), frame_length, n_frames}; - out_resized_dims = {phi::product(preserved_dims), seq_length}; + preserved_dims = common::slice_ddim(out->dims(), 0, out_rank - 1); + x_resized_dims = { + common::product(preserved_dims), frame_length, n_frames}; + out_resized_dims = {common::product(preserved_dims), seq_length}; } x_.Resize(x_resized_dims); out->Resize(out_resized_dims); @@ -67,31 +69,31 @@ void OverlapAddKernel(const Context& dev_ctx, trans_out = *out; std::vector perm_x{1, 0}; - auto x_dims_vec = phi::vectorize(x_.dims()); + auto x_dims_vec = common::vectorize(x_.dims()); for (int i = 0; i < x_.dims().size(); ++i) { x_dims_vec[i] = x_.dims()[perm_x[i]]; } - trans_x.Resize(phi::make_ddim(x_dims_vec)); + trans_x.Resize(common::make_ddim(x_dims_vec)); dev_ctx.template Alloc(&trans_x); phi::funcs::TransCompute( perm_x.size(), dev_ctx, x_, &trans_x, perm_x); } else { std::vector perm_out{1, 0}; - auto out_dims_vec = phi::vectorize(out->dims()); + auto out_dims_vec = common::vectorize(out->dims()); for (int i = 0; i < out->dims().size(); ++i) { out_dims_vec[i] = out->dims()[perm_out[i]]; } - trans_out.Resize(phi::make_ddim(out_dims_vec)); + trans_out.Resize(common::make_ddim(out_dims_vec)); dev_ctx.template Alloc(&trans_out); phi::funcs::TransCompute( perm_out.size(), dev_ctx, *out, &trans_out, perm_out); std::vector perm_x{2, 1, 0}; - auto x_dims_vec = phi::vectorize(x_.dims()); + auto x_dims_vec = common::vectorize(x_.dims()); for (int i = 0; i < x_.dims().size(); ++i) { x_dims_vec[i] = x_.dims()[perm_x[i]]; } - trans_x.Resize(phi::make_ddim(x_dims_vec)); + trans_x.Resize(common::make_ddim(x_dims_vec)); dev_ctx.template Alloc(&trans_x); phi::funcs::TransCompute( perm_x.size(), dev_ctx, x_, &trans_x, perm_x); @@ -132,7 +134,7 @@ void OverlapAddKernel(const Context& dev_ctx, restored_out_shape.push_back(seq_length); } - out->Resize(phi::make_ddim(restored_out_shape)); + out->Resize(common::make_ddim(restored_out_shape)); } } diff --git a/paddle/phi/kernels/gpu/p_recv_kernel.cu b/paddle/phi/kernels/gpu/p_recv_kernel.cu index 1e413797b6b893..b6fd090173260f 100644 --- a/paddle/phi/kernels/gpu/p_recv_kernel.cu +++ b/paddle/phi/kernels/gpu/p_recv_kernel.cu @@ -16,9 +16,9 @@ #include "glog/logging.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #if defined(PADDLE_WITH_NCCL) || \ @@ -168,7 +168,7 @@ void PRecvArrayKernel(const Context& dev_ctx, dev_ctx.Alloc(&out, dtype); comm_ctx->Recv(&out, out.numel(), peer, stream); VLOG(3) << "rank " << comm_ctx->GetRank() << " recv " - << phi::product(out_dims) << " from " << peer; + << common::product(out_dims) << " from " << peer; } #else PADDLE_THROW( diff --git a/paddle/phi/kernels/gpu/p_send_kernel.cu b/paddle/phi/kernels/gpu/p_send_kernel.cu index 520adcf730a1d6..efbb69afcdab75 100644 --- a/paddle/phi/kernels/gpu/p_send_kernel.cu +++ b/paddle/phi/kernels/gpu/p_send_kernel.cu @@ -156,7 +156,7 @@ void PSendArrayKernel(const Context& dev_ctx, ncclDataType_t dtype = ToNCCLDataType(x.type()); comm_ctx->Send(x, x.numel(), peer, stream); VLOG(3) << "rank " << comm_ctx->GetRank() << " send " - << phi::product(x.dims()) << " to " << peer; + << common::product(x.dims()) << " to " << peer; } #else PADDLE_THROW( diff --git a/paddle/phi/kernels/gpu/qr_kernel.cu b/paddle/phi/kernels/gpu/qr_kernel.cu index 
14f602cc95bd62..5bbb2ef158aa1a 100644 --- a/paddle/phi/kernels/gpu/qr_kernel.cu +++ b/paddle/phi/kernels/gpu/qr_kernel.cu @@ -40,7 +40,7 @@ static DenseTensor Fill(const Context& ctx, std::vector shape, float fill_value) { DenseTensor ret; - ret.Resize(make_ddim(shape)); + ret.Resize(common::make_ddim(shape)); ctx.template Alloc(&ret); funcs::SetConstant()(ctx, &ret, T(fill_value)); return ret; @@ -85,7 +85,7 @@ void QrKernel(const Context& ctx, phi::Copy(ctx, x, ctx.GetPlace(), false, &qr); // Prepare tau - auto tau_dims_vec = phi::vectorize(x_dims); + auto tau_dims_vec = common::vectorize(x_dims); tau_dims_vec.pop_back(); tau_dims_vec[tau_dims_vec.size() - 1] = min_mn; DenseTensor tau = Fill(ctx, tau_dims_vec, 0); @@ -133,7 +133,7 @@ void QrKernel(const Context& ctx, phi::Copy(ctx, sliced_q, q->place(), false, q); } else { if (m > n) { - auto new_qr_dims_vec = phi::vectorize(x_dims); + auto new_qr_dims_vec = common::vectorize(x_dims); new_qr_dims_vec[new_qr_dims_vec.size() - 1] = m; DenseTensor new_qr = Fill(ctx, new_qr_dims_vec, 0); auto new_qr_data = ctx.template Alloc>(&new_qr); @@ -195,11 +195,11 @@ void BatchedGeqrf(const GPUContext& dev_ctx, phi::dynload::cusolverDnSgeqrf_bufferSize(handle, m, n, a, lda, &lwork)); DenseTensor workspace = DenseTensor(); - workspace.Resize(make_ddim({lwork})); + workspace.Resize(common::make_ddim({lwork})); float* workspace_ptr = dev_ctx.template Alloc(&workspace); DenseTensor info = DenseTensor(); - info.Resize(make_ddim({1})); + info.Resize(common::make_ddim({1})); int* info_d = dev_ctx.template Alloc(&info); for (int i = 0; i < batch_size; ++i) { @@ -249,11 +249,11 @@ void BatchedGeqrf(const GPUContext& dev_ctx, phi::dynload::cusolverDnDgeqrf_bufferSize(handle, m, n, a, lda, &lwork)); DenseTensor workspace = DenseTensor(); - workspace.Resize(make_ddim({lwork})); + workspace.Resize(common::make_ddim({lwork})); double* workspace_ptr = dev_ctx.template Alloc(&workspace); DenseTensor info = DenseTensor(); - info.Resize(make_ddim({1})); + info.Resize(common::make_ddim({1})); int* info_d = dev_ctx.template Alloc(&info); for (int i = 0; i < batch_size; ++i) { @@ -304,11 +304,11 @@ void BatchedOrgqr(const GPUContext& dev_ctx, handle, m, n, k, a, lda, tau, &lwork)); DenseTensor workspace = DenseTensor(); - workspace.Resize(make_ddim({lwork})); + workspace.Resize(common::make_ddim({lwork})); float* workspace_ptr = dev_ctx.template Alloc(&workspace); DenseTensor info = DenseTensor(); - info.Resize(make_ddim({1})); + info.Resize(common::make_ddim({1})); int* info_d = dev_ctx.template Alloc(&info); for (int i = 0; i < batch_size; ++i) { @@ -360,11 +360,11 @@ void BatchedOrgqr(const GPUContext& dev_ctx, handle, m, n, k, a, lda, tau, &lwork)); DenseTensor workspace = DenseTensor(); - workspace.Resize(make_ddim({lwork})); + workspace.Resize(common::make_ddim({lwork})); double* workspace_ptr = dev_ctx.template Alloc(&workspace); DenseTensor info = DenseTensor(); - info.Resize(make_ddim({1})); + info.Resize(common::make_ddim({1})); int* info_d = dev_ctx.template Alloc(&info); for (int i = 0; i < batch_size; ++i) { diff --git a/paddle/phi/kernels/gpu/randint_kernel.cu b/paddle/phi/kernels/gpu/randint_kernel.cu index 39a57a5a6e8653..22c32b883bf08a 100644 --- a/paddle/phi/kernels/gpu/randint_kernel.cu +++ b/paddle/phi/kernels/gpu/randint_kernel.cu @@ -31,7 +31,7 @@ void RandintKernel(const Context& dev_ctx, DataType dtype, DenseTensor* out) { int seed = 0; - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); T* data = 
dev_ctx.template Alloc(out); funcs::uniform_distribution dist; funcs::uniform_int_transform trans(low, high); diff --git a/paddle/phi/kernels/gpu/randperm_kernel.cu b/paddle/phi/kernels/gpu/randperm_kernel.cu index 4c6597b93f91fd..f439336cc1e709 100644 --- a/paddle/phi/kernels/gpu/randperm_kernel.cu +++ b/paddle/phi/kernels/gpu/randperm_kernel.cu @@ -107,7 +107,7 @@ void RandpermKernel(const Context& dev_ctx, range_data[idx] = static_cast(idx); }); - out->Resize(phi::make_ddim({n})); + out->Resize(common::make_ddim({n})); T* out_data = dev_ctx.template Alloc(out); // Refer to [Algorithm of randperm] https://osf.io/af2hy/ to diff --git a/paddle/phi/kernels/gpu/reduce_amin_amax_common.h b/paddle/phi/kernels/gpu/reduce_amin_amax_common.h index b04267030b2846..02dfa4348f5c0d 100644 --- a/paddle/phi/kernels/gpu/reduce_amin_amax_common.h +++ b/paddle/phi/kernels/gpu/reduce_amin_amax_common.h @@ -42,7 +42,7 @@ void ReduceCudaAMaxAMinGrad(const Context& dev_ctx, // get reduce_dim and reduce_num for reduce_mean_grad int dim_size = in_x->dims().size(); auto reduce_dims = funcs::details::GetReduceDim(dims, dim_size, reduce_all); - auto update_dims = vectorize(d_x->dims()); + auto update_dims = common::vectorize(d_x->dims()); int reduce_num = 1; for (auto i : reduce_dims) { reduce_num *= (in_x->dims())[i]; @@ -52,12 +52,12 @@ void ReduceCudaAMaxAMinGrad(const Context& dev_ctx, // make new tensor reduce_out phi::DenseTensor new_y(out_y->type()); new_y.ShareDataWith(*out_y); - new_y.Resize(phi::make_ddim(update_dims)); + new_y.Resize(common::make_ddim(update_dims)); // make new tensor d_out phi::DenseTensor new_dout(d_out->type()); new_dout.ShareDataWith(*d_out); - new_dout.Resize(phi::make_ddim(update_dims)); + new_dout.Resize(common::make_ddim(update_dims)); dev_ctx.Alloc(d_x, d_out->dtype()); auto new_in = std::make_unique(*in_x); @@ -74,7 +74,7 @@ void ReduceCudaAMaxAMinGrad(const Context& dev_ctx, // make new tensor equal_count phi::DenseTensor* equal_count = new phi::DenseTensor(); - equal_count->Resize(phi::make_ddim(update_dims)); + equal_count->Resize(common::make_ddim(update_dims)); dev_ctx.template Alloc(equal_count); // compute diff --git a/paddle/phi/kernels/gpu/reduce_grad.h b/paddle/phi/kernels/gpu/reduce_grad.h index 7e01c1ae843910..0a01fe1ff1aab4 100644 --- a/paddle/phi/kernels/gpu/reduce_grad.h +++ b/paddle/phi/kernels/gpu/reduce_grad.h @@ -61,7 +61,7 @@ void ReduceGradKernel(const Context& dev_ctx, std::vector reduce_dims = funcs::details::GetReduceDim(dims, dim_size, reduce_all); - auto update_dims = vectorize(d_x->dims()); + auto update_dims = common::vectorize(d_x->dims()); int reduce_num = 1; for (auto i : reduce_dims) { reduce_num *= (in_x->dims())[i]; @@ -70,7 +70,7 @@ void ReduceGradKernel(const Context& dev_ctx, // make new tensor DenseTensor new_d_out(d_out->dtype()); new_d_out.ShareDataWith(*d_out); - new_d_out.Resize(phi::make_ddim(update_dims)); + new_d_out.Resize(common::make_ddim(update_dims)); dev_ctx.Alloc(d_x, x.dtype()); diff --git a/paddle/phi/kernels/gpu/reduce_kernel.cu b/paddle/phi/kernels/gpu/reduce_kernel.cu index d9714d37febd9b..51b50ed6e00248 100644 --- a/paddle/phi/kernels/gpu/reduce_kernel.cu +++ b/paddle/phi/kernels/gpu/reduce_kernel.cu @@ -52,7 +52,7 @@ void ReduceSumGradKernel(const Context& dev_ctx, std::vector reduce_dims = funcs::details::GetReduceDim(dims.GetData(), dim_size, reduce_all); - auto update_dims = vectorize(x.dims()); + auto update_dims = common::vectorize(x.dims()); for (auto i : reduce_dims) { update_dims[i] = 1; } @@ -60,7 +60,7 @@ 
void ReduceSumGradKernel(const Context& dev_ctx, // make new tensor DenseTensor new_out_grad(out_grad.dtype()); new_out_grad.ShareDataWith(out_grad); - new_out_grad.Resize(phi::make_ddim(update_dims)); + new_out_grad.Resize(common::make_ddim(update_dims)); // call ReduceGrad dev_ctx.Alloc(x_grad, x.dtype()); @@ -89,7 +89,7 @@ void ReduceMinGradKernel(const Context& dev_ctx, int dim_size = x.dims().size(); auto reduce_dims = funcs::details::GetReduceDim(dims.GetData(), dim_size, reduce_all); - auto update_dims = vectorize(x.dims()); + auto update_dims = common::vectorize(x.dims()); for (auto i : reduce_dims) { update_dims[i] = 1; } @@ -97,11 +97,11 @@ void ReduceMinGradKernel(const Context& dev_ctx, // make new tensor of out and out_grad phi::DenseTensor new_out(out.type()); new_out.ShareDataWith(out); - new_out.Resize(phi::make_ddim(update_dims)); + new_out.Resize(common::make_ddim(update_dims)); phi::DenseTensor new_out_grad(out_grad.type()); new_out_grad.ShareDataWith(out_grad); - new_out_grad.Resize(phi::make_ddim(update_dims)); + new_out_grad.Resize(common::make_ddim(update_dims)); // make equal_out phi::DenseTensor* equal_out = new phi::DenseTensor(); @@ -134,7 +134,7 @@ void ReduceMeanGradKernel(const Context& dev_ctx, std::vector reduce_dims = funcs::details::GetReduceDim(dims.GetData(), dim_size, reduce_all); - auto update_dims = vectorize(x.dims()); + auto update_dims = common::vectorize(x.dims()); int reduce_num = 1; for (auto i : reduce_dims) { reduce_num *= (x.dims())[i]; @@ -144,7 +144,7 @@ void ReduceMeanGradKernel(const Context& dev_ctx, // make new tensor DenseTensor new_out_grad(out_grad.dtype()); new_out_grad.ShareDataWith(out_grad); - new_out_grad.Resize(phi::make_ddim(update_dims)); + new_out_grad.Resize(common::make_ddim(update_dims)); // call BroadcastKernel dev_ctx.Alloc(x_grad, x.dtype()); @@ -172,7 +172,7 @@ void ReduceMaxGradKernel(const Context& dev_ctx, int dim_size = x.dims().size(); auto reduce_dims = funcs::details::GetReduceDim(dims.GetData(), dim_size, reduce_all); - auto update_dims = vectorize(x.dims()); + auto update_dims = common::vectorize(x.dims()); for (auto i : reduce_dims) { update_dims[i] = 1; } @@ -180,11 +180,11 @@ void ReduceMaxGradKernel(const Context& dev_ctx, // make new tensor of out and out_grad phi::DenseTensor new_out(out.type()); new_out.ShareDataWith(out); - new_out.Resize(phi::make_ddim(update_dims)); + new_out.Resize(common::make_ddim(update_dims)); phi::DenseTensor new_out_grad(out_grad.type()); new_out_grad.ShareDataWith(out_grad); - new_out_grad.Resize(phi::make_ddim(update_dims)); + new_out_grad.Resize(common::make_ddim(update_dims)); // make equal_out phi::DenseTensor* equal_out = new phi::DenseTensor(); diff --git a/paddle/phi/kernels/gpu/roi_pool_kernel.cu b/paddle/phi/kernels/gpu/roi_pool_kernel.cu index f7a53636fcbf65..75bdf5d4664529 100644 --- a/paddle/phi/kernels/gpu/roi_pool_kernel.cu +++ b/paddle/phi/kernels/gpu/roi_pool_kernel.cu @@ -111,7 +111,7 @@ void RoiPoolKernel(const Context& dev_ctx, DenseTensor* arg_max) { auto x_dims = x.dims(); int batch_size = x_dims[0]; - auto in_stride = phi::stride(x_dims); + auto in_stride = common::stride(x_dims); int channels = x_dims[1]; int height = x_dims[2]; int width = x_dims[3]; diff --git a/paddle/phi/kernels/gpu/roll_grad_kernel.cu b/paddle/phi/kernels/gpu/roll_grad_kernel.cu index 71d1cd356a2692..7239868e78159e 100644 --- a/paddle/phi/kernels/gpu/roll_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/roll_grad_kernel.cu @@ -37,7 +37,7 @@ void RollGradKernel(const Context& dev_ctx, 
int64_t numel = out_grad.numel(); auto input_dim = out_grad.dims(); - auto stride_dim = phi::stride(input_dim); + auto stride_dim = common::stride(input_dim); std::vector strides(rank), sizes(rank); if (axis.size() == 0) { diff --git a/paddle/phi/kernels/gpu/roll_kernel.cu b/paddle/phi/kernels/gpu/roll_kernel.cu index cf4f87ac118546..718abfe46994b2 100644 --- a/paddle/phi/kernels/gpu/roll_kernel.cu +++ b/paddle/phi/kernels/gpu/roll_kernel.cu @@ -14,11 +14,11 @@ #include "paddle/phi/kernels/roll_kernel.h" +#include "paddle/common/array.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/complex.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/array.h" #include "paddle/phi/kernels/gpu/roll_kernel_impl.h" namespace phi { @@ -37,7 +37,7 @@ void RollKernel(const Context& dev_ctx, int64_t numel = x.numel(); auto input_dim = x.dims(); - auto stride_dim = phi::stride(input_dim); + auto stride_dim = common::stride(input_dim); std::vector strides(rank), sizes(rank); if (axis.size() == 0) { diff --git a/paddle/phi/kernels/gpu/roll_kernel_impl.h b/paddle/phi/kernels/gpu/roll_kernel_impl.h index c7ffcb2d5ca522..7689f5242a1223 100644 --- a/paddle/phi/kernels/gpu/roll_kernel_impl.h +++ b/paddle/phi/kernels/gpu/roll_kernel_impl.h @@ -14,8 +14,8 @@ #pragma once +#include "paddle/common/array.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" -#include "paddle/phi/core/utils/array.h" #include "paddle/phi/kernels/primitive/kernel_primitives.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/send_u_recv_kernel.cu b/paddle/phi/kernels/gpu/send_u_recv_kernel.cu index 5e34c490d82999..192257ed8f95c0 100644 --- a/paddle/phi/kernels/gpu/send_u_recv_kernel.cu +++ b/paddle/phi/kernels/gpu/send_u_recv_kernel.cu @@ -47,11 +47,11 @@ void GraphSendRecvOpCUDAKernelLaunchHelper(const Context& ctx, } } else { // Set out dim following out_size. 
- std::vector dims_ = phi::vectorize(out->dims()); + std::vector dims_ = common::vectorize(out->dims()); if (dims_.size() > 0) { dims_[0] = out_size; } - out->Resize(phi::make_ddim(dims_)); + out->Resize(common::make_ddim(dims_)); memset_size = out_size; for (int i = 1; i < src_dims.size(); ++i) { memset_size *= src_dims[i]; diff --git a/paddle/phi/kernels/gpu/send_ue_recv_kernel.cu b/paddle/phi/kernels/gpu/send_ue_recv_kernel.cu index 7274b391e8d135..07c81d86f61014 100644 --- a/paddle/phi/kernels/gpu/send_ue_recv_kernel.cu +++ b/paddle/phi/kernels/gpu/send_ue_recv_kernel.cu @@ -44,13 +44,13 @@ void GraphSendUERecvOpCUDAKernelLaunchHelper(const Context& ctx, const int& index_size = src_index.dims()[0]; auto out_dims = out->dims(); int64_t memset_size = 1; - std::vector dims_ = phi::vectorize(out_dims); + std::vector dims_ = common::vectorize(out_dims); if (out_size <= 0) { dims_[0] = x.dims()[0]; } else { dims_[0] = out_size; } - out->Resize(phi::make_ddim(dims_)); + out->Resize(common::make_ddim(dims_)); for (size_t i = 0; i < dims_.size(); i++) { memset_size *= dims_[i]; } diff --git a/paddle/phi/kernels/gpu/send_uv_grad_kernel.cu b/paddle/phi/kernels/gpu/send_uv_grad_kernel.cu index c50b1960d00563..f5aea524031d24 100644 --- a/paddle/phi/kernels/gpu/send_uv_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/send_uv_grad_kernel.cu @@ -81,7 +81,7 @@ void CalculateGrad(const Context& ctx, out_grad, d_index, s_index, index_size, slice_size, x_grad); } else { const auto& bcast_info = phi::CalcBCastInfo(out_grad_dims, x_grad_dims); - auto out_grad_dims_1 = phi::vectorize(out_grad_dims); + auto out_grad_dims_1 = common::vectorize(out_grad_dims); std::vector out_grad_dims_2(out_grad_dims_1.begin() + 1, out_grad_dims_1.end()); out_grad_dims_2.insert(out_grad_dims_2.begin(), x_grad_dims[0]); @@ -160,7 +160,7 @@ void CalculateGrad(const Context& ctx, mul_functor, sum_functor); } else { - auto out_grad_dims_1 = phi::vectorize(out_grad_dims); + auto out_grad_dims_1 = common::vectorize(out_grad_dims); std::vector out_grad_dims_2(out_grad_dims_1.begin() + 1, out_grad_dims_1.end()); out_grad_dims_2.insert(out_grad_dims_2.begin(), x_grad_dims[0]); diff --git a/paddle/phi/kernels/gpu/shuffle_batch_grad_kernel.cu b/paddle/phi/kernels/gpu/shuffle_batch_grad_kernel.cu index 33b39666edf071..9472861a64c8e3 100644 --- a/paddle/phi/kernels/gpu/shuffle_batch_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/shuffle_batch_grad_kernel.cu @@ -21,9 +21,9 @@ #include #endif +#include "paddle/common/errors.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/for_range.h" diff --git a/paddle/phi/kernels/gpu/shuffle_batch_kernel.cu b/paddle/phi/kernels/gpu/shuffle_batch_kernel.cu index e145e7e1c8a206..e1bc107e214f78 100644 --- a/paddle/phi/kernels/gpu/shuffle_batch_kernel.cu +++ b/paddle/phi/kernels/gpu/shuffle_batch_kernel.cu @@ -21,9 +21,9 @@ #include #endif +#include "paddle/common/errors.h" #include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/for_range.h" @@ -49,7 +49,7 @@ void ShuffleBatchKernel(const Context& dev_ctx, for (int i = 0; i < x.dims().size() - 1; i++) { elem_size *= x.dims()[i]; } - shuffleidx->Resize(phi::make_ddim({elem_size})); + 
shuffleidx->Resize(common::make_ddim({elem_size})); int64_t seed_int = 0; if (seed.initialized()) { @@ -92,7 +92,7 @@ void ShuffleBatchKernel(const Context& dev_ctx, phi::funcs::ForRange for_range(dev_ctx, elem_size * x_embed_size); for_range(functor); - seed_out->Resize(phi::make_ddim({1})); + seed_out->Resize(common::make_ddim({1})); auto* seed_out_data = dev_ctx.template HostAlloc(seed_out); *seed_out_data = engine(); #endif diff --git a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_grad_kernel.cu b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_grad_kernel.cu index a6e627a5fb4bf2..ffc3055c27acde 100644 --- a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_grad_kernel.cu @@ -124,7 +124,7 @@ void SigmoidCrossEntropyWithLogitsGradKernel( DenseTensor *norm_tensor = new DenseTensor(); norm_tensor->Resize({sizeof(T)}); dev_ctx.template Alloc(norm_tensor); - auto dims = phi::vectorize(counts_tensor->dims()); + auto dims = common::vectorize(counts_tensor->dims()); std::vector reduce_dim = {}; for (int i = 0; i < dims.size(); i++) { reduce_dim.push_back(i); diff --git a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_kernel.cu b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_kernel.cu index 966c85506a128a..f94c09922980b7 100644 --- a/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_kernel.cu +++ b/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_kernel.cu @@ -126,7 +126,7 @@ void SigmoidCrossEntropyWithLogitsKernel( DenseTensor *norm_tensor = new DenseTensor(); norm_tensor->Resize({sizeof(T)}); dev_ctx.template Alloc(norm_tensor); - auto dims = phi::vectorize(counts_tensor->dims()); + auto dims = common::vectorize(counts_tensor->dims()); std::vector reduce_dim = {}; for (int i = 0; i < dims.size(); i++) { reduce_dim.push_back(i); diff --git a/paddle/phi/kernels/gpu/strided_copy_kernel.cu b/paddle/phi/kernels/gpu/strided_copy_kernel.cu index fc452eb44973dd..ae173b5f03528f 100644 --- a/paddle/phi/kernels/gpu/strided_copy_kernel.cu +++ b/paddle/phi/kernels/gpu/strided_copy_kernel.cu @@ -779,9 +779,9 @@ bool LaunchStrided2ContiguousCazeOneKernel( template __global__ void Strided2ContiguousDefaultFunc( const T* input_data, - phi::Array input_stride, + Array input_stride, T* output_data, - phi::Array dims, + Array dims, const int64_t numel) { int64_t gid = blockIdx.x * blockDim.x + threadIdx.x; #pragma unroll @@ -1185,8 +1185,8 @@ template __global__ void Contiguous2StridedDefaultFunc( const T* input_data, T* output_data, - phi::Array output_stride, - phi::Array dims, + Array output_stride, + Array dims, const int64_t numel) { int64_t gid = blockIdx.x * blockDim.x + threadIdx.x; #pragma unroll @@ -1265,8 +1265,8 @@ void StridedCopyKernel(const Context& dev_ctx, int64_t offset, DenseTensor* out) { phi::DenseTensorMeta meta = input.meta(); - meta.strides = phi::make_ddim(out_stride); - meta.dims = phi::make_ddim(dims); + meta.strides = common::make_ddim(out_stride); + meta.dims = common::make_ddim(dims); meta.offset = offset; out->set_meta(meta); @@ -1286,8 +1286,8 @@ void StridedCopyKernel(const Context& dev_ctx, const T* input_data = input.data(); int rank = input.dims().size(); - phi::Array input_dims; - phi::Array input_stride; + Array input_dims; + Array input_stride; for (int i = 0; i < input.dims().size(); i++) { input_dims[i] = input.dims()[i]; input_stride[i] = input.strides()[i]; @@ -1299,7 +1299,7 @@ void StridedCopyKernel(const Context& dev_ctx, 
"StridedCopyKernel's out tensor must complete " "mutable data before call kernel.")); - phi::Array output_stride; + Array output_stride; for (int i = 0; i < meta.dims.size(); i++) { output_stride[i] = meta.strides[i]; } diff --git a/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu b/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu index b50fad637d106e..8d3a8ee2114bd9 100644 --- a/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/temporal_shift_grad_kernel.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { @@ -98,7 +98,7 @@ void TemporalShiftGradKernel(const Context& dev_ctx, auto* input_grad = x_grad; auto* output_grad = &out_grad; int t = seg_num; - const DataLayout data_layout = phi::StringToDataLayout(data_format_str); + const DataLayout data_layout = common::StringToDataLayout(data_format_str); const int nt = output_grad->dims()[0]; const int c = (data_layout == DataLayout::kNCHW ? output_grad->dims()[1] @@ -117,8 +117,8 @@ void TemporalShiftGradKernel(const Context& dev_ctx, const int c2 = static_cast(c * 2 * shift_ratio); DDim in_grad_dims = - (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w}) - : phi::make_ddim({nt, h, w, c})); + (data_layout == DataLayout::kNCHW ? common::make_ddim({nt, c, h, w}) + : common::make_ddim({nt, h, w, c})); const T* output_grad_data = output_grad->data(); input_grad->Resize(in_grad_dims); T* input_grad_data = dev_ctx.template Alloc(input_grad); diff --git a/paddle/phi/kernels/gpu/temporal_shift_kernel.cu b/paddle/phi/kernels/gpu/temporal_shift_kernel.cu index 4904da296488f3..5867fa98e01641 100644 --- a/paddle/phi/kernels/gpu/temporal_shift_kernel.cu +++ b/paddle/phi/kernels/gpu/temporal_shift_kernel.cu @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/temporal_shift_kernel.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { @@ -98,7 +98,7 @@ void TemporalShiftKernel(const Context& dev_ctx, auto* input = &x; auto* output = out; int t = seg_num; - const DataLayout data_layout = phi::StringToDataLayout(data_format_str); + const DataLayout data_layout = common::StringToDataLayout(data_format_str); const int nt = input->dims()[0]; const int c = @@ -117,8 +117,8 @@ void TemporalShiftKernel(const Context& dev_ctx, const int c2 = static_cast(c * 2 * shift_ratio); DDim out_dims = - (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w}) - : phi::make_ddim({nt, h, w, c})); + (data_layout == DataLayout::kNCHW ? 
common::make_ddim({nt, c, h, w}) + : common::make_ddim({nt, h, w, c})); const T* input_data = input->data(); output->Resize(out_dims); T* output_data = dev_ctx.template Alloc(output); diff --git a/paddle/phi/kernels/gpu/tile_kernel.cu b/paddle/phi/kernels/gpu/tile_kernel.cu index 7861a2bdf01f87..06b07437cf660e 100644 --- a/paddle/phi/kernels/gpu/tile_kernel.cu +++ b/paddle/phi/kernels/gpu/tile_kernel.cu @@ -45,7 +45,7 @@ void TileKernel(const Context& dev_ctx, repeat_times_data[i])); } - auto vec_x_dims = phi::vectorize(x_dims); + auto vec_x_dims = common::vectorize(x_dims); if (repeat_times_data.size() < vec_x_dims.size()) { int diff = vec_x_dims.size() - repeat_times_data.size(); repeat_times_data.insert(repeat_times_data.begin(), diff, 1); @@ -63,19 +63,19 @@ void TileKernel(const Context& dev_ctx, vec_x_dims.size(), repeat_times_data.size())); - DDim new_x_dims = make_ddim(vec_x_dims); + DDim new_x_dims = common::make_ddim(vec_x_dims); DDim out_dims(new_x_dims); DenseTensor new_x = x; vec_x_dims.insert(vec_x_dims.begin(), 1, 1); for (size_t i = 0; i < repeat_times_data.size(); ++i) { out_dims[i] *= repeat_times_data[i]; - new_x.Resize(make_ddim(vec_x_dims)); + new_x.Resize(common::make_ddim(vec_x_dims)); std::vector ins = {&new_x}; vec_x_dims[i] *= repeat_times_data[i]; if (i != repeat_times_data.size() - 1) { if (repeat_times_data[i] != 1) { DenseTensor tmp_out; - tmp_out.Resize(make_ddim(vec_x_dims)); + tmp_out.Resize(common::make_ddim(vec_x_dims)); dev_ctx.template Alloc(&tmp_out); std::vector outs = {&tmp_out}; phi::funcs::BroadcastKernel( @@ -86,7 +86,7 @@ void TileKernel(const Context& dev_ctx, vec_x_dims[i] *= vec_x_dims[i + 1]; vec_x_dims[i + 1] = 1; } else { - out->Resize(make_ddim(vec_x_dims)); + out->Resize(common::make_ddim(vec_x_dims)); dev_ctx.template Alloc(out); std::vector outs = {out}; phi::funcs::BroadcastKernel( diff --git a/paddle/phi/kernels/gpu/top_k_kernel.cu b/paddle/phi/kernels/gpu/top_k_kernel.cu index c5ac9f244d9682..aa8eb2c4969deb 100644 --- a/paddle/phi/kernels/gpu/top_k_kernel.cu +++ b/paddle/phi/kernels/gpu/top_k_kernel.cu @@ -98,7 +98,7 @@ void TopkKernel(const Context& dev_ctx, if (axis == in_dims.size() - 1) { // if get the topK from the last axis const int64_t& input_height = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t& input_width = in_dims[in_dims.size() - 1]; if (k > input_width) { @@ -264,8 +264,8 @@ void TopkKernel(const Context& dev_ctx, dev_ctx.template Alloc(&trans_ind); dev_ctx.template Alloc(&trans_out); - const int64_t input_height = - phi::product(phi::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); + const int64_t input_height = common::product( + common::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); const int64_t input_width = trans_dims[trans_dims.size() - 1]; if (k > input_width) k = input_width; diff --git a/paddle/phi/kernels/gpu/top_p_sampling_kernel.cu b/paddle/phi/kernels/gpu/top_p_sampling_kernel.cu index 549ecca212c85b..a78040eb6a6697 100644 --- a/paddle/phi/kernels/gpu/top_p_sampling_kernel.cu +++ b/paddle/phi/kernels/gpu/top_p_sampling_kernel.cu @@ -587,20 +587,20 @@ void TopPSamplingKernel(const Context& dev_ctx, int64_t* ids_ptr = dev_ctx.template Alloc(ids); DenseTensor ps_now; - ps_now.Resize(phi::make_ddim({bs, 1})); + ps_now.Resize(common::make_ddim({bs, 1})); dev_ctx.template Alloc(&ps_now); phi::Copy(dev_ctx, ps, dev_ctx.GetPlace(), false, &ps_now); DenseTensor inds_input; - 
inds_input.Resize(phi::make_ddim({bs, vocab_size})); + inds_input.Resize(common::make_ddim({bs, vocab_size})); dev_ctx.template Alloc(&inds_input); DenseTensor sorted_out; - sorted_out.Resize(phi::make_ddim({bs, vocab_size})); + sorted_out.Resize(common::make_ddim({bs, vocab_size})); dev_ctx.template Alloc(&sorted_out); DenseTensor sorted_id; - sorted_id.Resize(phi::make_ddim({bs, vocab_size})); + sorted_id.Resize(common::make_ddim({bs, vocab_size})); dev_ctx.template Alloc(&sorted_id); int BlockSize = GetBlockSize(vocab_size); @@ -629,10 +629,10 @@ void TopPSamplingKernel(const Context& dev_ctx, setup_kernel<<<1, 256, 0, cu_stream>>>(dev_curand_states, seed, bs); DenseTensor count_iter; - count_iter.Resize(phi::make_ddim({bs + 1})); + count_iter.Resize(common::make_ddim({bs + 1})); dev_ctx.template Alloc(&count_iter); DenseTensor count_iter_begin; - count_iter_begin.Resize(phi::make_ddim({bs})); + count_iter_begin.Resize(common::make_ddim({bs})); dev_ctx.template Alloc(&count_iter_begin); SetCountIter<<<1, 256, 0, cu_stream>>>(count_iter.data(), bs + 1); @@ -684,7 +684,7 @@ void TopPSamplingKernel(const Context& dev_ctx, temp_storage_bytes = div_up(temp_storage_bytes, 256) * 256; int64_t temp_size = temp_storage_bytes; DenseTensor temp_storage; - temp_storage.Resize(phi::make_ddim({temp_size})); + temp_storage.Resize(common::make_ddim({temp_size})); dev_ctx.template Alloc(&temp_storage); cub::DeviceSegmentedRadixSort::SortPairsDescending( diff --git a/paddle/phi/kernels/gpu/triangular_solve_kernel.cu b/paddle/phi/kernels/gpu/triangular_solve_kernel.cu index 889c421eb0bb96..2a943fd0ac6815 100644 --- a/paddle/phi/kernels/gpu/triangular_solve_kernel.cu +++ b/paddle/phi/kernels/gpu/triangular_solve_kernel.cu @@ -14,9 +14,9 @@ #include "paddle/phi/kernels/triangular_solve_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/expand_kernel.h" @@ -47,7 +47,7 @@ void TriangularSolveKernel(const Context& dev_ctx, const T* x_bst_data = x_bst.data(); ExpandKernel(dev_ctx, x, x_bst_dims, &x_bst); - out->Resize(phi::make_ddim(y_bst_dims_vec)); + out->Resize(common::make_ddim(y_bst_dims_vec)); T* out_data = dev_ctx.template Alloc(out); IntArray y_bst_dims(y_bst_dims_vec); ExpandKernel(dev_ctx, y, y_bst_dims, out); diff --git a/paddle/phi/kernels/gpu/uniform_inplace_grad_kernel.cu b/paddle/phi/kernels/gpu/uniform_inplace_grad_kernel.cu index 3b7f8a931278e9..aece91fb3ea46f 100644 --- a/paddle/phi/kernels/gpu/uniform_inplace_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/uniform_inplace_grad_kernel.cu @@ -30,7 +30,7 @@ void UniformInplaceGradKernel(const Context& ctx, int diag_step, float diag_val, DenseTensor* x_grad) { - auto dims = vectorize(x_grad->dims()); + auto dims = common::vectorize(x_grad->dims()); float value = static_cast(0.0f); phi::FullKernel(ctx, dims, value, phi::DataType::UNDEFINED, x_grad); } diff --git a/paddle/phi/kernels/gpu/uniform_kernel.cu b/paddle/phi/kernels/gpu/uniform_kernel.cu index 2a514947bb7177..f148cef4b3d535 100644 --- a/paddle/phi/kernels/gpu/uniform_kernel.cu +++ b/paddle/phi/kernels/gpu/uniform_kernel.cu @@ -61,7 +61,7 @@ void UniformKernel(const Context& dev_ctx, const Scalar& max, int seed, DenseTensor* out) { - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); dev_ctx.template Alloc(out); 
if (seed == 0) { // Use global Generator seed diff --git a/paddle/phi/kernels/gpu/unique_consecutive_functor.h b/paddle/phi/kernels/gpu/unique_consecutive_functor.h index d70813c84aaaee..1ec918a02c43f5 100644 --- a/paddle/phi/kernels/gpu/unique_consecutive_functor.h +++ b/paddle/phi/kernels/gpu/unique_consecutive_functor.h @@ -54,13 +54,13 @@ static void UniqueConsecutiveFlattenedCUDATensor(const Context& context, auto in_data_hat = context.template Alloc(&in_hat); DenseTensor sorted_indices; - sorted_indices.Resize(phi::make_ddim({num_input})); + sorted_indices.Resize(common::make_ddim({num_input})); auto sorted_indices_data = context.template Alloc(&sorted_indices); thrust::sequence( thrust::device, sorted_indices_data, sorted_indices_data + num_input); // 1. Calculate op result: 'out' DenseTensor range; - range.Resize(phi::make_ddim({num_input + 1})); + range.Resize(common::make_ddim({num_input + 1})); auto range_data_ptr = context.template Alloc(&range); thrust::sequence( thrust::device, range_data_ptr, range_data_ptr + num_input + 1); @@ -72,14 +72,14 @@ static void UniqueConsecutiveFlattenedCUDATensor(const Context& context, thrust::device, out_data, out_data + num_input, range_data_ptr, equal) .first - out_data; - out->Resize(phi::make_ddim({num_out})); + out->Resize(common::make_ddim({num_out})); // 2. Calculate inverse index: 'inverse' if (return_inverse) { - inverse->Resize(phi::make_ddim({num_input})); + inverse->Resize(common::make_ddim({num_input})); auto inverse_data = context.template Alloc(inverse); DenseTensor inv_loc; - inv_loc.Resize(phi::make_ddim({num_input})); + inv_loc.Resize(common::make_ddim({num_input})); auto inv_loc_data_ptr = context.template Alloc(&inv_loc); thrust::adjacent_difference(thrust::device, in_data_hat, @@ -100,7 +100,7 @@ static void UniqueConsecutiveFlattenedCUDATensor(const Context& context, } // 3. Calculate 'counts' if (return_counts) { - counts->Resize(phi::make_ddim({num_out})); + counts->Resize(common::make_ddim({num_out})); auto count_data = context.template Alloc(counts); // init 'count_data' as 0 thrust::fill(thrust::device, count_data, count_data + num_out, 0); @@ -174,10 +174,10 @@ static void ComputeUniqueConsecutiveDims(const Context& context, DenseTensor* inverse, DenseTensor* counts) { // 1. inverse indices: 'inverse' - inverse->Resize(phi::make_ddim({row})); + inverse->Resize(common::make_ddim({row})); auto inverse_data = context.template Alloc(inverse); DenseTensor inv_loc; - inv_loc.Resize(phi::make_ddim({row})); + inv_loc.Resize(common::make_ddim({row})); auto inv_loc_data_ptr = context.template Alloc(&inv_loc); thrust::adjacent_difference(thrust::device, sorted_indices_data, @@ -198,7 +198,7 @@ static void ComputeUniqueConsecutiveDims(const Context& context, // 2. sorted indices DenseTensor range; - range.Resize(phi::make_ddim({row + 1})); + range.Resize(common::make_ddim({row + 1})); auto range_data_ptr = context.template Alloc(&range); thrust::sequence(thrust::device, range_data_ptr, range_data_ptr + row + 1); int num_out; @@ -211,10 +211,10 @@ static void ComputeUniqueConsecutiveDims(const Context& context, sorted_indices_data; thrust::device_ptr range_data_ptr_dev(range_data_ptr); range_data_ptr_dev[num_out] = row; - sorted_indices->Resize(phi::make_ddim({num_out})); + sorted_indices->Resize(common::make_ddim({num_out})); // 3. 
counts: 'counts' - counts->Resize(phi::make_ddim({num_out})); + counts->Resize(common::make_ddim({num_out})); auto count_data = context.template Alloc(counts); thrust::fill(thrust::device, count_data, count_data + row, 0); thrust::adjacent_difference( @@ -349,11 +349,11 @@ static void UniqueConsecutiveDimsCUDATensor(const Context& context, std::iota(permute.begin(), permute.end(), 0); permute[axis] = 0; permute[0] = axis; - std::vector in_trans_dims_vec(phi::vectorize(in.dims())); + std::vector in_trans_dims_vec(common::vectorize(in.dims())); in_trans_dims_vec[axis] = in.dims()[0]; in_trans_dims_vec[0] = in.dims()[axis]; DenseTensor in_trans; - DDim in_trans_dims = phi::make_ddim(in_trans_dims_vec); + DDim in_trans_dims = common::make_ddim(in_trans_dims_vec); in_trans.Resize(in_trans_dims); context.template Alloc(&in_trans); phi::funcs::TransCompute(in.dims().size(), // num of dims @@ -363,7 +363,7 @@ static void UniqueConsecutiveDimsCUDATensor(const Context& context, permute); // index of axis // Reshape tensor: eg. [dim1, dim0, dim2] -> [dim1, dim0*dim2] - DDim in_trans_flat_dims = phi::flatten_to_2d(in_trans_dims, 1); + DDim in_trans_flat_dims = common::flatten_to_2d(in_trans_dims, 1); in_trans.Resize(in_trans_flat_dims); // now 'in_trans' is 2D @@ -372,7 +372,7 @@ static void UniqueConsecutiveDimsCUDATensor(const Context& context, const InT* in_trans_data = in_trans.data(); DenseTensor sorted_indices; - sorted_indices.Resize(phi::make_ddim({row})); + sorted_indices.Resize(common::make_ddim({row})); auto sorted_indices_data = context.template Alloc(&sorted_indices); // 2. Calculate 'inverse', 'counts' @@ -396,14 +396,14 @@ static void UniqueConsecutiveDimsCUDATensor(const Context& context, DenseTensor out_trans; std::vector out_trans_dims_vec = in_trans_dims_vec; out_trans_dims_vec[0] = sorted_indices.numel(); - out_trans.Resize(phi::make_ddim(out_trans_dims_vec)); + out_trans.Resize(common::make_ddim(out_trans_dims_vec)); context.template Alloc(&out_trans); IndexSelect( context, in_trans, sorted_indices, &out_trans, 0); std::swap(out_trans_dims_vec[0], out_trans_dims_vec[axis]); - out->Resize(phi::make_ddim(out_trans_dims_vec)); + out->Resize(common::make_ddim(out_trans_dims_vec)); context.template Alloc(out); std::vector out_trans_unbind = phi::funcs::Unbind(out_trans); phi::funcs::ConcatFunctor concat_functor; diff --git a/paddle/phi/kernels/gpu/unique_consecutive_kernel.cu b/paddle/phi/kernels/gpu/unique_consecutive_kernel.cu index 9c32bff0ccb809..207593065b7a91 100644 --- a/paddle/phi/kernels/gpu/unique_consecutive_kernel.cu +++ b/paddle/phi/kernels/gpu/unique_consecutive_kernel.cu @@ -17,8 +17,8 @@ #include "paddle/phi/kernels/unique_consecutive_kernel.h" #include "paddle/phi/kernels/gpu/unique_consecutive_functor.h" +#include "paddle/common/errors.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/unique_kernel.cu b/paddle/phi/kernels/gpu/unique_kernel.cu index 5d4399e42e1abb..682528b1d80c64 100644 --- a/paddle/phi/kernels/gpu/unique_kernel.cu +++ b/paddle/phi/kernels/gpu/unique_kernel.cu @@ -121,7 +121,7 @@ UniqueFlattendCUDATensor(const Context& context, phi::Copy(context, in, context.GetPlace(), false, &in_hat); auto* in_data_hat = context.template Alloc(&in_hat); - indices->Resize(phi::make_ddim({num_input})); + indices->Resize(common::make_ddim({num_input})); auto* indices_data = context.template Alloc(indices); 
thrust::sequence(thrust::device, indices_data, indices_data + num_input); @@ -130,7 +130,7 @@ UniqueFlattendCUDATensor(const Context& context, // 1. Calculate op result: 'out' DenseTensor range; - range.Resize(phi::make_ddim({num_input + 1})); + range.Resize(common::make_ddim({num_input + 1})); auto* range_data_ptr = context.template Alloc(&range); thrust::sequence( thrust::device, range_data_ptr, range_data_ptr + num_input + 1); @@ -142,14 +142,14 @@ UniqueFlattendCUDATensor(const Context& context, thrust::device, out_data, out_data + num_input, range_data_ptr, equal) .first - out_data; - out->Resize(phi::make_ddim({num_out})); + out->Resize(common::make_ddim({num_out})); // 3. Calculate inverse index: 'inverse' if (return_inverse) { - index->Resize(phi::make_ddim({num_input})); + index->Resize(common::make_ddim({num_input})); auto* inverse_data = context.template Alloc(index); DenseTensor inv_loc; - inv_loc.Resize(phi::make_ddim({num_input})); + inv_loc.Resize(common::make_ddim({num_input})); auto inv_loc_data_ptr = context.template Alloc(&inv_loc); thrust::adjacent_difference(thrust::device, in_data_hat, @@ -172,7 +172,7 @@ UniqueFlattendCUDATensor(const Context& context, // 2. Calculate sorted index: 'indices' if (return_index) { DenseTensor tmp_indices; - tmp_indices.Resize(phi::make_ddim({num_input})); + tmp_indices.Resize(common::make_ddim({num_input})); auto* tmp_indices_data_ptr = context.template Alloc(&tmp_indices); thrust::copy(thrust::device, in_data_hat, @@ -183,12 +183,12 @@ UniqueFlattendCUDATensor(const Context& context, tmp_indices_data_ptr + num_input, indices_data, equal); - indices->Resize(phi::make_ddim({num_out})); + indices->Resize(common::make_ddim({num_out})); } // 4. Calculate 'counts' if (return_counts) { - counts->Resize(phi::make_ddim({num_out})); + counts->Resize(common::make_ddim({num_out})); auto count_data = context.template Alloc(counts); // init 'count_data' as 0 thrust::fill(thrust::device, count_data, count_data + num_out, 0); @@ -219,12 +219,12 @@ UniqueFlattendCUDATensor(const Context& context, // 1. Sort indices DenseTensor in_resize; in_resize.ShareDataWith(in); - in_resize.Resize(phi::make_ddim({num_input})); + in_resize.Resize(common::make_ddim({num_input})); const InT* in_data = in_resize.data(); auto equal = BinaryEqual(1, in_data); auto not_equal = BinaryNotEqual(1, in_data); - indices->Resize(phi::make_ddim({num_input})); + indices->Resize(common::make_ddim({num_input})); auto* indices_data = context.template Alloc(indices); thrust::sequence(thrust::device, indices_data, indices_data + num_input); @@ -235,10 +235,10 @@ UniqueFlattendCUDATensor(const Context& context, // 2. Calculate inverse indices: 'index' if (return_inverse) { - index->Resize(phi::make_ddim({num_input})); + index->Resize(common::make_ddim({num_input})); auto* inverse_data = context.template Alloc(index); DenseTensor inv_loc; - inv_loc.Resize(phi::make_ddim({num_input})); + inv_loc.Resize(common::make_ddim({num_input})); auto inv_loc_data_ptr = context.template Alloc(&inv_loc); thrust::adjacent_difference(thrust::device, indices_data, @@ -260,7 +260,7 @@ UniqueFlattendCUDATensor(const Context& context, // 3. 
Calculate op result and sorted index: 'out' & 'indices' DenseTensor range; - range.Resize(phi::make_ddim({num_input + 1})); + range.Resize(common::make_ddim({num_input + 1})); auto* range_data_ptr = context.template Alloc(&range); thrust::sequence( thrust::device, range_data_ptr, range_data_ptr + num_input + 1); @@ -272,14 +272,14 @@ UniqueFlattendCUDATensor(const Context& context, equal) .first - indices_data; - indices->Resize(phi::make_ddim({num_out})); - out->Resize(phi::make_ddim({num_out})); + indices->Resize(common::make_ddim({num_out})); + out->Resize(common::make_ddim({num_out})); context.template Alloc(out); phi::IndexSelectKernel(context, in_resize, *indices, 0, out); // 4. Calculate 'counts' if (return_counts) { - counts->Resize(phi::make_ddim({num_out})); + counts->Resize(common::make_ddim({num_out})); auto count_data = context.template Alloc(counts); // init 'count_data' as 0 thrust::fill(thrust::device, count_data, count_data + num_out, 0); @@ -312,10 +312,10 @@ static void ComputeUniqueDims(const Context& context, not_equal_T not_equal, int64_t row) { // 1. inverse indices: 'inverse' - inverse->Resize(phi::make_ddim({row})); + inverse->Resize(common::make_ddim({row})); auto* inverse_data = context.template Alloc(inverse); DenseTensor inv_loc; - inv_loc.Resize(phi::make_ddim({row})); + inv_loc.Resize(common::make_ddim({row})); auto inv_loc_data_ptr = context.template Alloc(&inv_loc); thrust::adjacent_difference(thrust::device, sorted_indices_data, @@ -336,7 +336,7 @@ static void ComputeUniqueDims(const Context& context, // 2. sorted indices DenseTensor range; - range.Resize(phi::make_ddim({row + 1})); + range.Resize(common::make_ddim({row + 1})); auto range_data_ptr = context.template Alloc(&range); thrust::sequence(thrust::device, range_data_ptr, range_data_ptr + row + 1); int num_out; @@ -349,10 +349,10 @@ static void ComputeUniqueDims(const Context& context, sorted_indices_data; thrust::device_ptr range_data_ptr_dev(range_data_ptr); range_data_ptr_dev[num_out] = row; - sorted_indices->Resize(phi::make_ddim({num_out})); + sorted_indices->Resize(common::make_ddim({num_out})); // 3. counts: 'counts' - counts->Resize(phi::make_ddim({num_out})); + counts->Resize(common::make_ddim({num_out})); auto* count_data = context.template Alloc(counts); thrust::fill(thrust::device, count_data, count_data + num_out, 0); thrust::adjacent_difference(thrust::device, @@ -376,8 +376,8 @@ static void UniqueDimsCUDATensor(const Context& context, // 1. Transpose & reshape // Transpose tensor: eg. axis=1, [dim0, dim1, dim2] -> [dim1, dim0, dim2] DenseTensor in_trans; - std::vector in_trans_dims_vec(phi::vectorize(in.dims())); - auto in_trans_dims = phi::make_ddim(in_trans_dims_vec); + std::vector in_trans_dims_vec(common::vectorize(in.dims())); + auto in_trans_dims = common::make_ddim(in_trans_dims_vec); std::vector permute(in.dims().size()); bool is_transpose = axis != 0; if (is_transpose) { @@ -386,7 +386,7 @@ static void UniqueDimsCUDATensor(const Context& context, permute[0] = axis; in_trans_dims_vec[axis] = in.dims()[0]; in_trans_dims_vec[0] = in.dims()[axis]; - in_trans_dims = phi::make_ddim(in_trans_dims_vec); + in_trans_dims = common::make_ddim(in_trans_dims_vec); in_trans.Resize(in_trans_dims); context.template Alloc(&in_trans); phi::funcs::TransCompute( @@ -399,7 +399,7 @@ static void UniqueDimsCUDATensor(const Context& context, in_trans.ShareDataWith(in); } // Reshape tensor: eg. 
[dim1, dim0, dim2] -> [dim1, dim0*dim2]
-  auto in_trans_flat_dims = phi::flatten_to_2d(in_trans_dims, 1);
+  auto in_trans_flat_dims = common::flatten_to_2d(in_trans_dims, 1);
  in_trans.Resize(in_trans_flat_dims);
  // now 'in_trans' is 2D
@@ -407,7 +407,7 @@ static void UniqueDimsCUDATensor(const Context& context,
  int64_t row = in_trans.dims()[0];
  const InT* in_trans_data = in_trans.data();
-  indices->Resize(phi::make_ddim({row}));
+  indices->Resize(common::make_ddim({row}));
  auto* sorted_indices_data = context.template Alloc(indices);
  // 2. Calculate 'indices', 'inverse', 'counts'
@@ -437,19 +437,19 @@ static void UniqueDimsCUDATensor(const Context& context,
  out_trans_dims_vec[0] = indices->numel();
  if (is_transpose) {
    DenseTensor out_trans;
-    out_trans.Resize(phi::make_ddim(out_trans_dims_vec));
+    out_trans.Resize(common::make_ddim(out_trans_dims_vec));
    context.template Alloc(&out_trans);
    phi::IndexSelectKernel(
        context, in_trans, *indices, 0, &out_trans);
    std::swap(out_trans_dims_vec[0], out_trans_dims_vec[axis]);
-    out->Resize(phi::make_ddim(out_trans_dims_vec));
+    out->Resize(common::make_ddim(out_trans_dims_vec));
    context.template Alloc(out);
    phi::funcs::TransCompute(
        out_trans.dims().size(), context, out_trans, out, permute);
  } else {
-    out->Resize(phi::make_ddim(out_trans_dims_vec));
+    out->Resize(common::make_ddim(out_trans_dims_vec));
    context.template Alloc(out);
    phi::IndexSelectKernel(context, in_trans, *indices, 0, out);
diff --git a/paddle/phi/kernels/gpu/viterbi_decode_kernel.cu b/paddle/phi/kernels/gpu/viterbi_decode_kernel.cu
index b69c4a691d0e33..81cc8ee78a947d 100644
--- a/paddle/phi/kernels/gpu/viterbi_decode_kernel.cu
+++ b/paddle/phi/kernels/gpu/viterbi_decode_kernel.cu
@@ -188,7 +188,7 @@ struct GetMaxValue {
  const DenseTensor& input,
  T* max_value) {
  DenseTensor out_data;
-  out_data.Resize(phi::make_ddim({1}));
+  out_data.Resize(common::make_ddim({1}));
  dev_ctx.template Alloc(&out_data);
  switch (ComputeBlockSize(input.numel())) {
    FIXED_BLOCK_DIM_CASE(
diff --git a/paddle/phi/kernels/gpu/yolo_box_kernel.cu b/paddle/phi/kernels/gpu/yolo_box_kernel.cu
index e948667624d6ca..8616b8bb429556 100644
--- a/paddle/phi/kernels/gpu/yolo_box_kernel.cu
+++ b/paddle/phi/kernels/gpu/yolo_box_kernel.cu
@@ -129,7 +129,8 @@ void YoloBoxKernel(const Context& dev_ctx,
  int bytes = sizeof(int) * anchors.size();
  DenseTensor tmp_anchors;
-  tmp_anchors.Resize(phi::make_dim(anchors.size()));
+  using common::make_dim;
+  tmp_anchors.Resize(make_dim(anchors.size()));
  int* anchors_data = dev_ctx.template Alloc(&tmp_anchors);
  const auto gplace = dev_ctx.GetPlace();
  const auto cplace = phi::CPUPlace();
diff --git a/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu b/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu
index 060f8c86710b58..bde4faefc5de3f 100644
--- a/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu
+++ b/paddle/phi/kernels/gpudnn/affine_grid_kernel.cu
@@ -49,7 +49,7 @@ void AffineGridCudnnKernel(const Context& dev_ctx,
  h_size_data[1] = size_attr[1];
  h_size_data[2] = size_attr[2];
  h_size_data[3] = size_attr[3];
-  output->Resize(phi::make_ddim({n, h_size_data[2], h_size_data[3], 2}));
+  output->Resize(common::make_ddim({n, h_size_data[2], h_size_data[3], 2}));
  T* output_data = dev_ctx.template Alloc(output);
  ScopedSpatialTransformerDescriptor st_desc;
  cudnnSpatialTransformerDescriptor_t cudnn_st_desc =
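Note: every hunk in this sweep makes the same substitution — shape helpers such as make_ddim, make_dim, vectorize, slice_ddim and flatten_to_2d now come from paddle/common/ddim.h under the common namespace instead of phi. A minimal standalone sketch of the vectorize/make_ddim round-trip these kernels lean on; Dims and the three functions are toy stand-ins, not the real API (the real DDim is a fixed-capacity dimension type, not a std::vector):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Toy stand-ins for the common:: shape helpers.
    using Dims = std::vector<int64_t>;
    Dims make_ddim(Dims v) { return v; }
    Dims vectorize(const Dims& d) { return d; }
    int64_t product(const Dims& d) {
      int64_t p = 1;
      for (int64_t x : d) p *= x;
      return p;
    }

    int main() {
      Dims dims = make_ddim({8, 3, 32, 32});
      assert(product(dims) == 8 * 3 * 32 * 32);
      Dims v = vectorize(dims);  // pull the shape apart as a plain vector,
      v[1] = 16;                 // edit one axis,
      dims = make_ddim(v);       // and rebuild the DDim, as the hunks here do
      assert(product(dims) == 8 * 16 * 32 * 32);
      return 0;
    }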
diff --git a/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h b/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h
index d0bdcc10beaa83..1b8fe788c30c21 100644
--- a/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h
+++ b/paddle/phi/kernels/gpudnn/conv_cudnn_frontend.h
@@ -87,7 +87,7 @@ class CudnnFrontendConvHelper {
  const phi::DenseTensor* tensor,
  int64_t id,
  cudnnTensorFormat_t layout_format) {
-  auto transformed_dims = phi::vectorize(tensor->dims());
+  auto transformed_dims = common::vectorize(tensor->dims());
  if (layout_format == CUDNN_TENSOR_NHWC) {
    transformed_dims = phi::backends::gpu::TransformDimOrder(transformed_dims);
diff --git a/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h b/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h
index 186bbd75fae62c..1b6ad4fdaa93c3 100644
--- a/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h
+++ b/paddle/phi/kernels/gpudnn/conv_gpudnn_base.h
@@ -146,8 +146,8 @@ struct ConvArgsBase {
  template
  phi::autotune::ConvCacheKey ConvertToConvCacheKey() const {
-    auto x_shape = phi::vectorize(x->dims());
-    auto w_shape = phi::vectorize(w->dims());
+    auto x_shape = common::vectorize(x->dims());
+    auto w_shape = common::vectorize(w->dims());
    VLOG(10) << "[ConvArgs] x_dims=" << x_shape << ", w_dims=" << w_shape
             << ", strides=" << s << ", paddings=" << p << ", dilations=" << d
             << ", data=" << phi::CppTypeToDataType::Type()
diff --git a/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu b/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu
index 2c6e898fa25c85..77b636bbb4ba1c 100644
--- a/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu
+++ b/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu
@@ -536,7 +536,7 @@ void ConvCudnnGradKernel(const Context& ctx,
    in_data_dims = slice_ddim(in_dims, 1, in_dims.size() - 1);
    filter_data_dims = slice_ddim(filter_dims, 1, filter_dims.size() - 1);
  }
-  std::vector ksize = vectorize(filter_data_dims);
+  std::vector ksize = common::vectorize(filter_data_dims);
  UpdatePaddingAndDilation(
      &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
@@ -579,7 +579,7 @@ void ConvCudnnGradKernel(const Context& ctx,
      input_pad[2 * i + 2 + 1] = paddings[2 * i + 1] - padding_common[i];
    }
  }
-  DDim new_input_shape(make_ddim(new_input_shape_vec));
+  DDim new_input_shape(common::make_ddim(new_input_shape_vec));
  transformed_input.Resize(new_input_shape);
  ctx.template Alloc(&transformed_input);
@@ -906,7 +906,7 @@ void ConvCudnnGradGradKernel(
  auto filter_dims = W->dims();
  DDim in_data_dims = slice_ddim(in_dims, 2, in_dims.size());
  DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
-  std::vector ksize = vectorize(filter_data_dims);
+  std::vector ksize = common::vectorize(filter_data_dims);
  UpdatePaddingAndDilation(
      &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
@@ -935,7 +935,7 @@ void ConvCudnnGradGradKernel(
    input_pad[2 * i + 4] = paddings[2 * i] - padding_common[i];
    input_pad[2 * i + 4 + 1] = paddings[2 * i + 1] - padding_common[i];
  }
-  DDim new_input_shape(make_ddim(new_input_shape_vec));
+  DDim new_input_shape(common::make_ddim(new_input_shape_vec));
  transformed_X.Resize(new_input_shape);
  transformed_ddX.Resize(new_input_shape);
  transformed_dX.Resize(new_input_shape);
diff --git a/paddle/phi/kernels/gpudnn/conv_kernel.cu b/paddle/phi/kernels/gpudnn/conv_kernel.cu
index 65418673827cd5..36d0bad6b103f7 100644
--- a/paddle/phi/kernels/gpudnn/conv_kernel.cu
+++ b/paddle/phi/kernels/gpudnn/conv_kernel.cu
@@ -395,7 +395,7 @@ void ConvCudnnKernel(const Context& ctx,
    filter_data_dims = slice_ddim(filter_dims, 1, filter_dims.size() - 1);
  }
-  std::vector ksize = vectorize(filter_data_dims);
+  std::vector ksize = common::vectorize(filter_data_dims);
  UpdatePaddingAndDilation(
&paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); @@ -435,7 +435,7 @@ void ConvCudnnKernel(const Context& ctx, input_pad[2 * i + 2 + 1] = paddings[2 * i + 1] - padding_common[i]; } } - DDim new_input_shape(make_ddim(new_input_shape_vec)); + DDim new_input_shape(common::make_ddim(new_input_shape_vec)); transformed_input.Resize(new_input_shape); ctx.template Alloc(&transformed_input); diff --git a/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu b/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu index 50bae0a8bca3e2..07ab10e8f5a542 100644 --- a/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_transpose_grad_kernel.cu @@ -16,11 +16,11 @@ limitations under the License. */ #include +#include "paddle/common/ddim.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/backends/dynload/cudnn.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/float16.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/cpu/conv_util.h" #include "paddle/phi/kernels/funcs/batch_norm_utils.h" @@ -65,8 +65,8 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx, // if channel_last, transpose to channel_first DenseTensor x_transpose; DenseTensor dout_transpose; - std::vector x_vec = vectorize(x.dims()); - std::vector out_vec = vectorize(dout.dims()); + std::vector x_vec = common::vectorize(x.dims()); + std::vector out_vec = common::vectorize(dout.dims()); if (data_layout == GPUDNNDataLayout::kNHWC) { if (strides.size() == 2U) { std::vector axis = {0, 3, 1, 2}; @@ -96,7 +96,7 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx, DDim x_data_dims; x_data_dims = slice_ddim(x_dims, 2, x_dims.size()); DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings_, &dilations_, padding_algorithm, x_data_dims, strides, ksize); @@ -121,7 +121,7 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx, x_pad[2 * i + 4 + 1] = paddings_[2 * i + 1] - padding_common[i]; } - transformed_dout.Resize(make_ddim(new_dout_shape_vec)); + transformed_dout.Resize(common::make_ddim(new_dout_shape_vec)); ctx.template Alloc(&transformed_dout); const int rank = x_transpose.dims().size(); @@ -154,7 +154,7 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx, const T* x_data = x_transpose.data(); const T* dout_data = transformed_dout.data(); - out_vec = vectorize(transformed_dout.dims()); + out_vec = common::vectorize(transformed_dout.dims()); // ------------------- cudnn descriptors --------------------- GPUDNNDataLayout layout; @@ -312,7 +312,7 @@ void ConvTransposeGradRawGPUDNNKernel(const Context& ctx, DenseTensor dx_transpose; DenseTensor dx_nchw; dx_nchw.ShareDataWith(*dx); - dx_nchw.Resize(make_ddim(x_vec)); + dx_nchw.Resize(common::make_ddim(x_vec)); if (strides.size() == 2U) { std::vector axis = {0, 2, 3, 1}; dx_transpose = Transpose(ctx, dx_nchw, axis); @@ -483,13 +483,14 @@ void Conv2dTransposeDoubleGradGPUDNNKernel( transformed_dx_channel = *dx; } } - std::vector out_vec = vectorize(transformed_dout_channel.dims()); + std::vector out_vec = + common::vectorize(transformed_dout_channel.dims()); auto x_dims = transformed_x_channel.dims(); auto filter_dims = filter.dims(); DDim x_data_dims = slice_ddim(x_dims, 2, x_dims.size()); DDim filter_data_dims = slice_ddim(filter_dims, 2, 
filter_dims.size()); - std::vector ksize = vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings_, &dilations_, padding_algorithm, x_data_dims, strides, ksize); @@ -527,10 +528,10 @@ void Conv2dTransposeDoubleGradGPUDNNKernel( input_pad[2 * i + 4] = paddings_[2 * i] - padding_common[i]; input_pad[2 * i + 4 + 1] = paddings_[2 * i + 1] - padding_common[i]; } - DDim new_input_shape(make_ddim(new_input_shape_vec)); + DDim new_input_shape(common::make_ddim(new_input_shape_vec)); transformed_x.Resize(new_input_shape); transformed_ddx.Resize(new_input_shape); - transformed_dout.Resize(make_ddim(new_output_grad_shape_vec)); + transformed_dout.Resize(common::make_ddim(new_output_grad_shape_vec)); ctx.template Alloc(&transformed_x); ctx.template Alloc(&transformed_ddx); @@ -601,12 +602,12 @@ void Conv2dTransposeDoubleGradGPUDNNKernel( } if (!is_sys_pad) { - transformed_ddout_channel.Resize(make_ddim(transformed_out_vec)); + transformed_ddout_channel.Resize(common::make_ddim(transformed_out_vec)); ctx.template Alloc(&transformed_ddout_channel); } else { ctx.template Alloc(ddout); transformed_ddout_channel = *ddout; - transformed_ddout_channel.Resize(make_ddim(transformed_out_vec)); + transformed_ddout_channel.Resize(common::make_ddim(transformed_out_vec)); } const T* x_ = transformed_x.data(); diff --git a/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu b/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu index df360ab388a6d7..fe46ea978f14b3 100644 --- a/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu +++ b/paddle/phi/kernels/gpudnn/conv_transpose_kernel.cu @@ -16,11 +16,11 @@ limitations under the License. */ #include +#include "paddle/common/ddim.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/backends/dynload/cudnn.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/float16.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/cpu/conv_util.h" #include "paddle/phi/kernels/funcs/padding.h" @@ -57,8 +57,8 @@ void ConvTransposeRawGPUDNNKernel(const Context& ctx, const GPUDNNDataLayout data_layout = (data_format != "NHWC" ? 
GPUDNNDataLayout::kNCHW : GPUDNNDataLayout::kNHWC); - std::vector x_vec = vectorize(x.dims()); - std::vector out_vec = vectorize(out->dims()); + std::vector x_vec = common::vectorize(x.dims()); + std::vector out_vec = common::vectorize(out->dims()); // if channel_last, transpose to channel_first DenseTensor x_transpose; if (data_layout == GPUDNNDataLayout::kNHWC) { @@ -87,7 +87,7 @@ void ConvTransposeRawGPUDNNKernel(const Context& ctx, DDim x_data_dims; x_data_dims = slice_ddim(x_dims, 2, x_dims.size()); DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings_, &dilations_, padding_algorithm, x_data_dims, strides, ksize); @@ -110,7 +110,7 @@ void ConvTransposeRawGPUDNNKernel(const Context& ctx, x_pad[2 * i + 4] = paddings_[2 * i] - padding_common[i]; x_pad[2 * i + 4 + 1] = paddings_[2 * i + 1] - padding_common[i]; } - DDim new_x_shape(make_ddim(new_x_shape_vec)); + DDim new_x_shape(common::make_ddim(new_x_shape_vec)); transformed_x.Resize(new_x_shape); ctx.template Alloc(&transformed_x); @@ -152,7 +152,7 @@ void ConvTransposeRawGPUDNNKernel(const Context& ctx, } const T* x_data = transformed_x.data(); - x_vec = vectorize(transformed_x.dims()); + x_vec = common::vectorize(transformed_x.dims()); std::vector transformed_out_vec = out_vec; for (size_t i = 0; i < data_dim; ++i) { @@ -163,12 +163,12 @@ void ConvTransposeRawGPUDNNKernel(const Context& ctx, DenseTensor transformed_out; if (!is_sys_pad) { - transformed_out.Resize(make_ddim(transformed_out_vec)); + transformed_out.Resize(common::make_ddim(transformed_out_vec)); ctx.template Alloc(&transformed_out); } else { ctx.template Alloc(out); transformed_out.ShareDataWith(*out); - transformed_out.Resize(make_ddim(transformed_out_vec)); + transformed_out.Resize(common::make_ddim(transformed_out_vec)); } T* transformed_out_data = transformed_out.data(); @@ -288,7 +288,7 @@ void ConvTransposeRawGPUDNNKernel(const Context& ctx, DenseTensor out_transpose; DenseTensor out_nchw; out_nchw.ShareDataWith(*out); - out_nchw.Resize(make_ddim(out_vec)); + out_nchw.Resize(common::make_ddim(out_vec)); if (strides.size() == 2U) { out_transpose = Transpose(ctx, out_nchw, {0, 2, 3, 1}); diff --git a/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu b/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu index 1161040f2163f1..24e79c77a50e1b 100644 --- a/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu +++ b/paddle/phi/kernels/gpudnn/pool_grad_kernel.cu @@ -119,12 +119,12 @@ void PoolGradRawGPUDNNKernel(const Context& ctx, // input transformed_input.Resize(input->dims()); - auto in_dims_vec = vectorize(input->dims()); + auto in_dims_vec = common::vectorize(input->dims()); in_dims_vec[1] = input->dims()[4]; in_dims_vec[2] = input->dims()[1]; in_dims_vec[3] = input->dims()[2]; in_dims_vec[4] = input->dims()[3]; - transformed_input.Resize(make_ddim(in_dims_vec)); + transformed_input.Resize(common::make_ddim(in_dims_vec)); ctx.Alloc(&transformed_input, input->type()); funcs::Transpose trans5; @@ -132,12 +132,12 @@ void PoolGradRawGPUDNNKernel(const Context& ctx, // output transformed_output.Resize(output->dims()); - auto out_dims_vec = vectorize(output->dims()); + auto out_dims_vec = common::vectorize(output->dims()); out_dims_vec[1] = output->dims()[4]; out_dims_vec[2] = output->dims()[1]; out_dims_vec[3] = output->dims()[2]; out_dims_vec[4] = output->dims()[3]; - transformed_output.Resize(make_ddim(out_dims_vec)); + 
transformed_output.Resize(common::make_ddim(out_dims_vec)); ctx.Alloc(&transformed_output, output->type()); @@ -145,14 +145,14 @@ void PoolGradRawGPUDNNKernel(const Context& ctx, trans5_v2(ctx, *output, &transformed_output, axis); // output grad - transformed_output_grad.Resize(make_ddim(out_dims_vec)); + transformed_output_grad.Resize(common::make_ddim(out_dims_vec)); ctx.Alloc(&transformed_output_grad, output_grad->type()); funcs::Transpose trans5_v3; trans5_v3(ctx, *output_grad, &transformed_output_grad, axis); // input grad - transformed_input_grad.Resize(make_ddim(in_dims_vec)); + transformed_input_grad.Resize(common::make_ddim(in_dims_vec)); #ifdef PADDLE_WITH_HIP // MIOPEN not support NHWC data layout @@ -163,11 +163,11 @@ void PoolGradRawGPUDNNKernel(const Context& ctx, // input transformed_input.Resize(input->dims()); - auto in_dims_vec = vectorize(input->dims()); + auto in_dims_vec = common::vectorize(input->dims()); in_dims_vec[1] = input->dims()[3]; in_dims_vec[2] = input->dims()[1]; in_dims_vec[3] = input->dims()[2]; - transformed_input.Resize(make_ddim(in_dims_vec)); + transformed_input.Resize(common::make_ddim(in_dims_vec)); ctx.Alloc(&transformed_input, input->type()); funcs::Transpose trans4; @@ -175,25 +175,25 @@ void PoolGradRawGPUDNNKernel(const Context& ctx, // output transformed_output.Resize(output->dims()); - auto out_dims_vec = vectorize(output->dims()); + auto out_dims_vec = common::vectorize(output->dims()); out_dims_vec[1] = output->dims()[3]; out_dims_vec[2] = output->dims()[1]; out_dims_vec[3] = output->dims()[2]; - transformed_output.Resize(make_ddim(out_dims_vec)); + transformed_output.Resize(common::make_ddim(out_dims_vec)); ctx.Alloc(&transformed_output, output->type()); funcs::Transpose trans4_v2; trans4_v2(ctx, *output, &transformed_output, axis); // output grad - transformed_output_grad.Resize(make_ddim(out_dims_vec)); + transformed_output_grad.Resize(common::make_ddim(out_dims_vec)); ctx.Alloc(&transformed_output_grad, output_grad->type()); funcs::Transpose trans4_v3; trans4_v3(ctx, *output_grad, &transformed_output_grad, axis); // input grad - transformed_input_grad.Resize(make_ddim(in_dims_vec)); + transformed_input_grad.Resize(common::make_ddim(in_dims_vec)); #endif } else { layout = GetLayoutFromStr(data_format); @@ -214,14 +214,14 @@ void PoolGradRawGPUDNNKernel(const Context& ctx, #ifdef PADDLE_WITH_HIP miopenTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( - layout, vectorize(transformed_input.dims())); + layout, common::vectorize(transformed_input.dims())); miopenTensorDescriptor_t cudnn_output_desc = output_desc.descriptor( - layout, vectorize(transformed_output.dims())); + layout, common::vectorize(transformed_output.dims())); #else cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( - layout, vectorize(transformed_input.dims())); + layout, common::vectorize(transformed_input.dims())); cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor( - layout, vectorize(transformed_output.dims())); + layout, common::vectorize(transformed_output.dims())); #endif PoolingMode pooling_mode; if (pooling_type == "max") { diff --git a/paddle/phi/kernels/gpudnn/pool_kernel.cu b/paddle/phi/kernels/gpudnn/pool_kernel.cu index b1a79dd8740680..5bd1e2d6a12c1c 100644 --- a/paddle/phi/kernels/gpudnn/pool_kernel.cu +++ b/paddle/phi/kernels/gpudnn/pool_kernel.cu @@ -91,12 +91,12 @@ void PoolRawGPUDNNKernel(const Context& ctx, // input transformed_input.Resize(input->dims()); - auto in_dims_vec = vectorize(input->dims()); + auto 
in_dims_vec = common::vectorize(input->dims());
  in_dims_vec[1] = input->dims()[4];
  in_dims_vec[2] = input->dims()[1];
  in_dims_vec[3] = input->dims()[2];
  in_dims_vec[4] = input->dims()[3];
-  transformed_input.Resize(make_ddim(in_dims_vec));
+  transformed_input.Resize(common::make_ddim(in_dims_vec));
  ctx.Alloc(&transformed_input, input->type());
  funcs::Transpose trans5;
@@ -105,12 +105,12 @@ void PoolRawGPUDNNKernel(const Context& ctx,
  // output
  transformed_output.Resize(output->dims());
-  auto out_dims_vec = vectorize(output->dims());
+  auto out_dims_vec = common::vectorize(output->dims());
  out_dims_vec[1] = output->dims()[4];
  out_dims_vec[2] = output->dims()[1];
  out_dims_vec[3] = output->dims()[2];
  out_dims_vec[4] = output->dims()[3];
-  transformed_output.Resize(make_ddim(out_dims_vec));
+  transformed_output.Resize(common::make_ddim(out_dims_vec));
#ifdef PADDLE_WITH_HIP
  // MIOPEN not support NHWC data layout
  } else if (data_format == str_NHWC) {
@@ -119,22 +119,22 @@ void PoolRawGPUDNNKernel(const Context& ctx,
  std::vector axis{0, 3, 1, 2};
  transformed_input.Resize(input->dims());
-  auto in_dims_vec = vectorize(input->dims());
+  auto in_dims_vec = common::vectorize(input->dims());
  in_dims_vec[1] = input->dims()[3];
  in_dims_vec[2] = input->dims()[1];
  in_dims_vec[3] = input->dims()[2];
-  transformed_input.Resize(make_ddim(in_dims_vec));
+  transformed_input.Resize(common::make_ddim(in_dims_vec));
  ctx.Alloc(&transformed_input, input->type());
  funcs::Transpose trans;
  trans(ctx, *input, &transformed_input, axis);
  transformed_output.Resize(output->dims());
-  auto out_dims_vec = vectorize(output->dims());
+  auto out_dims_vec = common::vectorize(output->dims());
  out_dims_vec[1] = output->dims()[3];
  out_dims_vec[2] = output->dims()[1];
  out_dims_vec[3] = output->dims()[2];
-  transformed_output.Resize(make_ddim(out_dims_vec));
+  transformed_output.Resize(common::make_ddim(out_dims_vec));
#endif
  } else {
    layout = GetLayoutFromStr(data_format);
@@ -152,14 +152,14 @@ void PoolRawGPUDNNKernel(const Context& ctx,
#ifdef PADDLE_WITH_HIP
  miopenTensorDescriptor_t cudnn_input_desc = input_desc.descriptor(
-      layout, vectorize(transformed_input.dims()));
+      layout, common::vectorize(transformed_input.dims()));
  miopenTensorDescriptor_t cudnn_output_desc = output_desc.descriptor(
-      layout, vectorize(transformed_output.dims()));
+      layout, common::vectorize(transformed_output.dims()));
#else
  cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor(
-      layout, vectorize(transformed_input.dims()));
+      layout, common::vectorize(transformed_input.dims()));
  cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor(
-      layout, vectorize(transformed_output.dims()));
+      layout, common::vectorize(transformed_output.dims()));
#endif
  PoolingMode pooling_mode;
  if (pooling_type == "max") {
diff --git a/paddle/phi/kernels/impl/amp_kernel_impl.h b/paddle/phi/kernels/impl/amp_kernel_impl.h
index 6757e1e6895751..ec857f3f640d56 100644
--- a/paddle/phi/kernels/impl/amp_kernel_impl.h
+++ b/paddle/phi/kernels/impl/amp_kernel_impl.h
@@ -128,7 +128,8 @@ void UpdateLossScalingKernel(const Context& dev_ctx,
  if (is_found_inf_on_cpu) {
    if (*found_inf_data) {
      for (auto* out : outs) {
-        Full(dev_ctx, vectorize(out->dims()), static_cast(0), out);
+        Full(
+            dev_ctx, common::vectorize(out->dims()), static_cast(0), out);
      }
    }
  } else {
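The amp hunk above resets every output tensor to zeros when an inf/nan is detected during loss scaling. A self-contained sketch of that reset under toy assumptions — Full here is a stand-in for phi's fill-with-constant kernel, and found_inf mirrors *found_inf_data:

    #include <cstdint>
    #include <functional>
    #include <numeric>
    #include <vector>

    // Toy stand-in: allocate numel(shape) elements, all set to `value`.
    std::vector<float> Full(const std::vector<int64_t>& shape, float value) {
      const int64_t numel = std::accumulate(
          shape.begin(), shape.end(), int64_t{1}, std::multiplies<int64_t>());
      return std::vector<float>(static_cast<size_t>(numel), value);
    }

    int main() {
      const bool found_inf = true;  // mirrors *found_inf_data
      const std::vector<int64_t> out_dims = {2, 3};  // as if common::vectorize(out->dims())
      if (found_inf) {
        std::vector<float> zeros = Full(out_dims, 0.0f);  // 6 zeros, one per element
        (void)zeros;
      }
      return 0;
    }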
diff --git a/paddle/phi/kernels/impl/bilinear_grad_kernel_impl.h b/paddle/phi/kernels/impl/bilinear_grad_kernel_impl.h
index dac527e24425d6..9fefa1704b3e6c 100644
--- a/paddle/phi/kernels/impl/bilinear_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/bilinear_grad_kernel_impl.h
@@ -42,13 +42,13 @@ void BilinearGradKernel(const Context& ctx,
  auto& place = *ctx.eigen_device();
  // Create the intermediate variable to calculate the Output(Y@Grad).
  DenseTensor x_scale;
-  x_scale.Resize(make_ddim({batch_size, x_dim}));
+  x_scale.Resize(common::make_ddim({batch_size, x_dim}));
  ctx.template Alloc(&x_scale);
  auto x_scale_mat = EigenMatrix::From(x_scale);
  // Create the intermediate variable to calculate the Output(X@Grad).
  DenseTensor y_scale;
-  y_scale.Resize(make_ddim({batch_size, y_dim}));
+  y_scale.Resize(common::make_ddim({batch_size, y_dim}));
  ctx.template Alloc(&y_scale);
  auto y_scale_mat = EigenMatrix::From(y_scale);
@@ -78,7 +78,7 @@ void BilinearGradKernel(const Context& ctx,
  for (int i = 0; i < out_dim; ++i) {
    DenseTensor weight_i =
-        weight.Slice(i, i + 1).Resize(make_ddim({x_dim, y_dim}));
+        weight.Slice(i, i + 1).Resize(common::make_ddim({x_dim, y_dim}));
    auto output_vec = dout_mat.chip(i, 1);
    if (dx) {
@@ -116,8 +116,8 @@ void BilinearGradKernel(const Context& ctx,
      dy->data());
    }
    if (dweight) {
-      DenseTensor dweight_i =
-          dweight->Slice(i, i + 1).Resize(make_ddim({x_dim, y_dim}));
+      DenseTensor dweight_i = dweight->Slice(i, i + 1).Resize(
+          common::make_ddim({x_dim, y_dim}));
      blas.GEMM(CblasTrans,
                CblasNoTrans,
                x_dim,
diff --git a/paddle/phi/kernels/impl/bilinear_kernel_impl.h b/paddle/phi/kernels/impl/bilinear_kernel_impl.h
index 12ad7eda263d0c..2b00cb69d728cc 100644
--- a/paddle/phi/kernels/impl/bilinear_kernel_impl.h
+++ b/paddle/phi/kernels/impl/bilinear_kernel_impl.h
@@ -44,14 +44,14 @@ void BilinearKernel(const Context& ctx,
  // Input(X) multiplied by Input(Weight_i), the formula is:
  // left_mul = X Weight_i.
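For reference while reading the two bilinear files: the op computes out(b, i) = x_b^T W_i y_b, and the {batch_size, y_dim} and {x_dim, y_dim} shapes built with common::make_ddim are the intermediates of that product. A naive loop sketch with made-up sizes (the real kernels use BLAS GEMM and Eigen chips instead):

    #include <cstdint>
    #include <vector>

    int main() {
      const int64_t batch_size = 2, x_dim = 3, y_dim = 4;
      std::vector<float> X(batch_size * x_dim, 1.0f);
      std::vector<float> Wi(x_dim * y_dim, 0.5f);  // slice i of the weight
      std::vector<float> Y(batch_size * y_dim, 2.0f);
      std::vector<float> left_mul(batch_size * y_dim, 0.0f);  // X * W_i
      std::vector<float> out_col(batch_size, 0.0f);           // column i of out
      for (int64_t b = 0; b < batch_size; ++b) {
        for (int64_t j = 0; j < y_dim; ++j) {
          for (int64_t k = 0; k < x_dim; ++k) {
            left_mul[b * y_dim + j] += X[b * x_dim + k] * Wi[k * y_dim + j];
          }
          out_col[b] += left_mul[b * y_dim + j] * Y[b * y_dim + j];
        }
      }
      // Each left_mul entry is 3 * 1 * 0.5 = 1.5; out_col[b] = 4 * 1.5 * 2 = 12.
      return 0;
    }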
DenseTensor left_mul;
-  left_mul.Resize(phi::make_ddim({batch_size, y_dim}));
+  left_mul.Resize(common::make_ddim({batch_size, y_dim}));
  ctx.template Alloc(&left_mul);
  auto left_mul_mat = EigenMatrix::From(left_mul);
  for (int i = 0; i < out_dim; ++i) {
    auto output_col_vec = output_mat.chip(i, 1);
    DenseTensor weight_mat =
-        weight.Slice(i, i + 1).Resize(phi::make_ddim({x_dim, y_dim}));
+        weight.Slice(i, i + 1).Resize(common::make_ddim({x_dim, y_dim}));
    phi::funcs::GetBlas(ctx).GEMM(CblasNoTrans,
                                  CblasNoTrans,
                                  batch_size,
diff --git a/paddle/phi/kernels/impl/broadcast_tensors_kernel_impl.h b/paddle/phi/kernels/impl/broadcast_tensors_kernel_impl.h
index c61b10d5a21995..144c8fe44dd260 100644
--- a/paddle/phi/kernels/impl/broadcast_tensors_kernel_impl.h
+++ b/paddle/phi/kernels/impl/broadcast_tensors_kernel_impl.h
@@ -60,7 +60,7 @@ void ApplyBroadcast(const Context& ctx,
      new_input_dims_vec[out_axis] = input_dims[in_axis];
    }
  }
-  auto new_input_dims = phi::make_ddim(new_input_dims_vec);
+  auto new_input_dims = common::make_ddim(new_input_dims_vec);
  // Initialize input X with new_input_dims_vec, so it's rank-aligned with the
  // output
diff --git a/paddle/phi/kernels/impl/cholesky_solve_kernel_impl.h b/paddle/phi/kernels/impl/cholesky_solve_kernel_impl.h
index 562ff25317ec9b..40a12c471b94a7 100644
--- a/paddle/phi/kernels/impl/cholesky_solve_kernel_impl.h
+++ b/paddle/phi/kernels/impl/cholesky_solve_kernel_impl.h
@@ -75,7 +75,7 @@ void CholeskySolveKernel(const Context& dev_ctx,
  int x_bst_ndim = x_bst_dims_vec.size();
  int M = static_cast(x_bst_dims_vec[x_bst_ndim - 2]);
  int N = static_cast(x_bst_dims_vec[x_bst_ndim - 1]);
-  int batchsize = product(phi::slice_ddim(x_bst.dims(), 0, x_bst_ndim - 2));
+  int batchsize = product(common::slice_ddim(x_bst.dims(), 0, x_bst_ndim - 2));
  DenseTensor info = phi::Empty(dev_ctx, IntArray({batchsize}));
  int* info_data = info.data();
@@ -94,7 +94,7 @@ void CholeskySolveKernel(const Context& dev_ctx,
  // calculate out's conjugate for complex
  result = phi::TransposeLast2Dim(dev_ctx, result);
-  out->Resize(phi::make_ddim(x_bst_dims_vec));
+  out->Resize(common::make_ddim(x_bst_dims_vec));
  ConjKernel(dev_ctx, result, out);
}
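The cholesky_solve hunk above counts batches with product(slice_ddim(dims, 0, ndim - 2)), i.e. it multiplies every leading dimension while leaving the trailing M x N matrix out. A standalone sketch of that idiom with vector-based stand-ins for the common:: helpers:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    using Shape = std::vector<int64_t>;
    // Stand-ins for common::slice_ddim and common::product.
    Shape SliceDDim(const Shape& d, int begin, int end) {
      return Shape(d.begin() + begin, d.begin() + end);
    }
    int64_t Product(const Shape& d) {
      int64_t p = 1;
      for (int64_t x : d) p *= x;
      return p;
    }

    int main() {
      const Shape x_bst_dims = {5, 7, 4, 3};  // two batch dims, then M = 4, N = 3
      const int ndim = static_cast<int>(x_bst_dims.size());
      // Multiply only the batch dims: 5 * 7 solves of a 4 x 3 system.
      assert(Product(SliceDDim(x_bst_dims, 0, ndim - 2)) == 5 * 7);
      return 0;
    }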
diff --git a/paddle/phi/kernels/impl/conv_grad_kernel_impl.h b/paddle/phi/kernels/impl/conv_grad_kernel_impl.h
index ec75952aaae8e2..3baf3fd84b0c49 100644
--- a/paddle/phi/kernels/impl/conv_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/conv_grad_kernel_impl.h
@@ -68,17 +68,17 @@ void ConvGradKernel(const Context& dev_ctx,
  auto filter_dims = filter.dims();
  DDim in_data_dims = slice_ddim(in_dims, 2, in_dims.size());
  DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
-  std::vector ksize = vectorize(filter_data_dims);
+  std::vector ksize = common::vectorize(filter_data_dims);
  UpdatePaddingAndDilation(
      &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
  const int batch_size = static_cast(transformed_input.dims()[0]);
  // filter_shape_vec: {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h, k_w}
-  std::vector filter_shape_vec(vectorize(filter.dims()));
+  std::vector filter_shape_vec(common::vectorize(filter.dims()));
  // output_shape_vec: {o_n, o_c, o_h, o_w} or {o_n, o_c, o_d, o_h, o_w}
  std::vector output_shape_vec(
-      vectorize(transformed_output_grad.dims()));
+      common::vectorize(transformed_output_grad.dims()));
  // use col_shape in the im2col calculation
  // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h, k_w, o_d,
@@ -90,7 +90,7 @@ void ConvGradKernel(const Context& dev_ctx,
    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
  }
-  DDim col_shape(make_ddim(col_shape_vec));
+  DDim col_shape(common::make_ddim(col_shape_vec));
  // use col_matrix_shape in the gemm calculation
  // size: (i_c/g * k_h * k_w, o_h * o_w)
@@ -310,13 +310,14 @@ void ConvGradGradKernel(const Context& dev_ctx,
  DDim in_data_dims = slice_ddim(in_dims, 2, in_dims.size());
  DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
-  std::vector ksize = vectorize(filter_data_dims);
+  std::vector ksize = common::vectorize(filter_data_dims);
  UpdatePaddingAndDilation(
      &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
  const int batch_size = static_cast(transformed_X.dims()[0]);
-  std::vector filter_shape_vec(vectorize(W.dims()));
-  std::vector output_shape_vec(vectorize(transformed_dY.dims()));
+  std::vector filter_shape_vec(common::vectorize(W.dims()));
+  std::vector output_shape_vec(
+      common::vectorize(transformed_dY.dims()));
  size_t data_dim = filter_shape_vec.size() - 2;
  std::vector col_shape_vec(1 + 2 * data_dim);
@@ -326,7 +327,7 @@ void ConvGradGradKernel(const Context& dev_ctx,
    col_shape_vec[j + 1] = filter_shape_vec[j + 2];
    col_shape_vec[j + data_dim + 1] = output_shape_vec[j + 2];
  }
-  DDim col_shape(make_ddim(col_shape_vec));
+  DDim col_shape(common::make_ddim(col_shape_vec));
  // col_matrix_shape [in_channel/group * kh * kw, oh * ow]
  DDim col_matrix_shape = flatten_to_2d(col_shape, data_dim + 1);
  // input_shape [Cin, H, W]
diff --git a/paddle/phi/kernels/impl/conv_kernel_impl.h b/paddle/phi/kernels/impl/conv_kernel_impl.h
index 06ba3104a81124..e40ba59a2d3a11 100644
--- a/paddle/phi/kernels/impl/conv_kernel_impl.h
+++ b/paddle/phi/kernels/impl/conv_kernel_impl.h
@@ -66,7 +66,7 @@ void ConvKernelImpl(const Context& dev_ctx,
  DDim in_data_dims = slice_ddim(trans_in_dims, 2, trans_in_dims.size());
  DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size());
-  std::vector ksize = vectorize(filter_data_dims);
+  std::vector ksize = common::vectorize(filter_data_dims);
  UpdatePaddingAndDilation(
      &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize);
@@ -74,11 +74,12 @@ void ConvKernelImpl(const Context& dev_ctx,
  // filter_shape_vec:
  // {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h, k_w}
-  std::vector filter_shape_vec(vectorize(filter.dims()));
+  std::vector filter_shape_vec(common::vectorize(filter.dims()));
  // output_shape_vec:
  // {o_n, o_c, o_h, o_w} or {o_n, o_c, o_d, o_h, o_w}
-  std::vector output_shape_vec(vectorize(transformed_output.dims()));
+  std::vector output_shape_vec(
+      common::vectorize(transformed_output.dims()));
  // use col_shape in the im2col calculation
  // col_shape_vec:
@@ -93,7 +94,7 @@ void ConvKernelImpl(const Context& dev_ctx,
    col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2];
  }
-  DDim col_shape(make_ddim(col_shape_vec));
+  DDim col_shape(common::make_ddim(col_shape_vec));
  // use col_matrix_shape in the gemm calculation
  // size:
diff --git a/paddle/phi/kernels/impl/conv_transpose_grad_kernel_impl.h b/paddle/phi/kernels/impl/conv_transpose_grad_kernel_impl.h
index 2d92f8156b607d..c4e58838c7e574 100644
--- a/paddle/phi/kernels/impl/conv_transpose_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/conv_transpose_grad_kernel_impl.h
@@ -14,8 +14,8 @@
#pragma once
-#include "paddle/phi/common/layout.h"
-#include "paddle/phi/core/ddim.h"
+#include "paddle/common/ddim.h"
+#include "paddle/common/layout.h"
#include "paddle/phi/kernels/conv_transpose_grad_kernel.h"
#include
"paddle/phi/kernels/cpu/conv_util.h" #include "paddle/phi/kernels/funcs/blas/blas.h" @@ -39,7 +39,7 @@ void ConvTransposeGradRawKernel(const Context& ctx, const std::string& data_format, DenseTensor* dx, DenseTensor* dfilter) { - const DataLayout data_layout = phi::StringToDataLayout(data_format); + const DataLayout data_layout = common::StringToDataLayout(data_format); // For filter, we do not use const pointer because we will do reshape, // but we should avoid modifying its value. DenseTensor filter_ = filter; @@ -63,15 +63,15 @@ void ConvTransposeGradRawKernel(const Context& ctx, in_data_dims = slice_ddim(x_dims, 1, x_dims.size() - 1); } DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings_, &dilations_, padding_algorithm, in_data_dims, strides, ksize); // x_shape_vec: {n, c, h, w} or {n, c, d, h, w} for channel_first // x_shape_vec: {n, h, w, c} or {n, d, h, w, c} for channel_last - std::vector x_shape_vec = vectorize(x.dims()); + std::vector x_shape_vec = common::vectorize(x.dims()); // filter_shape_vec: {i_c, o_c, k_h, k_w} or {i_c, o_c, k_d, k_h, k_w} - std::vector filter_shape_vec = vectorize(filter_.dims()); + std::vector filter_shape_vec = common::vectorize(filter_.dims()); // use col_shape in the im2col and col2im (or vol2col and col2vol) // calculation @@ -91,7 +91,7 @@ void ConvTransposeGradRawKernel(const Context& ctx, col_shape_vec[j + 1 + data_dim] = x_shape_vec[j + 1]; } } - DDim col_shape(make_ddim(col_shape_vec)); + DDim col_shape(common::make_ddim(col_shape_vec)); // use col_matrix_shape in the gemm calculation // size: (o_c * k_h * k_w, h * w) or (o_c * k_d * k_h * k_w, d * h * w) diff --git a/paddle/phi/kernels/impl/conv_transpose_kernel_impl.h b/paddle/phi/kernels/impl/conv_transpose_kernel_impl.h index 9fab3e6735b40d..ac6ce032a9b254 100644 --- a/paddle/phi/kernels/impl/conv_transpose_kernel_impl.h +++ b/paddle/phi/kernels/impl/conv_transpose_kernel_impl.h @@ -14,8 +14,8 @@ #pragma once -#include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" +#include "paddle/common/layout.h" #include "paddle/phi/kernels/conv_transpose_kernel.h" #include "paddle/phi/kernels/cpu/conv_util.h" #include "paddle/phi/kernels/funcs/blas/blas.h" @@ -37,7 +37,7 @@ void ConvTransposeRawKernel(const Context& ctx, const std::vector& dilations, const std::string& data_format, DenseTensor* out) { - const DataLayout data_layout = phi::StringToDataLayout(data_format); + const DataLayout data_layout = common::StringToDataLayout(data_format); // The filter will be reshaped, so it should not be constant DenseTensor filter_ = filter; std::vector paddings_ = paddings; @@ -55,15 +55,15 @@ void ConvTransposeRawKernel(const Context& ctx, in_data_dims = slice_ddim(x_dims, 1, x_dims.size() - 1); } DDim filter_data_dims = slice_ddim(filter_dims, 2, filter_dims.size()); - std::vector ksize = vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings_, &dilations_, padding_algorithm, in_data_dims, strides, ksize); // x_shape_vec: {n, c, h, w} or {n, c, d, h, w} for channel_first // x_shape_vec: {n, h, w, c} or {n, d, h, w, c} for channel_last - std::vector x_shape_vec = vectorize(x.dims()); + std::vector x_shape_vec = common::vectorize(x.dims()); // filter_shape_vec: {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h, k_w} - std::vector 
filter_shape_vec = vectorize(filter_.dims()); + std::vector filter_shape_vec = common::vectorize(filter_.dims()); // use col_shape in the im2col and col2im (or vol2col and col2vol) // calculation @@ -83,7 +83,7 @@ void ConvTransposeRawKernel(const Context& ctx, col_shape_vec[j + 1 + data_dim] = x_shape_vec[j + 1]; } } - DDim col_shape(make_ddim(col_shape_vec)); + DDim col_shape(common::make_ddim(col_shape_vec)); // use col_matrix_shape in the gemm calculation // size: (o_c/g * k_h * k_w, h * w) or (o_c/g * k_d * k_h * k_w, d * h * w) diff --git a/paddle/phi/kernels/impl/crop_kernel_impl.h b/paddle/phi/kernels/impl/crop_kernel_impl.h index 5aa951d4da09d6..3ad039b05b8465 100644 --- a/paddle/phi/kernels/impl/crop_kernel_impl.h +++ b/paddle/phi/kernels/impl/crop_kernel_impl.h @@ -63,7 +63,7 @@ static phi::DDim ValidateShape(const std::vector& shape, } } - return phi::make_ddim(output_shape); + return common::make_ddim(output_shape); } template diff --git a/paddle/phi/kernels/impl/deformable_conv_grad_kernel_impl.h b/paddle/phi/kernels/impl/deformable_conv_grad_kernel_impl.h index 744c48b2bfbd61..fdd31e510510a6 100644 --- a/paddle/phi/kernels/impl/deformable_conv_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/deformable_conv_grad_kernel_impl.h @@ -177,10 +177,10 @@ void DeformableConvGradKernel(const Context& dev_ctx, DenseTensor* mask_grad) { const int batch_size = static_cast(x.dims()[0]); - DDim input_shape = phi::slice_ddim(x.dims(), 1, x.dims().size()); - std::vector input_shape_vec = phi::vectorize(input_shape); - std::vector filter_shape_vec(phi::vectorize(filter.dims())); - std::vector output_shape_vec(phi::vectorize(out_grad.dims())); + DDim input_shape = common::slice_ddim(x.dims(), 1, x.dims().size()); + std::vector input_shape_vec = common::vectorize(input_shape); + std::vector filter_shape_vec(common::vectorize(filter.dims())); + std::vector output_shape_vec(common::vectorize(out_grad.dims())); std::vector col_buffer_shape_vec(filter_shape_vec.size()); col_buffer_shape_vec[0] = x.dims()[1] * filter.dims()[2] * filter.dims()[3]; @@ -195,7 +195,7 @@ void DeformableConvGradKernel(const Context& dev_ctx, DenseTensor col_buffer = Empty(dev_ctx, col_buffer_shape_vec); DenseTensor output_buffer; output_buffer.ShareDataWith(out_grad).Resize( - make_ddim(output_buffer_shape_vec)); + common::make_ddim(output_buffer_shape_vec)); int64_t M = input_shape_vec[0] / groups * filter_shape_vec[2] * filter_shape_vec[3]; @@ -245,14 +245,14 @@ void DeformableConvGradKernel(const Context& dev_ctx, for (int i = 0; i < batch_size / im2col_step; ++i) { DenseTensor out_grad_3d = out_grad_4d.Slice(i, i + 1).Resize( - phi::slice_ddim(out_grad_4d.dims(), 1, out_grad_4d.dims().size())); + common::slice_ddim(out_grad_4d.dims(), 1, out_grad_4d.dims().size())); for (int g = 0; g < groups; ++g) { DenseTensor weight_3d_slice = weight_3d.Slice(g, g + 1).Resize( - phi::slice_ddim(weight_3d.dims(), 1, weight_3d.dims().size())); + common::slice_ddim(weight_3d.dims(), 1, weight_3d.dims().size())); DenseTensor out_grad_3d_slice = out_grad_3d.Slice(g, g + 1).Resize( - phi::slice_ddim(out_grad_3d.dims(), 1, out_grad_3d.dims().size())); + common::slice_ddim(out_grad_3d.dims(), 1, out_grad_3d.dims().size())); DenseTensor col_buffer_3d_slice = - col_buffer_3d.Slice(g, g + 1).Resize(phi::slice_ddim( + col_buffer_3d.Slice(g, g + 1).Resize(common::slice_ddim( col_buffer_3d.dims(), 1, col_buffer_3d.dims().size())); blas.MatMul(weight_3d_slice, true, @@ -262,7 +262,7 @@ void DeformableConvGradKernel(const Context& dev_ctx, 
&col_buffer_3d_slice, T(0.0)); } - col_buffer.Resize(make_ddim(col_buffer_shape_vec)); + col_buffer.Resize(common::make_ddim(col_buffer_shape_vec)); T* col_buffer_ptr = col_buffer.data(); const T* input_ptr = x.data(); @@ -329,13 +329,14 @@ void DeformableConvGradKernel(const Context& dev_ctx, DenseTensor dweight_3d = Empty( dev_ctx, {filter_grad_shape.Get(), filter_grad_shape.size()}); for (int g = 0; g < groups; ++g) { - DenseTensor out_grad_3d_slice = out_grad_3d.Slice(g, g + 1).Resize( - phi::slice_ddim(out_grad_3d.dims(), 1, out_grad_3d.dims().size())); + DenseTensor out_grad_3d_slice = + out_grad_3d.Slice(g, g + 1).Resize(common::slice_ddim( + out_grad_3d.dims(), 1, out_grad_3d.dims().size())); DenseTensor col_buffer_3d_slice = - col_buffer_3d.Slice(g, g + 1).Resize(phi::slice_ddim( + col_buffer_3d.Slice(g, g + 1).Resize(common::slice_ddim( col_buffer_3d.dims(), 1, col_buffer_3d.dims().size())); DenseTensor dweight_3d_slice = dweight_3d.Slice(g, g + 1).Resize( - phi::slice_ddim(dweight_3d.dims(), 1, dweight_3d.dims().size())); + common::slice_ddim(dweight_3d.dims(), 1, dweight_3d.dims().size())); blas.MatMul(out_grad_3d_slice, false, diff --git a/paddle/phi/kernels/impl/deformable_conv_kernel_impl.h b/paddle/phi/kernels/impl/deformable_conv_kernel_impl.h index 119c7ad52202bf..d4647128963e5d 100644 --- a/paddle/phi/kernels/impl/deformable_conv_kernel_impl.h +++ b/paddle/phi/kernels/impl/deformable_conv_kernel_impl.h @@ -44,8 +44,8 @@ void DeformableConvKernel(const Context& dev_ctx, im2col_step = temp_step; } - std::vector filter_shape_vec(phi::vectorize(filter.dims())); - std::vector output_shape_vec(phi::vectorize(out->dims())); + std::vector filter_shape_vec(common::vectorize(filter.dims())); + std::vector output_shape_vec(common::vectorize(out->dims())); // col_shape_vec: {c_i * k_h * k_w, im2col_step, o_h, o_w} std::vector col_buffer_shape_vec(filter_shape_vec.size()); @@ -67,18 +67,18 @@ void DeformableConvKernel(const Context& dev_ctx, int64_t K = x.dims()[1] * filter_shape_vec[2] * filter_shape_vec[3] / groups; DenseTensor weight_3d; - weight_3d.ShareDataWith(filter).Resize(phi::make_ddim({groups, M, K})); + weight_3d.ShareDataWith(filter).Resize(common::make_ddim({groups, M, K})); DenseTensor col_buffer_3d; col_buffer_3d.ShareDataWith(col_buffer) - .Resize(phi::make_ddim({groups, K, N})); + .Resize(common::make_ddim({groups, K, N})); DenseTensor output_4d; output_4d.ShareDataWith(output_buffer) - .Resize(phi::make_ddim({batch_size / im2col_step, groups, M, N})); + .Resize(common::make_ddim({batch_size / im2col_step, groups, M, N})); - DDim input_shape = phi::slice_ddim(x.dims(), 1, x.dims().size()); - std::vector input_shape_vec = phi::vectorize(input_shape); + DDim input_shape = common::slice_ddim(x.dims(), 1, x.dims().size()); + std::vector input_shape_vec = common::vectorize(input_shape); int input_dim = x.numel() / x.dims()[0]; int input_offset_dim = offset.numel() / offset.dims()[0]; @@ -107,7 +107,7 @@ void DeformableConvKernel(const Context& dev_ctx, dilations, deformable_groups, col_buffer_ptr); - DenseTensor output_3d = output_4d.Slice(i, i + 1).Resize(phi::slice_ddim( + DenseTensor output_3d = output_4d.Slice(i, i + 1).Resize(common::slice_ddim( output_4d.dims(), 1, output_4d.dims().size())); // group * C/group * (im2step * H * W) @@ -115,12 +115,12 @@ void DeformableConvKernel(const Context& dev_ctx, // get the product of pixel and weight for (int g = 0; g < groups; ++g) { DenseTensor weight_3d_slice = weight_3d.Slice(g, g + 1).Resize( - 
phi::slice_ddim(weight_3d.dims(), 1, weight_3d.dims().size())); + common::slice_ddim(weight_3d.dims(), 1, weight_3d.dims().size())); DenseTensor col_buffer_3d_slice = - col_buffer_3d.Slice(g, g + 1).Resize(phi::slice_ddim( + col_buffer_3d.Slice(g, g + 1).Resize(common::slice_ddim( col_buffer_3d.dims(), 1, col_buffer_3d.dims().size())); DenseTensor output_3d_slice = - output_3d.Slice(g, g + 1).Resize(phi::slice_ddim( + output_3d.Slice(g, g + 1).Resize(common::slice_ddim( output_3d.dims(), 1, output_3d.dims().size())); // C * ((im2col_step)*H*W)) @@ -145,16 +145,17 @@ void DeformableConvKernel(const Context& dev_ctx, DenseTensor real_output_buffer = phi::Transpose( dev_ctx, output_4d.Resize( - phi::make_ddim({batch_size / im2col_step, - output_shape_vec[1], - im2col_step, - output_shape_vec[2] * output_shape_vec[3]})), + common::make_ddim({batch_size / im2col_step, + output_shape_vec[1], + im2col_step, + output_shape_vec[2] * output_shape_vec[3]})), axis); out->ShareDataWith(real_output_buffer) - .Resize(phi::make_ddim(output_shape_vec)); + .Resize(common::make_ddim(output_shape_vec)); } else { - out->ShareDataWith(output_buffer).Resize(phi::make_ddim(output_shape_vec)); + out->ShareDataWith(output_buffer) + .Resize(common::make_ddim(output_shape_vec)); } } diff --git a/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h b/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h index b17512ad1da879..8b135c4b520ae8 100644 --- a/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h @@ -110,7 +110,7 @@ void DeterminantGradKernel(const Context& dev_ctx, VLOG(3) << "The input matrix not invertible!"; x_grad->Resize(x.dims()); phi::Full( - dev_ctx, phi::vectorize(x.dims()), static_cast(0.0f), x_grad); + dev_ctx, common::vectorize(x.dims()), static_cast(0.0f), x_grad); return; } diff --git a/paddle/phi/kernels/impl/determinant_kernel_impl.h b/paddle/phi/kernels/impl/determinant_kernel_impl.h index 01c54d780b4b0e..4a308a5798192d 100644 --- a/paddle/phi/kernels/impl/determinant_kernel_impl.h +++ b/paddle/phi/kernels/impl/determinant_kernel_impl.h @@ -105,7 +105,7 @@ template void DeterminantKernel(const Context& dev_ctx, const DenseTensor& x, DenseTensor* out) { - auto input_dim = vectorize(x.dims()); + auto input_dim = common::vectorize(x.dims()); auto input_dim_size = input_dim.size(); auto batch_count = detail::GetBatchCount(x.dims()); @@ -121,12 +121,12 @@ void DeterminantKernel(const Context& dev_ctx, "the input matrix should be square matrix.")); auto rank = input_dim[input_dim_size - 1]; // square matrix length DeterminantFunctor()(dev_ctx, x, rank, batch_count, out); - auto output_dims = phi::slice_ddim(x.dims(), 0, input_dim_size - 2); + auto output_dims = common::slice_ddim(x.dims(), 0, input_dim_size - 2); if (input_dim_size > 2) { out->Resize(output_dims); } else { // when input is a two-dimension matrix, The det value is a number. - out->Resize(phi::make_ddim({})); + out->Resize(common::make_ddim({})); } VLOG(10) << "output dim:" << out->dims(); } diff --git a/paddle/phi/kernels/impl/diag_embed_impl.h b/paddle/phi/kernels/impl/diag_embed_impl.h index a4430fde923434..044deccb3c2c35 100644 --- a/paddle/phi/kernels/impl/diag_embed_impl.h +++ b/paddle/phi/kernels/impl/diag_embed_impl.h @@ -82,7 +82,7 @@ void DiagEmbedKernel(const Context& dev_ctx, auto out_dims = out->dims(); int dim1_ = dim1 < 0 ? out_dims.size() + dim1 : dim1; int dim2_ = dim2 < 0 ? 
out_dims.size() + dim2 : dim2; - auto stride = phi::stride(out_dims); + auto stride = common::stride(out_dims); int64_t diag_size; int64_t storage_offset = 0; if (offset >= 0) { @@ -99,11 +99,11 @@ void DiagEmbedKernel(const Context& dev_ctx, } else { storage_offset -= offset * stride[dim1_]; } - auto strides = vectorize(stride); + auto strides = common::vectorize(stride); strides.erase(strides.begin() + std::max(dim1_, dim2_)); strides.erase(strides.begin() + std::min(dim1_, dim2_)); strides.push_back(stride[dim1_] + stride[dim2_]); - const auto dims = vectorize(x.dims()); + const auto dims = common::vectorize(x.dims()); #if defined(__NVCC__) || defined(__HIPCC__) thrust::device_vector dims_vec(dims); diff --git a/paddle/phi/kernels/impl/dot_grad_kernel_impl.h b/paddle/phi/kernels/impl/dot_grad_kernel_impl.h index add72749d39e1e..3a82ace22860e5 100644 --- a/paddle/phi/kernels/impl/dot_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/dot_grad_kernel_impl.h @@ -104,7 +104,7 @@ struct DotGradFunction> { auto* data_dx = ctx.template Alloc(tensor_dx); const auto* data_y = tensor_y->data(); const DDim& dim = tensor_x->dims(); - size_t N = static_cast(phi::product(dim)); + size_t N = static_cast(common::product(dim)); auto _step = dim.size() > 0 ? dim[dim.size() - 1] : 1; auto step = _step != 0 ? _step : 1; @@ -120,7 +120,7 @@ struct DotGradFunction> { auto* data_dy = ctx.template Alloc(tensor_dy); const auto* data_x = tensor_x->data(); const DDim& dim = tensor_y->dims(); - size_t N = static_cast(phi::product(dim)); + size_t N = static_cast(common::product(dim)); auto _step = dim.size() > 0 ? dim[dim.size() - 1] : 1; auto step = _step != 0 ? _step : 1; diff --git a/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h b/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h index f39786fff2665f..817081a690385a 100644 --- a/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h @@ -48,9 +48,9 @@ void EighGradKernel(const Context& dev_ctx, result.Resize(dims); dev_ctx.template Alloc(&result); - std::vector out_shape = phi::vectorize(dims); + std::vector out_shape = common::vectorize(dims); DenseTensor constant; - constant.Resize(phi::make_ddim(out_shape)); + constant.Resize(common::make_ddim(out_shape)); dev_ctx.template Alloc(&constant); phi::funcs::SetConstant()(dev_ctx, &constant, T(0.5)); result = phi::Subtract( diff --git a/paddle/phi/kernels/impl/einsum_grad_impl.h b/paddle/phi/kernels/impl/einsum_grad_impl.h index ce33d08c1d82db..9557b1609eeef2 100644 --- a/paddle/phi/kernels/impl/einsum_grad_impl.h +++ b/paddle/phi/kernels/impl/einsum_grad_impl.h @@ -61,7 +61,7 @@ DenseTensor PerformTileAndReduction(const Context& dev_ctx, } } } - t.Resize(make_ddim(resize_dims)); + t.Resize(common::make_ddim(resize_dims)); DenseTensor after_tile; if (std::all_of(repeat_times.begin(), repeat_times.end(), [](int x) { return x == 1; @@ -100,7 +100,7 @@ DenseTensor PerformTileAndReduction(const Context& dev_ctx, } VLOG(5) << "PermformTileAndReduction: recover shape: " << paddle::string::join_strings(recover_shape, ","); - ret.Resize(make_ddim(recover_shape)); + ret.Resize(common::make_ddim(recover_shape)); // undiagonalize by einsum equation. only contain undiagonal operations. 
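The einsum hunks in this area keep reshaping one buffer: axes are fused through make_ddim for a matmul-style contraction (mul_dims) and the original axes are reinstated afterwards (recover_shape / recover_dim above). A toy sketch of that bookkeeping, with a plain vector standing in for DDim and made-up sizes:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    using Shape = std::vector<int64_t>;  // stand-in for a vectorize()d DDim

    int main() {
      const Shape dims = {2, 3, 4};                         // original shape
      const Shape mul_dims = {dims[0], dims[1] * dims[2]};  // fuse trailing axes for the matmul
      assert(mul_dims == (Shape{2, 12}));                   // same element count, rank 2
      const Shape recover_dim = {mul_dims[0], 3, 4};        // reinstated afterwards
      assert(recover_dim == dims);
      return 0;
    }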
DenseTensor out;
  VLOG(5) << "Undiagonal by einsum with args: " << op_label + "->" + equ;
diff --git a/paddle/phi/kernels/impl/einsum_impl.h b/paddle/phi/kernels/impl/einsum_impl.h
index e32f64f347f4c6..0fec027fdf5e17 100644
--- a/paddle/phi/kernels/impl/einsum_impl.h
+++ b/paddle/phi/kernels/impl/einsum_impl.h
@@ -491,7 +491,8 @@ DenseTensor PerformDiagonalAndReduction(const Context& dev_ctx,
    if (cur != label2perm[c]) {
      // do diagonal, followed by movedim().
      VLOG(5) << "Do diagonal with shape="
-              << paddle::string::join_strings(vectorize(res.dims()), ',')
+              << paddle::string::join_strings(
+                     common::vectorize(res.dims()), ',')
              << ", axis1=" << cur << ", axis2=" << label2perm[c];
      res = Diagonal(dev_ctx, res, 0, cur, label2perm[c]);
      res = Transpose(
@@ -623,7 +624,7 @@ DenseTensor PerformContraction(
    }
    VLOG(5) << "PerformContraction: mul_dims: "
            << paddle::string::join_strings(mul_dims, ",");
-    trans_t.Resize(make_ddim(mul_dims));
+    trans_t.Resize(common::make_ddim(mul_dims));
    return trans_t;
  };
@@ -643,7 +644,7 @@ DenseTensor PerformContraction(
  if (recover_dim.size() == 0) recover_dim.push_back(1);
  VLOG(5) << "PerformContraction: recover_dim: "
          << paddle::string::join_strings(recover_dim, ",");
-  after_contraction.Resize(make_ddim(recover_dim));
+  after_contraction.Resize(common::make_ddim(recover_dim));
  return after_contraction;
}
@@ -740,7 +741,7 @@ void EinsumKernelImpl(const Context& dev_ctx,
  broadcast_dims.size());
  *out = PerformUndiagonal(
      dev_ctx, *out, broadcast_dims.size(), right);
-  out->Resize(make_ddim(output_dims));
+  out->Resize(common::make_ddim(output_dims));
}
template
diff --git a/paddle/phi/kernels/impl/expand_as_grad_kernel_impl.h b/paddle/phi/kernels/impl/expand_as_grad_kernel_impl.h
index f0c32dd32e42f3..54ef6e0c1f9cb7 100644
--- a/paddle/phi/kernels/impl/expand_as_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/expand_as_grad_kernel_impl.h
@@ -55,7 +55,7 @@ void ExpandAsGradKernel(const Context& context,
    return;
  }
-  auto vec_in_dims = phi::vectorize(x_dims);
+  auto vec_in_dims = common::vectorize(x_dims);
  auto diff = target_shape.size() - vec_in_dims.size();
  vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
  std::vector repeat_times(vec_in_dims.size());
diff --git a/paddle/phi/kernels/impl/expand_as_kernel_impl.h b/paddle/phi/kernels/impl/expand_as_kernel_impl.h
index 7e3a1a6656140e..cee562b42778e1 100755
--- a/paddle/phi/kernels/impl/expand_as_kernel_impl.h
+++ b/paddle/phi/kernels/impl/expand_as_kernel_impl.h
@@ -30,7 +30,7 @@ void ExpandAs(const Context& context,
  const std::vector& target_shape,
  DenseTensor* out) {
  auto in_dims = x.dims();
-  auto vec_in_dims = phi::vectorize(in_dims);
+  auto vec_in_dims = common::vectorize(in_dims);
  auto diff = target_shape.size() - vec_in_dims.size();
  vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
  std::vector repeat_times(vec_in_dims.size());
@@ -82,8 +82,8 @@ void ExpandAs(const Context& context,
    bcast_dims[i] = repeat_times[i];
  }
-  phi::DDim new_in_dims = phi::make_ddim(vec_in_dims);
-  phi::DDim out_dims = phi::make_ddim(target_shape);
+  phi::DDim new_in_dims = common::make_ddim(vec_in_dims);
+  phi::DDim out_dims = common::make_ddim(target_shape);
  out->Resize(out_dims);
  context.template Alloc(out);
@@ -129,7 +129,7 @@ void ExpandAsKernel(const Context& ctx,
    if (target_shape[i] == -1) {
      if (y) {
        if (y->IsInitialized()) {
-          real_target_shape = phi::vectorize(y->dims());
+          real_target_shape = common::vectorize(y->dims());
        }
      }
      break;
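The expand/expand_as hunks above and below share one rank-alignment step: the input shape is left-padded with 1s up to the target rank before Eigen broadcasting. A standalone sketch of that arithmetic, assuming each target dim divides evenly by the padded input dim as broadcasting requires:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      std::vector<int64_t> vec_in_dims = {3, 1};  // as if common::vectorize(x.dims())
      const std::vector<int64_t> target_shape = {2, 4, 3, 5};
      const size_t diff = target_shape.size() - vec_in_dims.size();
      vec_in_dims.insert(vec_in_dims.begin(), diff, 1);  // now {1, 1, 3, 1}
      std::vector<int64_t> repeat_times(vec_in_dims.size());
      for (size_t i = 0; i < vec_in_dims.size(); ++i) {
        repeat_times[i] = target_shape[i] / vec_in_dims[i];  // broadcast factor per axis
      }
      assert((repeat_times == std::vector<int64_t>{2, 4, 1, 5}));
      return 0;
    }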
diff --git a/paddle/phi/kernels/impl/expand_grad_kernel_impl.h b/paddle/phi/kernels/impl/expand_grad_kernel_impl.h
index 700f64863e4fee..4dd9dc4d50337a 100644
--- a/paddle/phi/kernels/impl/expand_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/expand_grad_kernel_impl.h
@@ -59,7 +59,7 @@ void ExpandGradKernel(const Context& ctx,
    phi::Copy(ctx, out_grad, ctx.GetPlace(), false, in_grad);
    return;
  }
-  auto vec_in_dims = phi::vectorize(x_dims);
+  auto vec_in_dims = common::vectorize(x_dims);
  auto diff = expand_shape.size() - vec_in_dims.size();
  vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
  // 1. reshape_dims_vec is the broadcast parameter.
diff --git a/paddle/phi/kernels/impl/expand_kernel_impl.h b/paddle/phi/kernels/impl/expand_kernel_impl.h
index 4738088781de9d..181dd2558fa385 100644
--- a/paddle/phi/kernels/impl/expand_kernel_impl.h
+++ b/paddle/phi/kernels/impl/expand_kernel_impl.h
@@ -31,7 +31,7 @@ void Expand(const Context& ctx,
  DenseTensor* out) {
  auto in_dims = x.dims();
  auto expand_shape = shape.GetData();
-  auto vec_in_dims = phi::vectorize(in_dims);
+  auto vec_in_dims = common::vectorize(in_dims);
  auto diff = expand_shape.size() - vec_in_dims.size();
  vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
  std::vector repeat_times(vec_in_dims.size());
@@ -83,7 +83,7 @@ void Expand(const Context& ctx,
    bcast_dims[i] = repeat_times[i];
  }
-  DDim new_in_dims = phi::make_ddim(vec_in_dims);
+  DDim new_in_dims = common::make_ddim(vec_in_dims);
  DDim out_dims(new_in_dims);
  for (size_t i = 0; i < repeat_times.size(); ++i) {
    out_dims[i] *= repeat_times[i];
diff --git a/paddle/phi/kernels/impl/fc_kernel_impl.h b/paddle/phi/kernels/impl/fc_kernel_impl.h
index 061f1baad3108b..c30da9d4e50009 100644
--- a/paddle/phi/kernels/impl/fc_kernel_impl.h
+++ b/paddle/phi/kernels/impl/fc_kernel_impl.h
@@ -46,13 +46,13 @@ void FCKernel(const Context& dev_ctx,
  std::vector output_dims;
  phi::funcs::FCOutputSize(
      input.dims(), w_dims, output_dims, in_num_col_dims, padding_weights);
-  out->Resize(phi::make_ddim(output_dims));
+  out->Resize(common::make_ddim(output_dims));
  out->set_lod(input.lod());
  auto out_dims = out->dims();
  auto w_dims0 = padding_weights ? w_dims[0] - 4 : w_dims[0];
  auto w_dims1 = padding_weights ?
w_dims[1] - 4 : w_dims[1]; - int M = phi::product(out_dims) / w_dims1; + int M = common::product(out_dims) / w_dims1; const T* input_data = input.data(); const T* w_data = w.data(); diff --git a/paddle/phi/kernels/impl/fft_grad_kernel_impl.h b/paddle/phi/kernels/impl/fft_grad_kernel_impl.h index de4bb8d4bd1734..72c8bc659a632a 100644 --- a/paddle/phi/kernels/impl/fft_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/fft_grad_kernel_impl.h @@ -18,8 +18,8 @@ #include #include +#include "paddle/common/ddim.h" #include "paddle/phi/common/data_type.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_meta.h" #include "paddle/phi/kernels/complex_kernel.h" @@ -92,10 +92,10 @@ void FFTC2RGradKernel(const Context& ctx, const int64_t double_length = out_grad.dims()[axes.back()] - x_grad->dims()[axes.back()]; - const phi::DDim strides = phi::stride(x_grad->dims()); + const phi::DDim strides = common::stride(x_grad->dims()); #if defined(__NVCC__) || defined(__HIPCC__) - const thrust::device_vector strides_g(phi::vectorize(strides)); + const thrust::device_vector strides_g(common::vectorize(strides)); const int64_t* pstrides = thrust::raw_pointer_cast(strides_g.data()); #else const int64_t* pstrides = strides.Get(); diff --git a/paddle/phi/kernels/impl/fft_kernel_impl.h b/paddle/phi/kernels/impl/fft_kernel_impl.h index 13c54182d1d316..eab6c5f5a111bb 100644 --- a/paddle/phi/kernels/impl/fft_kernel_impl.h +++ b/paddle/phi/kernels/impl/fft_kernel_impl.h @@ -18,7 +18,7 @@ #include #include -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/fft.h" @@ -75,7 +75,7 @@ void FFTR2CKernel(const Context& ctx, out->dims().at(last_fft_axis) / 2 + 1; onesided_out_shape[last_fft_axis] = onesided_last_axis_size; DenseTensor onesided_out = - Empty(ctx, phi::vectorize(onesided_out_shape)); + Empty(ctx, common::vectorize(onesided_out_shape)); fft_r2c_func(ctx, x, &onesided_out, axes, norm_type, forward); funcs::FFTFillConj(ctx, &onesided_out, out, axes); } diff --git a/paddle/phi/kernels/impl/fold_grad_kernel_impl.h b/paddle/phi/kernels/impl/fold_grad_kernel_impl.h index 1cfbb496d7750c..067ca010b31a90 100644 --- a/paddle/phi/kernels/impl/fold_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/fold_grad_kernel_impl.h @@ -53,8 +53,8 @@ void FoldGradKernel(const Context& ctx, int n_output_plane = n_input_plane / (kernel_sizes[0] * kernel_sizes[1]); DDim out_shape = - make_ddim({n_output_plane, output_sizes[0], output_sizes[1]}); - DDim input_matrix_shape = make_ddim( + common::make_ddim({n_output_plane, output_sizes[0], output_sizes[1]}); + DDim input_matrix_shape = common::make_ddim( {1, kernel_sizes[0], kernel_sizes[1], output_height, output_width}); phi::funcs::Im2ColFunctor im2col; diff --git a/paddle/phi/kernels/impl/fold_kernel_impl.h b/paddle/phi/kernels/impl/fold_kernel_impl.h index 694d754ecfb8e4..dfe11b0759aad5 100644 --- a/paddle/phi/kernels/impl/fold_kernel_impl.h +++ b/paddle/phi/kernels/impl/fold_kernel_impl.h @@ -52,9 +52,9 @@ void FoldKernel(const Context& ctx, int n_output_plane = n_input_plane / (kernel_sizes[0] * kernel_sizes[1]); DDim output_shape = - make_ddim({n_output_plane, output_sizes[0], output_sizes[1]}); + common::make_ddim({n_output_plane, output_sizes[0], output_sizes[1]}); - DDim input_matrix_shape = make_ddim( + DDim input_matrix_shape = common::make_ddim( {1, kernel_sizes[0], kernel_sizes[1], 
       {1, kernel_sizes[0], kernel_sizes[1], output_height, output_width});
   phi::funcs::SetConstant set_zero;
diff --git a/paddle/phi/kernels/impl/frame_grad_kernel_impl.h b/paddle/phi/kernels/impl/frame_grad_kernel_impl.h
index 9f6ceee24f183a..37f5de45cca5cd 100644
--- a/paddle/phi/kernels/impl/frame_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/frame_grad_kernel_impl.h
@@ -41,15 +41,15 @@ void FrameGradKernel(const Context& dev_ctx,
   DDim dx_resized_dims;
   DDim dout_resized_dims;
   if (axis == 0) {
-    preserved_dims = phi::slice_ddim(dx->dims(), 1, dx_rank);
-    dx_resized_dims = {seq_length, phi::product(preserved_dims)};
+    preserved_dims = common::slice_ddim(dx->dims(), 1, dx_rank);
+    dx_resized_dims = {seq_length, common::product(preserved_dims)};
     dout_resized_dims = {
-        n_frames, frame_length, phi::product(preserved_dims)};
+        n_frames, frame_length, common::product(preserved_dims)};
   } else {
-    preserved_dims = phi::slice_ddim(dx->dims(), 0, dx_rank - 1);
-    dx_resized_dims = {phi::product(preserved_dims), seq_length};
+    preserved_dims = common::slice_ddim(dx->dims(), 0, dx_rank - 1);
+    dx_resized_dims = {common::product(preserved_dims), seq_length};
     dout_resized_dims = {
-        phi::product(preserved_dims), frame_length, n_frames};
+        common::product(preserved_dims), frame_length, n_frames};
   }
   dx->Resize(dx_resized_dims);
   dout_tmp.Resize(dout_resized_dims);
@@ -64,31 +64,31 @@ void FrameGradKernel(const Context& dev_ctx,
     trans_dx = *dx;
     std::vector perm_dout{1, 0};
-    auto dout_dims_vec = phi::vectorize(dout_tmp.dims());
+    auto dout_dims_vec = common::vectorize(dout_tmp.dims());
     for (int i = 0; i < dout_tmp.dims().size(); ++i) {
       dout_dims_vec[i] = dout_tmp.dims()[perm_dout[i]];
     }
-    trans_dout.Resize(phi::make_ddim(dout_dims_vec));
+    trans_dout.Resize(common::make_ddim(dout_dims_vec));
     dev_ctx.template Alloc(&trans_dout);
     phi::funcs::TransCompute(
         perm_dout.size(), dev_ctx, dout_tmp, &trans_dout, perm_dout);
   } else {
     std::vector perm_dx{1, 0};
-    auto dx_dims_vec = phi::vectorize(dx->dims());
+    auto dx_dims_vec = common::vectorize(dx->dims());
     for (int i = 0; i < dx->dims().size(); ++i) {
       dx_dims_vec[i] = dx->dims()[perm_dx[i]];
     }
-    trans_dx.Resize(phi::make_ddim(dx_dims_vec));
+    trans_dx.Resize(common::make_ddim(dx_dims_vec));
     dev_ctx.template Alloc(&trans_dx);
     phi::funcs::TransCompute(
         perm_dx.size(), dev_ctx, *dx, &trans_dx, perm_dx);
     std::vector perm_dout{2, 1, 0};
-    auto dout_dims_vec = phi::vectorize(dout_tmp.dims());
+    auto dout_dims_vec = common::vectorize(dout_tmp.dims());
     for (int i = 0; i < dout_tmp.dims().size(); ++i) {
       dout_dims_vec[i] = dout_tmp.dims()[perm_dout[i]];
     }
-    trans_dout.Resize(phi::make_ddim(dout_dims_vec));
+    trans_dout.Resize(common::make_ddim(dout_dims_vec));
     dev_ctx.template Alloc(&trans_dout);
     phi::funcs::TransCompute(
         perm_dout.size(), dev_ctx, dout_tmp, &trans_dout, perm_dout);
@@ -129,7 +129,7 @@ void FrameGradKernel(const Context& dev_ctx,
       restored_dx_shape.push_back(seq_length);
     }
-    dx->Resize(phi::make_ddim(restored_dx_shape));
+    dx->Resize(common::make_ddim(restored_dx_shape));
   }
 }
 }  // namespace phi
diff --git a/paddle/phi/kernels/impl/frame_kernel_impl.h b/paddle/phi/kernels/impl/frame_kernel_impl.h
index b6a0b2ab6a3e48..fa0c5658efe550 100644
--- a/paddle/phi/kernels/impl/frame_kernel_impl.h
+++ b/paddle/phi/kernels/impl/frame_kernel_impl.h
@@ -42,13 +42,15 @@ void FrameKernel(const Context& dev_ctx,
   DDim x_resized_dims;
   DDim out_resized_dims;
   if (axis == 0) {
-    preserved_dims = phi::slice_ddim(x_tmp.dims(), 1, x_rank);
-    x_resized_dims = {seq_length, phi::product(preserved_dims)};
-    out_resized_dims = {n_frames, frame_length, phi::product(preserved_dims)};
+    preserved_dims = common::slice_ddim(x_tmp.dims(), 1, x_rank);
+    x_resized_dims = {seq_length, common::product(preserved_dims)};
+    out_resized_dims = {
+        n_frames, frame_length, common::product(preserved_dims)};
   } else {
-    preserved_dims = phi::slice_ddim(x_tmp.dims(), 0, x_rank - 1);
-    x_resized_dims = {phi::product(preserved_dims), seq_length};
-    out_resized_dims = {phi::product(preserved_dims), frame_length, n_frames};
+    preserved_dims = common::slice_ddim(x_tmp.dims(), 0, x_rank - 1);
+    x_resized_dims = {common::product(preserved_dims), seq_length};
+    out_resized_dims = {
+        common::product(preserved_dims), frame_length, n_frames};
   }
   x_tmp.Resize(x_resized_dims);
   out->Resize(out_resized_dims);
@@ -63,32 +65,32 @@ void FrameKernel(const Context& dev_ctx,
     trans_x = x_tmp;
     std::vector perm_out{1, 0};
-    auto out_dims_vec = phi::vectorize(out->dims());
+    auto out_dims_vec = common::vectorize(out->dims());
     for (int i = 0; i < out->dims().size(); ++i) {
       out_dims_vec[i] = out->dims()[perm_out[i]];
     }
-    trans_out.Resize(phi::make_ddim(out_dims_vec));
+    trans_out.Resize(common::make_ddim(out_dims_vec));
     dev_ctx.template Alloc(&trans_out);
     phi::funcs::TransCompute(
         perm_out.size(), dev_ctx, *out, &trans_out, perm_out);
   } else {
     std::vector perm_x{1, 0};
-    auto x_dims_vec = phi::vectorize(x_tmp.dims());
+    auto x_dims_vec = common::vectorize(x_tmp.dims());
     for (int i = 0; i < x_tmp.dims().size(); ++i) {
       x_dims_vec[i] = x_tmp.dims()[perm_x[i]];
     }
-    trans_x.Resize(phi::make_ddim(x_dims_vec));
+    trans_x.Resize(common::make_ddim(x_dims_vec));
     dev_ctx.template Alloc(&trans_x);
     phi::funcs::TransCompute(
         perm_x.size(), dev_ctx, x_tmp, &trans_x, perm_x);
     std::vector perm_out{2, 1, 0};
-    auto out_dims_vec = phi::vectorize(out->dims());
+    auto out_dims_vec = common::vectorize(out->dims());
     for (int i = 0; i < out->dims().size(); ++i) {
       out_dims_vec[i] = out->dims()[perm_out[i]];
     }
-    trans_out.Resize(phi::make_ddim(out_dims_vec));
+    trans_out.Resize(common::make_ddim(out_dims_vec));
     dev_ctx.template Alloc(&trans_out);
     phi::funcs::TransCompute(
         perm_out.size(), dev_ctx, *out, &trans_out, perm_out);
@@ -137,7 +139,7 @@ void FrameKernel(const Context& dev_ctx,
       restored_out_shape.push_back(n_frames);
     }
-    out->Resize(phi::make_ddim(restored_out_shape));
+    out->Resize(common::make_ddim(restored_out_shape));
   }
 }
diff --git a/paddle/phi/kernels/impl/full_whit_tensor_kernel_impl.h b/paddle/phi/kernels/impl/full_whit_tensor_kernel_impl.h
index a78af4f98c2b5c..ae7ce8a3f41a86 100644
--- a/paddle/phi/kernels/impl/full_whit_tensor_kernel_impl.h
+++ b/paddle/phi/kernels/impl/full_whit_tensor_kernel_impl.h
@@ -25,7 +25,7 @@ void FullWithTensorKernel(const Context& dev_ctx,
                           DataType dtype,
                           DenseTensor* out) {
   auto shape_tmp = IntArray(shape);
-  out->Resize(phi::make_ddim(shape_tmp.GetData()));
+  out->Resize(common::make_ddim(shape_tmp.GetData()));
   FullKernel(dev_ctx, shape_tmp, Scalar(value), dtype, out);
 }
 }  // namespace phi
diff --git a/paddle/phi/kernels/impl/graph_message_passing_impl.h b/paddle/phi/kernels/impl/graph_message_passing_impl.h
index dc1477e77227b9..448836c0f84052 100644
--- a/paddle/phi/kernels/impl/graph_message_passing_impl.h
+++ b/paddle/phi/kernels/impl/graph_message_passing_impl.h
@@ -90,8 +90,8 @@ inline BroadCastInfo CalcBCastInfo(const phi::DDim& l_dims,
 inline std::vector InferBroadcastShape(const phi::DDim& x_dims,
                                        const phi::DDim& e_dims,
                                        const std::string& type = "x") {
-  auto x_dims1 = phi::vectorize(x_dims);
-  auto e_dims1 = phi::vectorize(e_dims);
+  auto x_dims1 = common::vectorize(x_dims);
+  auto e_dims1 = common::vectorize(e_dims);
   std::vector x_dims2(x_dims1.begin() + 1, x_dims1.end());
   std::vector e_dims2(e_dims1.begin() + 1, e_dims1.end());
   int max_dim = std::max(x_dims2.size(), e_dims2.size());
@@ -100,8 +100,8 @@ inline std::vector InferBroadcastShape(const phi::DDim& x_dims,
   std::vector e_dims_array(max_dim);
   std::vector out_dims_array(max_dim);
   // Only need to broadcast dimensions other than the 0th dimension.
-  phi::funcs::GetBroadcastDimsArrays(phi::make_ddim(x_dims2),
-                                     phi::make_ddim(e_dims2),
+  phi::funcs::GetBroadcastDimsArrays(common::make_ddim(x_dims2),
+                                     common::make_ddim(e_dims2),
                                      x_dims_array.data(),
                                      e_dims_array.data(),
                                      out_dims_array.data(),
@@ -117,7 +117,7 @@ inline std::vector InferBroadcastShape(const phi::DDim& x_dims,
 inline bool ReduceGrad(const phi::DDim& out_grad_dims,
                        const phi::DDim& x_dims,
-                       std::vector& axis) {
+                       std::vector& axis) {  // NOLINT
   // We must ensure the ndim of out_grad and x are the same.
   bool reduce = false;
   for (int i = 1; i < out_grad_dims.size(); i++) {
diff --git a/paddle/phi/kernels/impl/kron_grad_kernel_impl.h b/paddle/phi/kernels/impl/kron_grad_kernel_impl.h
index 352e4d30067197..3b195d6fa8b0ad 100644
--- a/paddle/phi/kernels/impl/kron_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/kron_grad_kernel_impl.h
@@ -168,11 +168,11 @@ struct KronGradOpFunctor {
     const phi::DDim &dim_y = y.dims();
     const phi::DDim &dim_dout = dout.dims();
     const phi::DDim stride_x =
-        dim_x.size() == 0 ? phi::DDim(dim_x) : phi::stride(dim_x);
+        dim_x.size() == 0 ? phi::DDim(dim_x) : common::stride(dim_x);
     const phi::DDim stride_y =
-        dim_y.size() == 0 ? phi::DDim(dim_y) : phi::stride(dim_y);
+        dim_y.size() == 0 ? phi::DDim(dim_y) : common::stride(dim_y);
     const phi::DDim stride_dout =
-        dim_dout.size() == 0 ? phi::DDim(dim_dout) : phi::stride(dim_dout);
+        dim_dout.size() == 0 ? phi::DDim(dim_dout) : common::stride(dim_dout);
     const int64_t *p_stride_x = nullptr;
     const int64_t *p_stride_y = nullptr;
diff --git a/paddle/phi/kernels/impl/kron_kernel_impl.h b/paddle/phi/kernels/impl/kron_kernel_impl.h
index e1fcb49949a748..e90c45c01879fc 100644
--- a/paddle/phi/kernels/impl/kron_kernel_impl.h
+++ b/paddle/phi/kernels/impl/kron_kernel_impl.h
@@ -45,7 +45,7 @@ inline DenseTensor UnsqueezeTo(const DenseTensor &src, int ndims) {
     for (int i = ndims - rank; i < ndims; i++) {
       new_dim[i] = shape[i - ndims + rank];
     }
-    res.Resize(phi::make_ddim(new_dim));
+    res.Resize(common::make_ddim(new_dim));
   }
   return res;
 }
@@ -109,11 +109,11 @@ struct KronOpFunctor {
     const phi::DDim &dim_y = y.dims();
     const phi::DDim &dim_out = out->dims();
     const phi::DDim stride_x =
-        dim_x.size() == 0 ? phi::DDim(dim_x) : phi::stride(dim_x);
+        dim_x.size() == 0 ? phi::DDim(dim_x) : common::stride(dim_x);
     const phi::DDim stride_y =
-        dim_y.size() == 0 ? phi::DDim(dim_y) : phi::stride(dim_y);
+        dim_y.size() == 0 ? phi::DDim(dim_y) : common::stride(dim_y);
     const phi::DDim stride_out =
-        dim_out.size() == 0 ? phi::DDim(dim_out) : phi::stride(dim_out);
+        dim_out.size() == 0 ? phi::DDim(dim_out) : common::stride(dim_out);
     const int64_t *p_stride_x = nullptr, *p_stride_y = nullptr,
                   *p_stride_out = nullptr, *p_shape_y = nullptr;
diff --git a/paddle/phi/kernels/impl/lamb_kernel_impl.h b/paddle/phi/kernels/impl/lamb_kernel_impl.h
index 5b1eb43129f203..91f73402411ec3 100644
--- a/paddle/phi/kernels/impl/lamb_kernel_impl.h
+++ b/paddle/phi/kernels/impl/lamb_kernel_impl.h
@@ -249,11 +249,11 @@ void ComputeImpl(const Context& dev_ctx,
   auto* trust_ratio_div_norm_ptr = trust_ratio_div_norm_t.data();
   // DenseTensor p_norm_t;
-  // p_norm_t.Resize(phi::make_ddim({1}));
+  // p_norm_t.Resize(common::make_ddim({1}));
   // auto* p_norm_ptr = dev_ctx.template Alloc(&p_norm_t);
   // DenseTensor trust_ratio_div_norm_t;
-  // trust_ratio_div_norm_t.Resize(phi::make_ddim({1}));
+  // trust_ratio_div_norm_t.Resize(common::make_ddim({1}));
   // auto* trust_ratio_div_norm_ptr =
   //     dev_ctx.template Alloc(&trust_ratio_div_norm_t);
diff --git a/paddle/phi/kernels/impl/lerp_grad_kernel_impl.h b/paddle/phi/kernels/impl/lerp_grad_kernel_impl.h
index 54a6172501aeae..316d00f07a35c6 100644
--- a/paddle/phi/kernels/impl/lerp_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/lerp_grad_kernel_impl.h
@@ -105,7 +105,7 @@ static void LerpGradFunctionZero(const Context& ctx,
                                  const DenseTensor& out_grad,
                                  DenseTensor* x_grad,
                                  DenseTensor* y_grad) {
-  auto dim = make_ddim(std::vector(1, 1));
+  auto dim = common::make_ddim(std::vector(1, 1));
   auto eigen_w = phi::EigenTensor::From(weight, dim);
   auto eigen_dout = phi::EigenTensor::From(out_grad, dim);
diff --git a/paddle/phi/kernels/impl/lerp_kernel_impl.h b/paddle/phi/kernels/impl/lerp_kernel_impl.h
index 9509d3300e5bdf..0c17f0e61ab30a 100644
--- a/paddle/phi/kernels/impl/lerp_kernel_impl.h
+++ b/paddle/phi/kernels/impl/lerp_kernel_impl.h
@@ -62,7 +62,7 @@ static void LerpFunctionZero(const Context& ctx,
                              DenseTensor* out) {
   ctx.template Alloc(out);
-  auto dim = make_ddim(std::vector(1, 1));
+  auto dim = common::make_ddim(std::vector(1, 1));
   auto eigen_x = phi::EigenTensor::From(x, dim);
   auto eigen_y = phi::EigenTensor::From(y, dim);
   auto eigen_w = phi::EigenTensor::From(weight, dim);
diff --git a/paddle/phi/kernels/impl/lstsq_kernel_impl.h b/paddle/phi/kernels/impl/lstsq_kernel_impl.h
index 2f26391bc6be3f..0aafee5788fa91 100644
--- a/paddle/phi/kernels/impl/lstsq_kernel_impl.h
+++ b/paddle/phi/kernels/impl/lstsq_kernel_impl.h
@@ -122,7 +122,7 @@ inline void BatchedOrmqr(const GPUContext& dev_ctx,
   PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnSormqr_bufferSize(
       handle, side, trans, m, n, k, a, lda, tau, other, ldc, &lwork));
   DenseTensor* info = new DenseTensor();
-  info->Resize(make_ddim({1}));
+  info->Resize(common::make_ddim({1}));
   int* info_d = dev_ctx.template Alloc(info);
   for (int i = 0; i < batch_size; ++i) {
@@ -132,7 +132,7 @@ inline void BatchedOrmqr(const GPUContext& dev_ctx,
     handle = dev_ctx.cusolver_dn_handle();
     DenseTensor* workspace = new DenseTensor();
-    workspace->Resize(make_ddim({lwork}));
+    workspace->Resize(common::make_ddim({lwork}));
     float* workspace_ptr = dev_ctx.template Alloc(workspace);
     // compute ormgr
@@ -191,7 +191,7 @@ inline void BatchedOrmqr(const GPUContext& dev_ctx,
   PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cusolverDnDormqr_bufferSize(
       handle, side, trans, m, n, k, a, lda, tau, other, ldc, &lwork));
   DenseTensor* info = new DenseTensor();
-  info->Resize(make_ddim({1}));
+  info->Resize(common::make_ddim({1}));
   int* info_d = dev_ctx.template Alloc(info);
   for (int i = 0; i < batch_size; ++i) {
@@ -201,7 +201,7 @@ void BatchedOrmqr(const GPUContext& dev_ctx,
     handle = dev_ctx.cusolver_dn_handle();
     DenseTensor* workspace = new DenseTensor();
-    workspace->Resize(make_ddim({lwork}));
+    workspace->Resize(common::make_ddim({lwork}));
     double* workspace_ptr = dev_ctx.template Alloc(workspace);
     // compute ormgr
diff --git a/paddle/phi/kernels/impl/lu_grad_kernel_impl.h b/paddle/phi/kernels/impl/lu_grad_kernel_impl.h
index 8f3a37d25b2fb9..71747addfcdbd2 100644
--- a/paddle/phi/kernels/impl/lu_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/lu_grad_kernel_impl.h
@@ -109,7 +109,7 @@ void LUGradKernel(const Context& dev_ctx,
   std::vector axes = {xrank - 2, xrank - 1};
   std::vector slice_starts(2, 0);
   std::vector slice_ends(2, 0);
-  auto valuedims = vectorize(xdims);
+  auto valuedims = common::vectorize(xdims);
   DenseTensor Pmat;
   Unpack_Pivot(dev_ctx, pivots, &Pmat, m, k);
diff --git a/paddle/phi/kernels/impl/lu_kernel_impl.h b/paddle/phi/kernels/impl/lu_kernel_impl.h
index d2838551ff20a7..feca05cf734100 100644
--- a/paddle/phi/kernels/impl/lu_kernel_impl.h
+++ b/paddle/phi/kernels/impl/lu_kernel_impl.h
@@ -79,7 +79,7 @@ void SetValueCompute(const Context& dev_ctx,
       none_axes_cur++;
     }
-    slice_dims_for_assign = phi::make_ddim(slice_dims_with_none);
+    slice_dims_for_assign = common::make_ddim(slice_dims_with_none);
   }
   auto place = dev_ctx.GetPlace();
@@ -158,7 +158,7 @@ void SetValueCompute(const Context& dev_ctx,
         dev_ctx, slice_tensor, *value_tensor, SubFunctor(), &slice_tensor);
   } else {
     DenseTensor value_t(dtype);
-    auto value_dims = phi::make_ddim(shape);
+    auto value_dims = common::make_ddim(shape);
     CheckIsDimsMatch(slice_dims_for_assign, value_dims);
     value_t.Resize(value_dims);
@@ -389,7 +389,7 @@ void arange(const Context& dev_ctx,
             int w,
             int batchsize = 1,
             int h = 1) {
-  tmp->Resize(phi::make_ddim({batchsize * w}));
+  tmp->Resize(common::make_ddim({batchsize * w}));
   dev_ctx.template HostAlloc(tmp);
   auto tmpdata = tmp->data();
   for (int b = 0; b < batchsize; b++) {
@@ -439,7 +439,7 @@ void LU_Unpack(const Context& dev_ctx,
   // set L's diagonal 1
   auto dim = std::min(H, W);
   DenseTensor rowtensor, rt_dev;
-  auto batchsize = product(phi::slice_ddim(udims, 0, udims.size() - 2));
+  auto batchsize = product(common::slice_ddim(udims, 0, udims.size() - 2));
   // if udims is [0, ..., H, W], it should be 0
   if (udims.size() == 2) batchsize = std::max(static_cast(batchsize), 1);
@@ -477,7 +477,7 @@ void Unpack_Pivot(const Context& dev_ctx,
                   int h,
                   int w UNUSED) {
   auto dims = Pivot.dims();
-  auto Pdimvec = vectorize(dims);
+  auto Pdimvec = common::vectorize(dims);
   auto prank = Pdimvec.size();
   auto Pnum = dims[prank - 1];
   DenseTensor Pivot_cpu;
@@ -486,14 +486,14 @@ void Unpack_Pivot(const Context& dev_ctx,
   auto pdataptr = Pivot_cpu.data();
   Pdimvec[prank - 1] = h;
   Pdimvec.emplace_back(h);
-  auto Pdim = phi::make_ddim(Pdimvec);
+  auto Pdim = common::make_ddim(Pdimvec);
   P->Resize(Pdim);
   dev_ctx.template Alloc(P);
   auto pdata = P->data();
   phi::funcs::SetConstant setter;
   setter(dev_ctx, P, static_cast(0));
-  auto batchsize = product(phi::slice_ddim(dims, 0, prank - 1));
+  auto batchsize = product(common::slice_ddim(dims, 0, prank - 1));
   if (prank == 1) batchsize = std::max(static_cast(batchsize), 1);
   DenseTensor idt;
@@ -525,7 +525,7 @@ DenseTensor Transpose2DTo6D(const Context& dev_ctx, const DenseTensor& x) {
   // transpose the last two dimensions
   DenseTensor ret;
   auto x_dim = x.dims();
-  auto x_vec = phi::vectorize(x_dim);
+  auto x_vec = common::vectorize(x_dim);
   int rank = x_vec.size();
   for (int i = 0; i < x_dim.size(); i++) {
@@ -542,7 +542,7 @@ DenseTensor Transpose2DTo6D(const Context& dev_ctx, const DenseTensor& x) {
     axis[i] = i;
   }
   std::swap(axis[rank - 1], axis[rank - 2]);
-  ret.Resize(phi::make_ddim(x_vec));
+  ret.Resize(common::make_ddim(x_vec));
   dev_ctx.template Alloc(&ret);
   switch (rank) {
     case 2: {
diff --git a/paddle/phi/kernels/impl/lu_unpack_grad_kernel_impl.h b/paddle/phi/kernels/impl/lu_unpack_grad_kernel_impl.h
index 7098b745e6d255..f1d904663a7233 100644
--- a/paddle/phi/kernels/impl/lu_unpack_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/lu_unpack_grad_kernel_impl.h
@@ -64,7 +64,7 @@ void LUUnpackGradKernel(const Context& dev_ctx,
   std::vector axes = {xrank - 2, xrank - 1};
   std::vector slice_starts(2, 0);
   std::vector slice_ends(2, 0);
-  auto valuedims = vectorize(xdims);
+  auto valuedims = common::vectorize(xdims);
   phi::funcs::SetConstant setter;
   setter(dev_ctx, x_grad, static_cast(0));
diff --git a/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h b/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
index 4125e49db6eef6..40ff69c50f1d7f 100644
--- a/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
@@ -134,7 +134,7 @@ static DDim RowMatrixFromVector(const DDim& x_dim) {
   if (x_dim.size() > 1) {
     return x_dim;
   }
-  return phi::make_ddim({1, x_dim[0]});
+  return common::make_ddim({1, x_dim[0]});
 }
 /**
@@ -145,7 +145,7 @@ static DDim ColumnMatrixFromVector(const DDim& y_dim) {
   if (y_dim.size() > 1) {
     return y_dim;
   }
-  return phi::make_ddim({y_dim[0], 1});
+  return common::make_ddim({y_dim[0], 1});
 }
 /**
@@ -229,9 +229,9 @@ void MatmulGradKernel(const Context& dev_ctx,
                       DenseTensor* dx,
                       DenseTensor* dy) {
   // get dims
-  std::vector x_dims = vectorize(x.dims());
-  std::vector y_dims = vectorize(y.dims());
-  std::vector dout_dims = vectorize(out_grad.dims());
+  std::vector x_dims = common::vectorize(x.dims());
+  std::vector y_dims = common::vectorize(y.dims());
+  std::vector dout_dims = common::vectorize(out_grad.dims());
   int x_ndim = x_dims.size();
   int y_ndim = y_dims.size();
@@ -422,8 +422,10 @@ void MatmulGradKernel(const Context& dev_ctx,
     }
     // get help dims
-    const std::vector dx_help_dims = vectorize(dx_help.dims());
-    const std::vector dy_help_dims = vectorize(dy_help.dims());
+    const std::vector dx_help_dims =
+        common::vectorize(dx_help.dims());
+    const std::vector dy_help_dims =
+        common::vectorize(dy_help.dims());
     std::vector dx_broadcast_dims(ndim);
     std::vector dy_broadcast_dims(ndim);
@@ -485,9 +487,9 @@ void MatmulDoubleGradKernel(const Context& dev_ctx,
                             DenseTensor* dy,
                             DenseTensor* ddout) {
   // Get dims from the input x, y, output_grad
-  std::vector x_dims = vectorize(x.dims());
-  std::vector y_dims = vectorize(y.dims());
-  std::vector dout_dims = vectorize(dout.dims());
+  std::vector x_dims = common::vectorize(x.dims());
+  std::vector y_dims = common::vectorize(y.dims());
+  std::vector dout_dims = common::vectorize(dout.dims());
   int x_ndim = x_dims.size();
   int y_ndim = y_dims.size();
@@ -791,8 +793,10 @@ void MatmulDoubleGradKernel(const Context& dev_ctx,
     }
     // get help dims
-    const std::vector dx_help_dims = vectorize(dx_help.dims());
-    const std::vector dy_help_dims = vectorize(dy_help.dims());
+    const std::vector dx_help_dims =
+        common::vectorize(dx_help.dims());
+    const std::vector dy_help_dims =
+        common::vectorize(dy_help.dims());
     std::vector dx_broadcast_dims(ndim);
     std::vector dy_broadcast_dims(ndim);
@@ -888,9 +892,9 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
                             DenseTensor* out_d_ddx,
                             DenseTensor* out_d_ddy) {
   // Get dims from the input x, y, output_grad
-  std::vector x_dims = vectorize(x.dims());
-  std::vector y_dims = vectorize(y.dims());
-  std::vector dout_dims = vectorize(dout.dims());
+  std::vector x_dims = common::vectorize(x.dims());
+  std::vector y_dims = common::vectorize(y.dims());
+  std::vector dout_dims = common::vectorize(dout.dims());
   int x_ndim = x_dims.size();
   int y_ndim = y_dims.size();
@@ -1539,9 +1543,9 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
     // get help dims
     const std::vector dx_help_dims =
-        vectorize(out_dx_help.dims());
+        common::vectorize(out_dx_help.dims());
     const std::vector dy_help_dims =
-        vectorize(out_dx_help.dims());
+        common::vectorize(out_dx_help.dims());
     std::vector dx_broadcast_dims(ndim);
     std::vector dy_broadcast_dims(ndim);
@@ -1883,8 +1887,8 @@ void MatmulWithFlattenGradKernel(const Context& dev_ctx,
   auto* dout = &out_grad;
   DenseTensor dout_mat(*dout);
-  dout_mat.Resize({phi::flatten_to_2d(x.dims(), x_num_col_dims)[0],
-                   phi::flatten_to_2d(y.dims(), y_num_col_dims)[1]});
+  dout_mat.Resize({common::flatten_to_2d(x.dims(), x_num_col_dims)[0],
+                   common::flatten_to_2d(y.dims(), y_num_col_dims)[1]});
   auto* dx = x_grad;
   auto* dy = y_grad;
@@ -1932,8 +1936,8 @@ void MatmulWithFlattenDoubleGradKernel(
   auto y_mat = y.dims().size() > 2 ? phi::ReshapeToMatrix(y, y_num_col_dims) : y;
-  const int m = phi::flatten_to_2d(x.dims(), x_num_col_dims)[0];
-  const int n = phi::flatten_to_2d(y.dims(), y_num_col_dims)[1];
+  const int m = common::flatten_to_2d(x.dims(), x_num_col_dims)[0];
+  const int n = common::flatten_to_2d(y.dims(), y_num_col_dims)[1];
   auto* dout = &out_grad;
   DenseTensor dout_mat(*dout);
diff --git a/paddle/phi/kernels/impl/matmul_kernel_impl.h b/paddle/phi/kernels/impl/matmul_kernel_impl.h
index 373453d1eefa45..85826728f404c4 100644
--- a/paddle/phi/kernels/impl/matmul_kernel_impl.h
+++ b/paddle/phi/kernels/impl/matmul_kernel_impl.h
@@ -131,7 +131,7 @@ void MatMulFunctionImplWithBlas(
                         M,
                         N));
     VLOG(3) << "MatMul's case 1";
-    Out->Resize(phi::make_ddim({}));
+    Out->Resize(common::make_ddim({}));
     dev_ctx.template Alloc(Out);
     blas.GEMM(CblasNoTrans,
               CblasTrans,
@@ -178,7 +178,7 @@ void MatMulFunctionImplWithBlas(
       std::copy_n(y_dims.cbegin(), y_ndim - 2, out_dims.begin());
       out_dims.back() = y_dims.back();
     }
-    Out->ResizeAndAllocate(phi::make_ddim(out_dims));
+    Out->ResizeAndAllocate(common::make_ddim(out_dims));
     dev_ctx.template Alloc(Out);
     if (trans_y) {
       const int M = Y.numel() / N;
@@ -256,7 +256,7 @@ void MatMulFunctionImplWithBlas(
     } else {
       std::copy_n(x_dims.cbegin(), x_ndim - 1, out_dims.begin());
     }
-    Out->ResizeAndAllocate(phi::make_ddim(out_dims));
+    Out->ResizeAndAllocate(common::make_ddim(out_dims));
     dev_ctx.template Alloc(Out);
     if (trans_x) {
@@ -344,7 +344,7 @@ void MatMulFunctionImplWithBlas(
   out_broadcast_dims[ndim - 2] = M;
   out_broadcast_dims[ndim - 1] = N;
-  Out->ResizeAndAllocate(phi::make_ddim(out_broadcast_dims));
+  Out->ResizeAndAllocate(common::make_ddim(out_broadcast_dims));
   dev_ctx.template Alloc(Out);
   const int batch_dim = ndim - 2;
@@ -521,7 +521,7 @@ void MatMulFunctionImplWithCublasLt(
                         N));
     // MatMul's case 0 => vector * vector
-    Out->Resize(phi::make_ddim({}));
+    Out->Resize(common::make_ddim({}));
     dev_ctx.template Alloc(Out);
     VLOG(3) << "MatMul with blaslt case 1";
     blaslt::Run(dev_ctx,
@@ -569,7 +569,7 @@ void MatMulFunctionImplWithCublasLt(
       std::copy_n(y_dims.cbegin(), y_ndim - 2, out_dims.begin());
       out_dims.back() = y_dims.back();
     }
-    Out->ResizeAndAllocate(phi::make_ddim(out_dims));
+    Out->ResizeAndAllocate(common::make_ddim(out_dims));
     dev_ctx.template Alloc(Out);
     if (trans_y) {
       const int M = Y.numel() / N;
@@ -652,7 +652,7 @@ void MatMulFunctionImplWithCublasLt(
     } else {
       std::copy_n(x_dims.cbegin(), x_ndim - 1, out_dims.begin());
     }
-    Out->ResizeAndAllocate(phi::make_ddim(out_dims));
+    Out->ResizeAndAllocate(common::make_ddim(out_dims));
     dev_ctx.template Alloc(Out);
     if (trans_x) {
@@ -745,7 +745,7 @@ void MatMulFunctionImplWithCublasLt(
   out_broadcast_dims[ndim - 2] = M;
   out_broadcast_dims[ndim - 1] = N;
-  Out->ResizeAndAllocate(phi::make_ddim(out_broadcast_dims));
+  Out->ResizeAndAllocate(common::make_ddim(out_broadcast_dims));
   dev_ctx.template Alloc(Out);
   const int batch_dim = ndim - 2;
@@ -1030,7 +1030,7 @@ bool inline MatMulInt8Function(const phi::GPUContext& ctx,
       return false;
     }
-    out->Resize(phi::make_ddim({}));
+    out->Resize(common::make_ddim({}));
     ctx.template Alloc(out);
     blaslt::Run(ctx,
                 y_data,
@@ -1083,7 +1083,7 @@ bool inline MatMulInt8Function(const phi::GPUContext& ctx,
       std::copy_n(y_dims.cbegin(), y_ndim - 2, out_dims.begin());
       out_dims.back() = y_dims.back();
     }
-    out->ResizeAndAllocate(phi::make_ddim(out_dims));
+    out->ResizeAndAllocate(common::make_ddim(out_dims));
     ctx.template Alloc(out);
     if (trans_y) {
       const int M = y.numel() / N;
@@ -1170,7 +1170,7 @@ bool inline MatMulInt8Function(const phi::GPUContext& ctx,
     } else {
       std::copy_n(x_dims.cbegin(), x_ndim - 1, out_dims.begin());
     }
-    out->ResizeAndAllocate(phi::make_ddim(out_dims));
+    out->ResizeAndAllocate(common::make_ddim(out_dims));
     ctx.template Alloc(out);
     if (trans_x) {
@@ -1259,7 +1259,7 @@ bool inline MatMulInt8Function(const phi::GPUContext& ctx,
   out_broadcast_dims[ndim - 2] = M;
   out_broadcast_dims[ndim - 1] = N;
-  out->ResizeAndAllocate(phi::make_ddim(out_broadcast_dims));
+  out->ResizeAndAllocate(common::make_ddim(out_broadcast_dims));
   ctx.template Alloc(out);
   const int batch_dim = ndim - 2;
@@ -1475,17 +1475,17 @@ void MatmulKernel(const Context& ctx,
                   bool transpose_y,
                   DenseTensor* out) {
   PADDLE_ENFORCE_NE(
-      phi::product(x.dims()),
+      common::product(x.dims()),
       0,
       phi::errors::InvalidArgument("The Input(X) dims size must not be equal 0,"
                                    " but received dims size is 0. "));
   PADDLE_ENFORCE_NE(
-      phi::product(y.dims()),
+      common::product(y.dims()),
       0,
       phi::errors::InvalidArgument("The Input(Y) dims size must not be equal 0,"
")); - const std::vector x_dims = vectorize(x.dims()); - const std::vector y_dims = vectorize(y.dims()); + const std::vector x_dims = common::vectorize(x.dims()); + const std::vector y_dims = common::vectorize(y.dims()); MatmulJudgeDtypeKernel( ctx, x, y, x_dims, y_dims, out, transpose_x, transpose_y); } diff --git a/paddle/phi/kernels/impl/matrix_rank_kernel_impl.h b/paddle/phi/kernels/impl/matrix_rank_kernel_impl.h index b0dd76a17eeb36..23924a93f947b3 100644 --- a/paddle/phi/kernels/impl/matrix_rank_kernel_impl.h +++ b/paddle/phi/kernels/impl/matrix_rank_kernel_impl.h @@ -21,37 +21,37 @@ namespace phi { namespace detail { static DDim GetEigenvalueDim(const DDim& dim, int k) { - auto vec = phi::vectorize(dim); + auto vec = common::vectorize(dim); vec.erase(vec.end() - 2, vec.end()); vec.push_back(k); - return phi::make_ddim(vec); + return common::make_ddim(vec); } static DDim NewAxisDim(const DDim& dim, int k) { - auto vec = phi::vectorize(dim); + auto vec = common::vectorize(dim); vec.push_back(k); - return phi::make_ddim(vec); + return common::make_ddim(vec); } static DDim RemoveLastDim(const DDim& dim) { - auto vec = phi::vectorize(dim); + auto vec = common::vectorize(dim); if (vec.size() <= 1) { - return phi::make_ddim({1}); + return common::make_ddim({1}); } vec.erase(vec.end() - 1, vec.end()); - return phi::make_ddim(vec); + return common::make_ddim(vec); } static DDim GetUDDim(const DDim& x_dim, int k) { - auto x_vec = phi::vectorize(x_dim); + auto x_vec = common::vectorize(x_dim); x_vec[x_vec.size() - 1] = k; - return phi::make_ddim(x_vec); + return common::make_ddim(x_vec); } static DDim GetVHDDim(const DDim& x_dim, int k) { - auto x_vec = phi::vectorize(x_dim); + auto x_vec = common::vectorize(x_dim); x_vec[x_vec.size() - 2] = k; - return phi::make_ddim(x_vec); + return common::make_ddim(x_vec); } } // namespace detail diff --git a/paddle/phi/kernels/impl/merged_momentum_impl.h b/paddle/phi/kernels/impl/merged_momentum_impl.h index cdf90cba70690e..85f253fd32d492 100644 --- a/paddle/phi/kernels/impl/merged_momentum_impl.h +++ b/paddle/phi/kernels/impl/merged_momentum_impl.h @@ -16,10 +16,10 @@ #include "glog/logging.h" +#include "paddle/common/macros.h" #include "paddle/phi/common/amp_type_traits.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/hostdevice.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/kernels/funcs/for_range.h" #include "paddle/phi/kernels/impl/momentum_kernel_impl.h" #include "paddle/phi/kernels/merged_momentum_kernel.h" diff --git a/paddle/phi/kernels/impl/meshgrid_grad_kernel_impl.h b/paddle/phi/kernels/impl/meshgrid_grad_kernel_impl.h index bdedcee0957074..566f7ac38bdcf0 100644 --- a/paddle/phi/kernels/impl/meshgrid_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/meshgrid_grad_kernel_impl.h @@ -14,8 +14,8 @@ #pragma once +#include "paddle/common/macros.h" #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/meshgrid_grad_kernel.h" diff --git a/paddle/phi/kernels/impl/meshgrid_kernel_impl.h b/paddle/phi/kernels/impl/meshgrid_kernel_impl.h index dfe162a270a9b5..3507086a1964b3 100644 --- a/paddle/phi/kernels/impl/meshgrid_kernel_impl.h +++ b/paddle/phi/kernels/impl/meshgrid_kernel_impl.h @@ -58,9 +58,9 @@ void MeshgridForward(const Context& ctx, DenseTensor reshape_ins_tensor; phi::Copy(ctx, *ins[i], ctx.GetPlace(), false, &reshape_ins_tensor); - DDim 
-      DDim out_dims_reshape = phi::make_ddim(view_shape);
+      DDim out_dims_reshape = common::make_ddim(view_shape);
       reshape_ins_tensor.Resize(out_dims_reshape);
-      DDim out_dims = phi::make_ddim(shape);
+      DDim out_dims = common::make_ddim(shape);
       Eigen::DSizes bcast_dims;
       for (int64_t j = 0; j < size; j++) {
diff --git a/paddle/phi/kernels/impl/multi_dot_kernel_impl.h b/paddle/phi/kernels/impl/multi_dot_kernel_impl.h
index e63ee31190757e..d3d854ef541fc4 100644
--- a/paddle/phi/kernels/impl/multi_dot_kernel_impl.h
+++ b/paddle/phi/kernels/impl/multi_dot_kernel_impl.h
@@ -42,7 +42,7 @@ inline DenseTensor MatMul(const Context& ctx,
   auto blas = phi::funcs::GetBlas(ctx);
   DenseTensor matrix_c;
-  phi::DDim c_dim = phi::make_ddim({a_dim[0], b_dim[1]});
+  phi::DDim c_dim = common::make_ddim({a_dim[0], b_dim[1]});
   matrix_c.Resize(c_dim);
   ctx.template Alloc(&matrix_c);
@@ -175,9 +175,9 @@ inline void GetDims(const std::vector& ins,
   for (size_t i = 0; i < n; i++) {
     (*ins_dims)[i] = ins[i]->dims();
     if (i == 0 && (*ins_dims)[i].size() == 1) {
-      (*ins_dims)[i] = phi::make_ddim({1, (*ins_dims)[i][0]});
+      (*ins_dims)[i] = common::make_ddim({1, (*ins_dims)[i][0]});
     } else if (i == n - 1 && (*ins_dims)[i].size() == 1) {
-      (*ins_dims)[i] = phi::make_ddim({(*ins_dims)[i][0], 1});
+      (*ins_dims)[i] = common::make_ddim({(*ins_dims)[i][0], 1});
     }
   }
 }
@@ -212,7 +212,7 @@ void MultiDotKernel(const Context& ctx,
   auto mat_dim_c = phi::funcs::CreateMatrixDescriptor(ins_dims[2], 0, false);
   if (cost1 < cost2) {
     DenseTensor tmp_out;
-    phi::DDim tmp_dim = phi::make_ddim({Ma, Nb});
+    phi::DDim tmp_dim = common::make_ddim({Ma, Nb});
     tmp_out.Resize(tmp_dim);
     ctx.template Alloc(&tmp_out);
     blas.MatMul(
@@ -221,7 +221,7 @@ void MultiDotKernel(const Context& ctx,
     blas.MatMul(tmp_out, mat_dim_tmp, *ins[2], mat_dim_c, scale, out, T(0));
   } else {
     DenseTensor tmp_out;
-    phi::DDim tmp_dim = phi::make_ddim({Ka, Nc});
+    phi::DDim tmp_dim = common::make_ddim({Ka, Nc});
     tmp_out.Resize(tmp_dim);
     ctx.template Alloc(&tmp_out);
     blas.MatMul(
@@ -357,14 +357,14 @@ void MultiDotGradKernel(const Context& ctx,
   phi::DDim dout_dim = dout.dims();
   if (ins[0]->dims().size() == 1 && ins[n - 1]->dims().size() == 1) {
-    dout_dim = phi::make_ddim({1, 1});
+    dout_dim = common::make_ddim({1, 1});
   } else if (ins[0]->dims().size() == 1) {
     if (dout_dim.size() == 1) {
-      dout_dim = phi::make_ddim({1, dout_dim[0]});
+      dout_dim = common::make_ddim({1, dout_dim[0]});
     }
   } else if (ins[n - 1]->dims().size() == 1) {
     if (dout_dim.size() == 1) {
-      dout_dim = phi::make_ddim({dout_dim[0], 1});
+      dout_dim = common::make_ddim({dout_dim[0], 1});
     }
   }
diff --git a/paddle/phi/kernels/impl/pool_grad_kernel_impl.h b/paddle/phi/kernels/impl/pool_grad_kernel_impl.h
index e3e19370c86bf1..cf00a9b82b8dd8 100644
--- a/paddle/phi/kernels/impl/pool_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/pool_grad_kernel_impl.h
@@ -14,7 +14,7 @@ limitations under the License. */
 #pragma once
-#include "paddle/phi/core/ddim.h"
+#include "paddle/common/ddim.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/funcs/pooling.h"
 #include "paddle/phi/kernels/pool_grad_kernel.h"
diff --git a/paddle/phi/kernels/impl/pool_kernel_impl.h b/paddle/phi/kernels/impl/pool_kernel_impl.h
index a2a6705a68302b..dc0b7ad2108ac5 100644
--- a/paddle/phi/kernels/impl/pool_kernel_impl.h
+++ b/paddle/phi/kernels/impl/pool_kernel_impl.h
@@ -16,7 +16,7 @@ limitations under the License. */
 #include
-#include "paddle/phi/core/ddim.h"
+#include "paddle/common/ddim.h"
 #include "paddle/phi/kernels/funcs/pooling.h"
 #include "paddle/phi/kernels/pool_kernel.h"
diff --git a/paddle/phi/kernels/impl/pow2_decay_with_linear_warmup_kernel_impl.h b/paddle/phi/kernels/impl/pow2_decay_with_linear_warmup_kernel_impl.h
index da28f52f6173b8..006a8f1e058626 100644
--- a/paddle/phi/kernels/impl/pow2_decay_with_linear_warmup_kernel_impl.h
+++ b/paddle/phi/kernels/impl/pow2_decay_with_linear_warmup_kernel_impl.h
@@ -14,8 +14,8 @@
 #pragma once
+#include "paddle/common/macros.h"
 #include "paddle/phi/core/dense_tensor.h"
-#include "paddle/phi/core/macros.h"
 #include "paddle/phi/kernels/funcs/for_range.h"
 namespace phi {
diff --git a/paddle/phi/kernels/impl/qr_grad_kernel_impl.h b/paddle/phi/kernels/impl/qr_grad_kernel_impl.h
index d22eca3c73393e..e015909d6e7b56 100644
--- a/paddle/phi/kernels/impl/qr_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/qr_grad_kernel_impl.h
@@ -38,7 +38,7 @@ static DenseTensor Fill(const Context& ctx,
                         std::vector shape,
                         float fill_value) {
   DenseTensor ret;
-  ret.Resize(make_ddim(shape));
+  ret.Resize(common::make_ddim(shape));
   ctx.template Alloc(&ret);
   funcs::SetConstant()(ctx, &ret, T(fill_value));
   return ret;
@@ -101,7 +101,7 @@ void QrGradKernel(const Context& ctx,
     R_term = Matmul(ctx, R, TransposeLast2Dim(ctx, dR));
   } else {
-    R_term = Fill(ctx, phi::vectorize(R.dims()), 0);
+    R_term = Fill(ctx, common::vectorize(R.dims()), 0);
   }
   // dQ^H * Q
@@ -110,7 +110,7 @@ void QrGradKernel(const Context& ctx,
     Q_term = Matmul(ctx, TransposeLast2Dim(ctx, dQ), Q);
   } else {
-    Q_term = Fill(ctx, phi::vectorize(R.dims()), 0);
+    Q_term = Fill(ctx, common::vectorize(R.dims()), 0);
   }
   DenseTensor M_tmp1 = Subtract(ctx, R_term, Q_term);
@@ -160,8 +160,8 @@ void QrGradKernel(const Context& ctx,
     dQ_prime = Matmul(ctx, Y, TransposeLast2Dim(ctx, dV));
   } else {
-    dV = Fill(ctx, phi::vectorize(Y.dims()), 0);
-    dQ_prime = Fill(ctx, phi::vectorize(Q.dims()), 0);
+    dV = Fill(ctx, common::vectorize(Y.dims()), 0);
+    dQ_prime = Fill(ctx, common::vectorize(Q.dims()), 0);
   }
   if (dQ.initialized()) {
diff --git a/paddle/phi/kernels/impl/quant_linear_kernel_impl.h b/paddle/phi/kernels/impl/quant_linear_kernel_impl.h
index dbd548f7af6da2..f48e871dce1659 100644
--- a/paddle/phi/kernels/impl/quant_linear_kernel_impl.h
+++ b/paddle/phi/kernels/impl/quant_linear_kernel_impl.h
@@ -37,7 +37,7 @@ void QuantLinearKernel(const Context& dev_ctx,
   auto input_dims = x.dims();
   std::vector output_dims;
-  auto in_mat_dims = phi::flatten_to_2d(input_dims, in_num_col_dims);
+  auto in_mat_dims = common::flatten_to_2d(input_dims, in_num_col_dims);
   auto w_dims0 = padding_weights ? w_dims[0] - 4 : w_dims[0];
  auto w_dims1 = padding_weights ? w_dims[1] - 4 : w_dims[1];
   PADDLE_ENFORCE_EQ(
@@ -51,7 +51,7 @@ void QuantLinearKernel(const Context& dev_ctx,
           in_mat_dims[1],
           in_mat_dims,
           w_dims0,
-          phi::make_ddim({w_dims0, w_dims1})));
+          common::make_ddim({w_dims0, w_dims1})));
   output_dims.reserve(static_cast(in_num_col_dims + 1));
   for (int i = 0; i < in_num_col_dims; ++i) {
@@ -59,11 +59,11 @@ void QuantLinearKernel(const Context& dev_ctx,
   }
   output_dims.push_back(w_dims1);
-  y->Resize(phi::make_ddim(output_dims));
+  y->Resize(common::make_ddim(output_dims));
   y->set_lod(x.lod());
   auto out_dims = y->dims();
-  int M = phi::product(out_dims) / w_dims1;
+  int M = common::product(out_dims) / w_dims1;
   const T* input_data = x.data();
   auto* output_data = dev_ctx.template Alloc(y, y->numel() * sizeof(T));
diff --git a/paddle/phi/kernels/impl/reduce_grad.h b/paddle/phi/kernels/impl/reduce_grad.h
index 5665c9713c4764..2449d4decd965f 100644
--- a/paddle/phi/kernels/impl/reduce_grad.h
+++ b/paddle/phi/kernels/impl/reduce_grad.h
@@ -14,7 +14,7 @@
 #pragma once
-#include "paddle/phi/core/macros.h"
+#include "paddle/common/macros.h"
 #include "paddle/phi/kernels/cast_kernel.h"
 #include "paddle/phi/kernels/empty_kernel.h"
 #include "paddle/phi/kernels/funcs/reduce_grad_functions.h"
diff --git a/paddle/phi/kernels/impl/renorm_impl.h b/paddle/phi/kernels/impl/renorm_impl.h
index 554ccb6c1833f9..409c0a5c4e1f31 100644
--- a/paddle/phi/kernels/impl/renorm_impl.h
+++ b/paddle/phi/kernels/impl/renorm_impl.h
@@ -280,8 +280,8 @@ void RenormFunc(const phi::GPUContext& ctx,
   int64_t dim_divisor = 1, pre_mul = 1;
   for (int i = dim + 1; i < dim_size; i++) dim_divisor *= input_dims[i];
   for (int i = 0; i < dim; i++) pre_mul *= input_dims[i];
-  pow_value.Resize(phi::make_ddim({pre_mul, dimension_each, dim_divisor}));
-  dim_value.Resize(phi::make_ddim({dimension_each}));
+  pow_value.Resize(common::make_ddim({pre_mul, dimension_each, dim_divisor}));
+  dim_value.Resize(common::make_ddim({dimension_each}));
   T* pow_value_data = ctx.template Alloc(&pow_value);
   T* dim_value_data = ctx.template Alloc(&dim_value);
   auto stream = ctx.stream();
@@ -317,11 +317,11 @@ void RenormGradFunc(const phi::GPUContext& ctx,
   for (int i = dim + 1; i < dim_size; i++) dim_divisor *= input_dims[i];
   for (int i = 0; i < dim; i++) pre_mul *= input_dims[i];
   DenseTensor pow_value, mul_value, dim_value, dim_power_sum, weight_derivative;
-  pow_value.Resize(phi::make_ddim({pre_mul, dimension_each, dim_divisor}));
-  mul_value.Resize(phi::make_ddim({pre_mul, dimension_each, dim_divisor}));
-  dim_value.Resize(phi::make_ddim({dimension_each}));
-  dim_power_sum.Resize(phi::make_ddim({dimension_each}));
-  weight_derivative.Resize(phi::make_ddim({dimension_each}));
+  pow_value.Resize(common::make_ddim({pre_mul, dimension_each, dim_divisor}));
+  mul_value.Resize(common::make_ddim({pre_mul, dimension_each, dim_divisor}));
+  dim_value.Resize(common::make_ddim({dimension_each}));
+  dim_power_sum.Resize(common::make_ddim({dimension_each}));
+  weight_derivative.Resize(common::make_ddim({dimension_each}));
   auto stream = ctx.stream();
   int block = std::min(numel, static_cast(256));
   int grid = (numel + block - 1) / block;
diff --git a/paddle/phi/kernels/impl/repeat_interleave_grad_kernel_impl.h b/paddle/phi/kernels/impl/repeat_interleave_grad_kernel_impl.h
index 806e2be66332cb..d8c56000639bbc 100644
--- a/paddle/phi/kernels/impl/repeat_interleave_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/repeat_interleave_grad_kernel_impl.h
@@ -107,7 +107,7 @@ void RepeatInterleaveWithTensorIndexGradKernel(
 #if defined(__NVCC__) || defined(__HIPCC__)
   auto output_dim = out_grad.dims();
-  auto stride_dim = phi::stride(input_dim);
+  auto stride_dim = common::stride(input_dim);
   int64_t stride = stride_dim[dim];
   int64_t size = output_dim[dim];
   int64_t delta = input_dim[dim] - size;
@@ -181,7 +181,7 @@ void RepeatInterleaveGradKernel(const Context& ctx,
   DenseTensor index;
 #if defined(__NVCC__) || defined(__HIPCC__)
   auto output_dim = out_grad.dims();
-  auto stride_dim = phi::stride(input_dim);
+  auto stride_dim = common::stride(input_dim);
   int64_t stride = stride_dim[dim];
   int64_t size = output_dim[dim];
   int64_t delta = input_dim[dim] - size;
@@ -201,7 +201,7 @@ void RepeatInterleaveGradKernel(const Context& ctx,
   for (int i = 0; i < x_grad->dims()[dim]; i++) {
     std::fill_n(index_vec.begin() + i * repeats, repeats, i);
   }
-  index.Resize(phi::make_ddim({index_size}));
+  index.Resize(common::make_ddim({index_size}));
   phi::TensorFromVector(index_vec, ctx, &index);
   const int* index_data = index.data();
diff --git a/paddle/phi/kernels/impl/repeat_interleave_kernel_impl.h b/paddle/phi/kernels/impl/repeat_interleave_kernel_impl.h
index 9ac7ac6072db44..05f1bba3c0ea68 100644
--- a/paddle/phi/kernels/impl/repeat_interleave_kernel_impl.h
+++ b/paddle/phi/kernels/impl/repeat_interleave_kernel_impl.h
@@ -77,24 +77,24 @@ void RepeatInterleaveKernel(const Context& ctx,
   for (int i = 0; i < input_dim[dim]; i++) {
     std::fill_n(index_vec.begin() + i * repeats, repeats, i);
   }
-  index.Resize(phi::make_ddim({index_size}));
+  index.Resize(common::make_ddim({index_size}));
   if (place == cpu_place) {
     DenseTensor x_copy = x;
     phi::TensorFromVector(index_vec, ctx, &index);
-    auto output_dim = phi::vectorize(x.dims());
+    auto output_dim = common::vectorize(x.dims());
     output_dim[dim] = index_size;
-    out->Resize(phi::make_ddim(output_dim));
+    out->Resize(common::make_ddim(output_dim));
     phi::IndexSelectInner(ctx, &x_copy, index, out, dim);
 #if defined(__NVCC__) || defined(__HIPCC__)
   } else {
-    auto stride_dim = phi::stride(input_dim);
+    auto stride_dim = common::stride(input_dim);
     int64_t stride = stride_dim[dim];
     phi::TensorFromVector(index_vec, ctx, &index);
     auto stream = ctx.stream();
-    auto output_dim = phi::vectorize(x.dims());
+    auto output_dim = common::vectorize(x.dims());
     output_dim[dim] = index_size;
-    out->Resize(phi::make_ddim(output_dim));
+    out->Resize(common::make_ddim(output_dim));
     ctx.template Alloc(out);
     auto* out_data = out->data();
     int64_t numel = out->numel();
@@ -153,21 +153,21 @@ void RepeatInterleaveWithTensorIndexKernel(const Context& ctx,
     if (index_type == phi::DataType::INT32) {
       phi::funcs::RepeatsTensor2IndexTensor(
           ctx, repeats_tensor, &index);
-      auto output_dim = phi::vectorize(x.dims());
+      auto output_dim = common::vectorize(x.dims());
       output_dim[dim] = index.dims()[0];
-      out->Resize(phi::make_ddim(output_dim));
+      out->Resize(common::make_ddim(output_dim));
       IndexSelectInner(ctx, &x_copy, index, out, dim);
     } else if (index_type == phi::DataType::INT64) {
       phi::funcs::RepeatsTensor2IndexTensor(
           ctx, repeats_tensor, &index);
-      auto output_dim = phi::vectorize(x.dims());
+      auto output_dim = common::vectorize(x.dims());
       output_dim[dim] = index.dims()[0];
-      out->Resize(phi::make_ddim(output_dim));
+      out->Resize(common::make_ddim(output_dim));
       IndexSelectInner(ctx, &x_copy, index, out, dim);
     }
 #if defined(__NVCC__) || defined(__HIPCC__)
   } else {
-    auto stride_dim = phi::stride(input_dim);
+    auto stride_dim = common::stride(input_dim);
     int64_t stride = stride_dim[dim];
     auto stream = ctx.stream();
     auto* in_data = x.data();
@@ -176,9 +176,9 @@ void RepeatInterleaveWithTensorIndexKernel(const Context& ctx,
           ctx, repeats_tensor, &index);
       const int64_t* index_data = index.data();
-      auto output_dim = phi::vectorize(x.dims());
+      auto output_dim = common::vectorize(x.dims());
       output_dim[dim] = index.dims()[0];
-      out->Resize(phi::make_ddim(output_dim));
+      out->Resize(common::make_ddim(output_dim));
       T* out_data = ctx.template Alloc(out);
       int64_t numel = out->numel();
       int64_t size = output_dim[dim];
@@ -195,9 +195,9 @@ void RepeatInterleaveWithTensorIndexKernel(const Context& ctx,
           ctx, repeats_tensor, &index);
       const int* index_data = index.data();
-      auto output_dim = phi::vectorize(x.dims());
+      auto output_dim = common::vectorize(x.dims());
       output_dim[dim] = index.dims()[0];
-      out->Resize(phi::make_ddim(output_dim));
+      out->Resize(common::make_ddim(output_dim));
       T* out_data = ctx.template Alloc(out);
       int64_t numel = out->numel();
       int64_t size = output_dim[dim];
diff --git a/paddle/phi/kernels/impl/searchsorted_kernel_impl.h b/paddle/phi/kernels/impl/searchsorted_kernel_impl.h
index b3be4b9d556645..f933b718a28fe8 100644
--- a/paddle/phi/kernels/impl/searchsorted_kernel_impl.h
+++ b/paddle/phi/kernels/impl/searchsorted_kernel_impl.h
@@ -16,7 +16,7 @@
 #include
-#include "paddle/phi/core/ddim.h"
+#include "paddle/common/ddim.h"
 #include "paddle/phi/kernels/funcs/algorithm.h"
 #include "paddle/phi/kernels/funcs/for_range.h"
diff --git a/paddle/phi/kernels/impl/segment_pool_kernel_impl.h b/paddle/phi/kernels/impl/segment_pool_kernel_impl.h
index 216d5e6100d6cf..3b6f9998a00129 100644
--- a/paddle/phi/kernels/impl/segment_pool_kernel_impl.h
+++ b/paddle/phi/kernels/impl/segment_pool_kernel_impl.h
@@ -67,7 +67,7 @@ void SegmentKernelLaunchHelper(const Context& dev_ctx,
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   if (!cpu_place) {
     DenseTensor length;
-    length.Resize(phi::make_ddim({1}));
+    length.Resize(common::make_ddim({1}));
     IndexT* length_data = dev_ctx.template HostAlloc(&length);
     const IndexT* segment_ids_ptr = segment_ids.data();
diff --git a/paddle/phi/kernels/impl/sequence_mask_kernel_impl.h b/paddle/phi/kernels/impl/sequence_mask_kernel_impl.h
index 80834fae85411e..f2eb1f8a39f970 100644
--- a/paddle/phi/kernels/impl/sequence_mask_kernel_impl.h
+++ b/paddle/phi/kernels/impl/sequence_mask_kernel_impl.h
@@ -44,9 +44,9 @@ void SequenceMaskKernel(const Context& ctx,
     maxlen = *max_len_tensor.get_ptr()->data();
   }
-  auto y_dim = phi::vectorize(x.dims());
+  auto y_dim = common::vectorize(x.dims());
   y_dim.push_back(maxlen);
-  y->Resize(phi::make_ddim(y_dim));
+  y->Resize(common::make_ddim(y_dim));
   PADDLE_ENFORCE_GT(
       maxlen,
@@ -76,9 +76,9 @@ void SequenceMaskKernel(const Context& ctx,
       maxlen = static_cast(*std::max_element(x_data, x_data + x_numel));
 #endif
   }
-  auto y_dim = phi::vectorize(x.dims());
+  auto y_dim = common::vectorize(x.dims());
   y_dim.push_back(maxlen);
-  y->Resize(phi::make_ddim(y_dim));
+  y->Resize(common::make_ddim(y_dim));
   phi::VisitDataType(phi::TransToPhiDataType(out_dtype),
diff --git a/paddle/phi/kernels/impl/set_value_grad_kernel_impl.h b/paddle/phi/kernels/impl/set_value_grad_kernel_impl.h
index 3d2a0a3d0db67c..3f78361b92b8bd 100644
--- a/paddle/phi/kernels/impl/set_value_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/set_value_grad_kernel_impl.h
@@ -84,7 +84,7 @@ void SetValueGradImpl(const Context& dev_ctx,
                                            axes.size(),
                                            false);
-  DDim out_dims(phi::make_ddim(out_dims_vector));
+  DDim out_dims(common::make_ddim(out_dims_vector));
   std::vector reverse_vector(starts_local.size(), 0);
   funcs::StridedSliceFunctor(starts_local.data(),
diff --git a/paddle/phi/kernels/impl/set_value_kernel_impl.h b/paddle/phi/kernels/impl/set_value_kernel_impl.h
index 2c545ac06ada11..f9e582d440f7b4 100644
--- a/paddle/phi/kernels/impl/set_value_kernel_impl.h
+++ b/paddle/phi/kernels/impl/set_value_kernel_impl.h
@@ -113,7 +113,7 @@ void SetValueImpl(const Context& dev_ctx,
       none_axes_cur++;
     }
-    slice_dims_for_assign = phi::make_ddim(slice_dims_with_none);
+    slice_dims_for_assign = common::make_ddim(slice_dims_with_none);
   }
   auto place = dev_ctx.GetPlace();
@@ -336,7 +336,7 @@ void SetValueKernel(const Context& dev_ctx,
   }
   DenseTensor value_tensor = Empty(dev_ctx, shape);
   phi::TensorFromVector(assgin_values, dev_ctx, &value_tensor);
-  value_tensor.Resize(phi::make_ddim(shape));
+  value_tensor.Resize(common::make_ddim(shape));
   SetTensorValueKernel(dev_ctx,
                        x,
diff --git a/paddle/phi/kernels/impl/slice_grad_kernel_impl.h b/paddle/phi/kernels/impl/slice_grad_kernel_impl.h
index ac2769e041e398..fa3ef0318fbb17 100644
--- a/paddle/phi/kernels/impl/slice_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/slice_grad_kernel_impl.h
@@ -14,7 +14,7 @@
 #pragma once
-#include "paddle/phi/core/macros.h"
+#include "paddle/common/macros.h"
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/eigen/eigen_function.h"
@@ -109,8 +109,8 @@ void EigenPaddingCompute(
     out_tore_shape[1] = out_dims[pad_dim];
     // convert array from std::vector to DDim
-    DDim reshaped_in_dims = make_ddim(in_tore_shape);
-    DDim reshaped_out_dims = make_ddim(out_tore_shape);
+    DDim reshaped_in_dims = common::make_ddim(in_tore_shape);
+    DDim reshaped_out_dims = common::make_ddim(out_tore_shape);
     // after reshape: the first dimension do not need padding,
     // set padding[0] zero
@@ -142,8 +142,8 @@ void EigenPaddingCompute(
     }
     // convert array from std::vector to DDim
-    DDim reshaped_in_dims = make_ddim(in_tore_shape);
-    DDim reshaped_out_dims = make_ddim(out_tore_shape);
+    DDim reshaped_in_dims = common::make_ddim(in_tore_shape);
+    DDim reshaped_out_dims = common::make_ddim(out_tore_shape);
     // after reshape:
     // the first dimension is the previous padding dimension
@@ -180,8 +180,8 @@ void EigenPaddingCompute(
     }
     // convert array from std::vector to DDim
-    DDim reshaped_in_dims = make_ddim(in_tore_shape);
-    DDim reshaped_out_dims = make_ddim(out_tore_shape);
+    DDim reshaped_in_dims = common::make_ddim(in_tore_shape);
+    DDim reshaped_out_dims = common::make_ddim(out_tore_shape);
     // after reshape:
     // the first dimension do not need padding, set padding[0] zero
@@ -228,7 +228,7 @@ void SliceGradCompute(const Context& ctx,
   if (decrease_size == static_cast(in_dims.size())) {
     // all dims decrease
     std::vector origin_out_shape(decrease_size, 1);
-    out_dims = make_ddim(std::vector(decrease_size, 1));
+    out_dims = common::make_ddim(std::vector(decrease_size, 1));
   } else {
     std::vector origin_out_shape(out_dims.size() + decrease_size, -1);
     for (size_t i = 0; i < decrease_size; ++i) {
@@ -243,7 +243,7 @@ void SliceGradCompute(const Context& ctx,
       }
     }
-    out_dims = make_ddim(origin_out_shape);
+    out_dims = common::make_ddim(origin_out_shape);
   }
 }
diff --git a/paddle/phi/kernels/impl/slogdeterminant_grad_kernel_impl.h b/paddle/phi/kernels/impl/slogdeterminant_grad_kernel_impl.h
index e7fa5edf9ad4ab..c964f91c690037 100644
--- a/paddle/phi/kernels/impl/slogdeterminant_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/slogdeterminant_grad_kernel_impl.h
@@ -60,7 +60,7 @@ void SlogDeterminantGradKernel(const Context& dev_ctx,
     VLOG(3) << "The input matrix not invertible!";
     x_grad->Resize(x.dims());
     phi::Full(dev_ctx,
-              phi::vectorize(x.dims()),
+              common::vectorize(x.dims()),
              std::numeric_limits::quiet_NaN(),
              x_grad);
     return;
diff --git a/paddle/phi/kernels/impl/slogdeterminant_kernel_impl.h b/paddle/phi/kernels/impl/slogdeterminant_kernel_impl.h
index a5798d66ee5c7e..05bd6097554ca2 100644
--- a/paddle/phi/kernels/impl/slogdeterminant_kernel_impl.h
+++ b/paddle/phi/kernels/impl/slogdeterminant_kernel_impl.h
@@ -75,7 +75,7 @@ template
 void SlogDeterminantKernel(const Context& dev_ctx,
                            const DenseTensor& x,
                            DenseTensor* out) {
-  auto input_dim = vectorize(x.dims());
+  auto input_dim = common::vectorize(x.dims());
   auto input_dim_size = input_dim.size();
   auto batch_count = detail::GetBatchCount(x.dims());
@@ -98,7 +98,7 @@ void SlogDeterminantKernel(const Context& dev_ctx,
   }
   output_dim_vec.insert(output_dim_vec.begin(), 2);
   // make the output dims as same as numpy
-  auto output_dims = phi::make_ddim(output_dim_vec);
+  auto output_dims = common::make_ddim(output_dim_vec);
   out->Resize(output_dims);
   VLOG(2) << "output dim:" << out->dims();
 }
diff --git a/paddle/phi/kernels/impl/solve_grad_kernel_impl.h b/paddle/phi/kernels/impl/solve_grad_kernel_impl.h
index 7386e8beb22cbb..fa25f2a0887972 100644
--- a/paddle/phi/kernels/impl/solve_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/solve_grad_kernel_impl.h
@@ -100,17 +100,17 @@ void SolveGradKernel(const Context& dev_ctx,
       get_broadcast_dims(tmp_x, tmp_y);
   // tmp_dx
   DenseTensor tmp_dx;
-  tmp_dx.Resize(phi::make_ddim(x_broadcast_dims));
+  tmp_dx.Resize(common::make_ddim(x_broadcast_dims));
   dev_ctx.template Alloc(&tmp_dx);
   // tmp_dy
   DenseTensor tmp_dy;
-  tmp_dy.Resize(phi::make_ddim(y_broadcast_dims));
+  tmp_dy.Resize(common::make_ddim(y_broadcast_dims));
   dev_ctx.template Alloc(&tmp_dy);
   DenseTensor tmp_input(x.dtype());
   const auto& new_dims_vec = phi::funcs::getNewDimsVec(x.dims());
-  tmp_input.Resize(phi::make_ddim(new_dims_vec));
+  tmp_input.Resize(common::make_ddim(new_dims_vec));
   dev_ctx.template Alloc(&tmp_input);
   phi::funcs::TransposeNormal trans;
@@ -174,9 +174,9 @@ void SolveGradKernel(const Context& dev_ctx,
     phi::Copy(dev_ctx, tmp_dy, dev_ctx.GetPlace(), false, &dy_help);
     // get dims
-    std::vector x_dims = vectorize(x.dims());
-    std::vector y_dims = vectorize(y.dims());
-    std::vector dout_dims = vectorize(dout.dims());
+    std::vector x_dims = common::vectorize(x.dims());
+    std::vector y_dims = common::vectorize(y.dims());
+    std::vector dout_dims = common::vectorize(dout.dims());
     if (is_vector_rhs(x, y)) {
       dout_dims.push_back(1);
@@ -185,7 +185,8 @@ void SolveGradKernel(const Context& dev_ctx,
     int y_ndim = y_dims.size();
     int ndim = dout_dims.size();
-    const std::vector dy_help_dims = vectorize(dy_help.dims());
+    const std::vector dy_help_dims =
+        common::vectorize(dy_help.dims());
     std::vector dy_broadcast_dims(ndim);
     std::fill(
@@ -224,13 +225,14 @@ void SolveGradKernel(const Context& dev_ctx,
     dev_ctx.Alloc(&dx_help, tmp_dx.dtype());
     phi::Copy(dev_ctx, tmp_dx, dev_ctx.GetPlace(), false, &dx_help);
     // get dims
-    std::vector x_dims = vectorize(x.dims());
-    std::vector y_dims = vectorize(y.dims());
+    std::vector x_dims = common::vectorize(x.dims());
+    std::vector y_dims = common::vectorize(y.dims());
     int x_ndim = x_dims.size();
     int ndim = x_broadcast_dims.size();
-    const std::vector dx_help_dims = vectorize(dx_help.dims());
+    const std::vector dx_help_dims =
+        common::vectorize(dx_help.dims());
     std::vector dx_broadcast_dims(ndim);
     std::fill(
         dx_broadcast_dims.data(), dx_broadcast_dims.data() + ndim - x_ndim, 1);
diff --git a/paddle/phi/kernels/impl/solve_kernel_impl.h b/paddle/phi/kernels/impl/solve_kernel_impl.h
index d5ecfdff21a998..ddfc18db7fc312 100644
--- a/paddle/phi/kernels/impl/solve_kernel_impl.h
+++ b/paddle/phi/kernels/impl/solve_kernel_impl.h
@@ -32,8 +32,8 @@ static inline bool is_vector_rhs(const DenseTensor& input,
   auto y_dim = other.dims();
   auto x_dim_size = x_dim.size();
   auto y_dim_size = y_dim.size();
-  std::vector x_dims_vec = phi::vectorize(x_dim);
-  std::vector y_dims_vec = phi::vectorize(y_dim);
+  std::vector x_dims_vec = common::vectorize(x_dim);
+  std::vector y_dims_vec = common::vectorize(y_dim);
   std::vector::const_iterator f = x_dims_vec.begin();
   std::vector::const_iterator l = x_dims_vec.end() - 1;
@@ -88,8 +88,8 @@ static inline std::vector convert_to_int_vec(std::vector a) {
 // broadcast the batch dimensions of tensor x and tensor y.
 static inline std::tuple, std::vector>
 get_broadcast_dims(const Tensor& x, const Tensor& y) {
-  std::vector x_dims_vec = phi::vectorize(x.dims());
-  std::vector y_dims_vec = phi::vectorize(y.dims());
+  std::vector x_dims_vec = common::vectorize(x.dims());
+  std::vector y_dims_vec = common::vectorize(y.dims());
   std::vector::const_iterator f1 = x_dims_vec.begin();
   std::vector::const_iterator l1 = x_dims_vec.end() - 2;
   std::vector x_dims_vec_cut(f1, l1);
diff --git a/paddle/phi/kernels/impl/spectral_norm_grad_kernel_impl.h b/paddle/phi/kernels/impl/spectral_norm_grad_kernel_impl.h
index 5bdb874bc89c47..dd9489da089c7b 100644
--- a/paddle/phi/kernels/impl/spectral_norm_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/spectral_norm_grad_kernel_impl.h
@@ -48,9 +48,9 @@ void SpectralNormGradKernel(const Context& dev_ctx,
       real_dims.push_back(dims[i]);
     }
   }
-  weight_mat.Resize(phi::make_ddim(real_dims));
+  weight_mat.Resize(common::make_ddim(real_dims));
   dev_ctx.template Alloc(&weight_mat);
-  out_grad_mat.Resize(phi::make_ddim(real_dims));
+  out_grad_mat.Resize(common::make_ddim(real_dims));
   dev_ctx.template Alloc(&out_grad_mat);
   TransCompute2DTo5D(dev_ctx, weight, rank, perm, &weight_mat);
   TransCompute2DTo5D(
@@ -114,7 +114,7 @@ void SpectralNormGradKernel(const Context& dev_ctx,
   dev_ctx.template Alloc(weight_grad);
   TransCompute2DTo5D(
       dev_ctx,
-      weight_grad_mat.Resize(phi::make_ddim(real_dims)),
+      weight_grad_mat.Resize(common::make_ddim(real_dims)),
      rank,
      perm,
      weight_grad);
diff --git a/paddle/phi/kernels/impl/spectral_norm_kernel_impl.h b/paddle/phi/kernels/impl/spectral_norm_kernel_impl.h
index 57c5c69a63d614..86312b06c76950 100644
--- a/paddle/phi/kernels/impl/spectral_norm_kernel_impl.h
+++ b/paddle/phi/kernels/impl/spectral_norm_kernel_impl.h
@@ -129,7 +129,7 @@ void SpectralNormKernel(const Context& dev_ctx,
       real_dims.push_back(dims[i]);
     }
   }
-  weight_mat.Resize(phi::make_ddim(real_dims));
+  weight_mat.Resize(common::make_ddim(real_dims));
   dev_ctx.template Alloc(&weight_mat);
   TransCompute2DTo5D(dev_ctx, weight, rank, perm, &weight_mat);
   } else {
@@ -168,7 +168,11 @@ void SpectralNormKernel(const Context& dev_ctx,
     out->Resize(dims);
     dev_ctx.template Alloc(out);
     TransCompute2DTo5D(
-        dev_ctx, weight_mat.Resize(phi::make_ddim(real_dims)), rank, perm, out);
+        dev_ctx,
+        weight_mat.Resize(common::make_ddim(real_dims)),
+        rank,
+        perm,
+        out);
   } else {
     phi::Copy(dev_ctx, weight_mat.Resize(dims), dev_ctx.GetPlace(), true, out);
   }
diff --git a/paddle/phi/kernels/impl/svd_grad_kernel_impl.h b/paddle/phi/kernels/impl/svd_grad_kernel_impl.h
13c86aa576104e..57556ff1990fb7 100644 --- a/paddle/phi/kernels/impl/svd_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/svd_grad_kernel_impl.h @@ -30,7 +30,7 @@ static DenseTensor Fill(const Context& ctx, std::vector shape, float fill_value) { DenseTensor ret; - ret.Resize(make_ddim(shape)); + ret.Resize(common::make_ddim(shape)); ctx.template Alloc(&ret); funcs::SetConstant()(ctx, &ret, T(fill_value)); return ret; @@ -53,7 +53,7 @@ static DenseTensor Unsqueeze(const DenseTensor& x, int axis = 0) { // don't copy data, only change the dims DenseTensor out; out.ShareDataWith(x); - std::vector out_shape = phi::vectorize(x.dims()); + std::vector out_shape = common::vectorize(x.dims()); if (axis >= 0) { auto index = (out_shape.begin() + axis); out_shape.insert(index, 1); @@ -61,7 +61,7 @@ static DenseTensor Unsqueeze(const DenseTensor& x, int axis = 0) { auto index = (out_shape.end() + axis + 1); out_shape.insert(index, 1); } - out.Resize(phi::make_ddim(out_shape)); + out.Resize(common::make_ddim(out_shape)); return out; } diff --git a/paddle/phi/kernels/impl/tile_grad_kernel_impl.h b/paddle/phi/kernels/impl/tile_grad_kernel_impl.h index d9b97956ce9d1e..a5a95b7bacd987 100644 --- a/paddle/phi/kernels/impl/tile_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/tile_grad_kernel_impl.h @@ -56,7 +56,7 @@ void TileGradKernel(const Context& dev_ctx, const IntArray& repeat_times, DenseTensor* x_grad) { auto x_dims = x.dims(); - auto vec_x_dims = phi::vectorize(x_dims); + auto vec_x_dims = common::vectorize(x_dims); auto repeat_times_data = repeat_times.GetData(); if (repeat_times_data.size() < vec_x_dims.size()) { int diff = vec_x_dims.size() - repeat_times_data.size(); diff --git a/paddle/phi/kernels/impl/tile_kernel_impl.h b/paddle/phi/kernels/impl/tile_kernel_impl.h index f7b923b00b1ca1..4e19d9183f4048 100644 --- a/paddle/phi/kernels/impl/tile_kernel_impl.h +++ b/paddle/phi/kernels/impl/tile_kernel_impl.h @@ -37,7 +37,7 @@ void Tile(const Context& dev_ctx, "be positive integers, but the value received is %d.", repeat_times[i])); } - auto vec_x_dims = phi::vectorize(x_dims); + auto vec_x_dims = common::vectorize(x_dims); if (repeat_times.size() < vec_x_dims.size()) { int diff = vec_x_dims.size() - repeat_times.size(); repeat_times.insert(repeat_times.begin(), diff, 1); @@ -63,7 +63,7 @@ void Tile(const Context& dev_ctx, bcast_dims[i] = repeat_times[i]; } - DDim new_x_dims = make_ddim(vec_x_dims); + DDim new_x_dims = common::make_ddim(vec_x_dims); DDim out_dims(new_x_dims); for (size_t i = 0; i < repeat_times.size(); ++i) { out_dims[i] *= repeat_times[i]; diff --git a/paddle/phi/kernels/impl/trace_grad_kernel_impl.h b/paddle/phi/kernels/impl/trace_grad_kernel_impl.h index 640fd07a92a2be..964d5871bf9319 100644 --- a/paddle/phi/kernels/impl/trace_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/trace_grad_kernel_impl.h @@ -89,10 +89,10 @@ void TraceGradKernel(const Context& ctx, int axis2, DenseTensor* in_grad) { auto input_dims = in_grad->dims(); - auto input_stride = phi::stride(input_dims); + auto input_stride = common::stride(input_dims); auto output_dims = out_grad.dims(); auto output_stride = output_dims.size() == 0 ? 
phi::DDim(output_dims) - : phi::stride(output_dims); + : common::stride(output_dims); auto* out_data = out_grad.data(); T* x_data = ctx.template Alloc(in_grad); @@ -121,9 +121,9 @@ void TraceGradKernel(const Context& ctx, int64_t pos = std::abs(offset) * offset_stride; if (diag_size > 0) { #if defined(__NVCC__) || defined(__HIPCC__) - thrust::device_vector output_vec(vectorize(output_stride)); + thrust::device_vector output_vec(common::vectorize(output_stride)); const int64_t* output_arr = thrust::raw_pointer_cast(output_vec.data()); - thrust::device_vector input_vec(vectorize(input_stride)); + thrust::device_vector input_vec(common::vectorize(input_stride)); const int64_t* input_arr = thrust::raw_pointer_cast(input_vec.data()); #else diff --git a/paddle/phi/kernels/impl/unfold_grad_kernel_impl.h b/paddle/phi/kernels/impl/unfold_grad_kernel_impl.h index 28f034209188cd..a0e7c3c2ef7cd9 100644 --- a/paddle/phi/kernels/impl/unfold_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/unfold_grad_kernel_impl.h @@ -52,8 +52,8 @@ void UnfoldGradKernel(const Context& ctx, paddings[3], strides[1]); - DDim x_shape = make_ddim({x_dims[1], x_dims[2], x_dims[3]}); - DDim out_matrix_shape = make_ddim( + DDim x_shape = common::make_ddim({x_dims[1], x_dims[2], x_dims[3]}); + DDim out_matrix_shape = common::make_ddim( {x_dims[1], kernel_sizes[0], kernel_sizes[1], out_height, out_width}); phi::funcs::Col2ImFunctor col2im; diff --git a/paddle/phi/kernels/impl/unfold_kernel_impl.h b/paddle/phi/kernels/impl/unfold_kernel_impl.h index 7b7e9923d0004d..b1791af358cacd 100644 --- a/paddle/phi/kernels/impl/unfold_kernel_impl.h +++ b/paddle/phi/kernels/impl/unfold_kernel_impl.h @@ -50,8 +50,8 @@ void UnfoldKernel(const Context& ctx, paddings[3], strides[1]); - DDim x_shape = make_ddim({x_dims[1], x_dims[2], x_dims[3]}); - DDim out_matrix_shape = make_ddim( + DDim x_shape = common::make_ddim({x_dims[1], x_dims[2], x_dims[3]}); + DDim out_matrix_shape = common::make_ddim( {x_dims[1], kernel_sizes[0], kernel_sizes[1], out_height, out_width}); for (int i = 0; i < batch_size; i++) { diff --git a/paddle/phi/kernels/impl/warpctc_kernel_impl.h b/paddle/phi/kernels/impl/warpctc_kernel_impl.h index 4b4bd6f5143dd3..275f32f0333cd5 100644 --- a/paddle/phi/kernels/impl/warpctc_kernel_impl.h +++ b/paddle/phi/kernels/impl/warpctc_kernel_impl.h @@ -336,7 +336,7 @@ void WarpctcKernel(const Context& dev_ctx, max_sequence_length = phi::funcs::MaximumSequenceLength(logits_lod); } - auto loss_dims = phi::make_ddim({static_cast(num_sequences), 1}); + auto loss_dims = common::make_ddim({static_cast(num_sequences), 1}); // warpctc needs sequences data stored in transposed padding format DenseTensor warpctc_logits_tmp = diff --git a/paddle/phi/kernels/impl/warprnnt_kernel_impl.h b/paddle/phi/kernels/impl/warprnnt_kernel_impl.h index f51041285aaee9..80ccf6e21b5377 100644 --- a/paddle/phi/kernels/impl/warprnnt_kernel_impl.h +++ b/paddle/phi/kernels/impl/warprnnt_kernel_impl.h @@ -313,7 +313,7 @@ void WarprnntKernel(const Context& dev_ctx, dev_ctx, warprnntgrad, static_cast(0)); // loss on cpu (B,) - auto loss_dims = phi::make_ddim({static_cast(B)}); + auto loss_dims = common::make_ddim({static_cast(B)}); DenseTensor warprnnt_loss; warprnnt_loss.Resize(loss_dims); T* warprnnt_loss_data = dev_ctx.template HostAlloc(&warprnnt_loss); diff --git a/paddle/phi/kernels/is_empty_kernel.cc b/paddle/phi/kernels/is_empty_kernel.cc index 4b86f2dfe69504..dadaa2132e95ed 100644 --- a/paddle/phi/kernels/is_empty_kernel.cc +++ b/paddle/phi/kernels/is_empty_kernel.cc 
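Every hunk in this stretch applies the same mechanical substitution: the dim helpers make_ddim, vectorize, stride, product, and slice_ddim move from namespace phi to namespace common with signatures unchanged, and calls that were unqualified inside namespace phi are requalified explicitly. A minimal sketch of the round trip these call sites rely on, under that assumption only (the function name and the {2, 3, 4} shape below are illustrative, not from the patch):

#include <cstdint>
#include <vector>

#include "paddle/common/ddim.h"

// Exercises the relocated helpers exactly as the call sites in this patch
// use them; a pure namespace change, phi:: -> common::.
void ddim_migration_sketch() {
  common::DDim dims = common::make_ddim({2, 3, 4});    // was phi::make_ddim
  std::vector<int64_t> vec = common::vectorize(dims);  // {2, 3, 4}, was phi::vectorize
  int64_t numel = common::product(dims);               // 2 * 3 * 4 = 24, was phi::product
  common::DDim strides = common::stride(dims);         // row-major strides {12, 4, 1}, was phi::stride
  common::DDim tail = common::slice_ddim(dims, 1, 3);  // dims in [1, 3) -> {3, 4}, was phi::slice_ddim
  (void)vec; (void)numel; (void)strides; (void)tail;   // silence unused-variable warnings
}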
@@ -27,7 +27,7 @@ void IsEmptyKernel(const Context& dev_ctx, // always be allocated for CPUPlace. We register CUDA kernel for this op to // avoid the unnecessary data transform. bool* out_data = dev_ctx.template HostAlloc(out); - out_data[0] = phi::product(x.dims()) == 0; + out_data[0] = common::product(x.dims()) == 0; } } // namespace phi diff --git a/paddle/phi/kernels/kps/reduce_kernel.cu b/paddle/phi/kernels/kps/reduce_kernel.cu index 506bd36e828bc5..74020a8f0975b4 100644 --- a/paddle/phi/kernels/kps/reduce_kernel.cu +++ b/paddle/phi/kernels/kps/reduce_kernel.cu @@ -173,7 +173,7 @@ void ReduceSumEigen(const KPDevice& dev_ctx, (*reduce_dims)[i] += added_dims; } auto eigen_reduce_dim = - EigenDim::From(phi::make_ddim(*reduce_dims)); + EigenDim::From(common::make_ddim(*reduce_dims)); // Calculate eigen_out_tensor.device(*dev_ctx.eigen_device()) = eigen_x_tensor.sum(eigen_reduce_dim); diff --git a/paddle/phi/kernels/legacy/cpu/randint_kernel.cc b/paddle/phi/kernels/legacy/cpu/randint_kernel.cc index 6b988f6294aac8..cf4a0e9a6d2770 100644 --- a/paddle/phi/kernels/legacy/cpu/randint_kernel.cc +++ b/paddle/phi/kernels/legacy/cpu/randint_kernel.cc @@ -29,7 +29,7 @@ void RandintWithSeedKernel(const Context& dev_ctx, DataType dtype UNUSED, int seed, DenseTensor* out) { - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); T* data = dev_ctx.template Alloc(out); auto numel = out->numel(); std::shared_ptr engine; diff --git a/paddle/phi/kernels/legacy/cpu/uniform_kernel.cc b/paddle/phi/kernels/legacy/cpu/uniform_kernel.cc index 3aa697b2409ee9..897b57a8b27b4e 100644 --- a/paddle/phi/kernels/legacy/cpu/uniform_kernel.cc +++ b/paddle/phi/kernels/legacy/cpu/uniform_kernel.cc @@ -28,7 +28,7 @@ void UniformRawKernel(const Context &dev_ctx, int diag_step, float diag_val, DenseTensor *out) { - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); T *data = dev_ctx.template Alloc(out); auto size = out->numel(); std::shared_ptr engine; diff --git a/paddle/phi/kernels/legacy/gpu/randint_kernel.cu b/paddle/phi/kernels/legacy/gpu/randint_kernel.cu index b4aa5e9d8c47ac..5aa0bf07d7ccb5 100644 --- a/paddle/phi/kernels/legacy/gpu/randint_kernel.cu +++ b/paddle/phi/kernels/legacy/gpu/randint_kernel.cu @@ -31,7 +31,7 @@ void RandintWithSeedKernel(const Context& dev_ctx, DataType dtype, int seed, DenseTensor* out) { - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); T* data = dev_ctx.template Alloc(out); funcs::uniform_distribution dist; funcs::uniform_int_transform trans(low, high); diff --git a/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu b/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu index abf51cf61f2b5c..c576608cc0d9f2 100644 --- a/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu +++ b/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu @@ -64,7 +64,7 @@ void UniformRawKernel(const Context& dev_ctx, int diag_step, float diag_val, DenseTensor* out) { - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); dev_ctx.template Alloc(out); if (seed == 0) { // Use global Generator seed diff --git a/paddle/phi/kernels/legacy/xpu/compare_kernel.cc b/paddle/phi/kernels/legacy/xpu/compare_kernel.cc index 5dd06c3fb88105..8957f09be78182 100644 --- a/paddle/phi/kernels/legacy/xpu/compare_kernel.cc +++ b/paddle/phi/kernels/legacy/xpu/compare_kernel.cc @@ -33,8 +33,8 @@ void XPUCompareRawKernelImpl(const Context& dev_ctx, bool*, const std::vector&, const
std::vector&)> func) { - auto x_shape = vectorize(x.dims()); - auto y_shape = vectorize(y.dims()); + auto x_shape = common::vectorize(x.dims()); + auto y_shape = common::vectorize(y.dims()); if (x.dims().size() == 0) { x_shape = std::vector({1}); diff --git a/paddle/phi/kernels/legacy/xpu/randint_kernel.cc b/paddle/phi/kernels/legacy/xpu/randint_kernel.cc index 0349ad964c41a9..5f2f91cf0ac07d 100644 --- a/paddle/phi/kernels/legacy/xpu/randint_kernel.cc +++ b/paddle/phi/kernels/legacy/xpu/randint_kernel.cc @@ -32,7 +32,7 @@ void RandintWithSeedKernel(const Context& dev_ctx, int seed, DenseTensor* out) { int64_t size = out->numel(); - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); T* data = dev_ctx.template Alloc(out); auto numel = out->numel(); std::shared_ptr engine; diff --git a/paddle/phi/kernels/legacy/xpu/uniform_kernel.cc b/paddle/phi/kernels/legacy/xpu/uniform_kernel.cc index f1907b13e5f967..9e4296dcb4efa1 100644 --- a/paddle/phi/kernels/legacy/xpu/uniform_kernel.cc +++ b/paddle/phi/kernels/legacy/xpu/uniform_kernel.cc @@ -34,7 +34,7 @@ void UniformRawKernel(const Context &dev_ctx, int diag_step, float diag_val, DenseTensor *out) { - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); T *data = dev_ctx.template Alloc(out); int64_t size = out->numel(); diff --git a/paddle/phi/kernels/onednn/add_n_kernel.cc b/paddle/phi/kernels/onednn/add_n_kernel.cc index fcd48bdccc3636..f852254043e877 100644 --- a/paddle/phi/kernels/onednn/add_n_kernel.cc +++ b/paddle/phi/kernels/onednn/add_n_kernel.cc @@ -28,7 +28,7 @@ class SumOneDNNHandler : public OneDNNHandlerNoCachingT { : OneDNNHandlerNoCachingT(engine, cpu_place), num_inputs_(0) { - auto dst_tz = vectorize(out->dims()); + auto dst_tz = common::vectorize(out->dims()); auto src_tz = dst_tz; std::vector srcs_md; diff --git a/paddle/phi/kernels/onednn/batch_norm_grad_kernel.cc b/paddle/phi/kernels/onednn/batch_norm_grad_kernel.cc index e648686f3d2e7c..55f8dab5e8673f 100644 --- a/paddle/phi/kernels/onednn/batch_norm_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/batch_norm_grad_kernel.cc @@ -71,7 +71,7 @@ void BatchNormGradFunctor(const Context& dev_ctx, std::vector scale_tz; std::vector bias_tz; if (use_scale) { - scale_tz = vectorize(Scale->dims()); + scale_tz = common::vectorize(Scale->dims()); PADDLE_ENFORCE_EQ( scale_tz.size(), 1, @@ -80,7 +80,7 @@ void BatchNormGradFunctor(const Context& dev_ctx, scale_tz.size())); } if (use_bias) { - bias_tz = vectorize(Bias->dims()); + bias_tz = common::vectorize(Bias->dims()); PADDLE_ENFORCE_EQ( bias_tz.size(), 1, diff --git a/paddle/phi/kernels/onednn/batch_norm_kernel.cc b/paddle/phi/kernels/onednn/batch_norm_kernel.cc index 070058062b6f49..9925aed9932565 100644 --- a/paddle/phi/kernels/onednn/batch_norm_kernel.cc +++ b/paddle/phi/kernels/onednn/batch_norm_kernel.cc @@ -98,7 +98,7 @@ void BatchNormKernel(const Context &dev_ctx, astream.wait(); if (!global_stats) { - const unsigned int C = phi::vectorize(mean.dims())[0]; + const unsigned int C = common::vectorize(mean.dims())[0]; // mkldnn only compute stats for current batch // so we need compute momentum stats via Eigen lib diff --git a/paddle/phi/kernels/onednn/cast_kernel.cc b/paddle/phi/kernels/onednn/cast_kernel.cc index 74298cc055e0cb..9bf0a3e8a875fa 100644 --- a/paddle/phi/kernels/onednn/cast_kernel.cc +++ b/paddle/phi/kernels/onednn/cast_kernel.cc @@ -29,7 +29,7 @@ void CastKernel(const Context& dev_ctx, dnnl::memory::data_type in_dnnl_dtype = 
funcs::ToOneDNNDataType(in_dtype); dnnl::memory::data_type out_dnnl_dtype = funcs::ToOneDNNDataType(out_dtype); - auto x_tz = phi::vectorize(x.dims()); + auto x_tz = common::vectorize(x.dims()); funcs::ReorderOneDNNHandler reorder_handler(x_tz, in_dtype, diff --git a/paddle/phi/kernels/onednn/concat_grad_kernel.cc b/paddle/phi/kernels/onednn/concat_grad_kernel.cc index 29477a3ead8ae2..bbc57328ac2d6e 100644 --- a/paddle/phi/kernels/onednn/concat_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/concat_grad_kernel.cc @@ -38,7 +38,7 @@ void ConcatGradKernel(const Context& dev_ctx, int axis = axis_scalar.to(); - auto out_grad_vec_dims = vectorize(out_grad.dims()); + auto out_grad_vec_dims = common::vectorize(out_grad.dims()); axis = funcs::ComputeAxis(axis, out_grad_vec_dims.size()); @@ -53,7 +53,7 @@ void ConcatGradKernel(const Context& dev_ctx, for (auto& grad : x_grad) { if (grad->numel() != 0UL) { - auto x_grad_vec_dims = vectorize(grad->dims()); + auto x_grad_vec_dims = common::vectorize(grad->dims()); auto slice_mem_p = reorder_handler.AcquireSubmemory( x_grad_vec_dims, offset, reorder_src_memory_p); diff --git a/paddle/phi/kernels/onednn/concat_kernel.cc b/paddle/phi/kernels/onednn/concat_kernel.cc index 0c9dbf5a85497a..f3ff30e2fa8613 100644 --- a/paddle/phi/kernels/onednn/concat_kernel.cc +++ b/paddle/phi/kernels/onednn/concat_kernel.cc @@ -56,7 +56,7 @@ class ConcatOneDNNHandler : public OneDNNHandlerNoCachingT { srcs_md.push_back(input->mem_desc()); } - auto dst_dims = vectorize(output->dims()); + auto dst_dims = common::vectorize(output->dims()); memory::desc dst_md = memory::desc(dst_dims, dt, OneDNNMemoryFormat::any); @@ -104,7 +104,7 @@ void ConcatKernel(const Context& dev_ctx, auto multi_input = ReduceMultiInput(x); EnforceLayouts(multi_input); - auto out_dims_vec = vectorize(out->dims()); + auto out_dims_vec = common::vectorize(out->dims()); if (std::any_of(out_dims_vec.begin(), out_dims_vec.end(), [](int64_t i) { return i < 0; })) { diff --git a/paddle/phi/kernels/onednn/conv_grad_kernel.cc b/paddle/phi/kernels/onednn/conv_grad_kernel.cc index 93df685293fc3f..230d93f56966bd 100644 --- a/paddle/phi/kernels/onednn/conv_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/conv_grad_kernel.cc @@ -125,7 +125,7 @@ void ConvGradKernel(const Context& dev_ctx, funcs::ToOneDNNDataType(filter.dtype()); // for 3d conv with groups (six dimensional data reorder to // goidhw) for 2d conv with groups (five dimensional data reorder - // to goihw) auto weights_tz = phi::vectorize(filter->dims()); + // to goihw) auto weights_tz = common::vectorize(filter->dims()); auto weights_tz = diff_weights_memory_p->get_desc().get_dims(); dnnl::memory::format_tag out_format = @@ -151,10 +151,10 @@ void ConvGradKernel(const Context& dev_ctx, dnnl::memory::format_tag target_format = weights_tz.size() == 6 ? 
dnnl::memory::format_tag::oidhw : dnnl::memory::format_tag::oihw; - filter_grad->set_mem_desc( - dnnl::memory::desc(phi::vectorize(filter_grad->dims()), - in_type, - target_format)); + filter_grad->set_mem_desc(dnnl::memory::desc( + common::vectorize(filter_grad->dims()), + in_type, + target_format)); } else { filter_grad->set_mem_desc(diff_weights_memory_p->get_desc()); } @@ -248,7 +248,7 @@ KernelKey ConvGradGetKernelTypeForVar(const GetKernelTypeForVarContext* ctx) { (tensor.layout() != phi::DataLayout::ONEDNN)) { auto it = attrs.find("data_format"); const std::string data_format = PADDLE_GET_CONST(std::string, it->second); - auto dl = phi::StringToDataLayout(data_format); + auto dl = common::StringToDataLayout(data_format); // Some models may have intentionally set "AnyLayout" for pool // op. Treat this as NCHW (default data_format value) if (dl != phi::DataLayout::kAnyLayout) { diff --git a/paddle/phi/kernels/onednn/conv_handler.h b/paddle/phi/kernels/onednn/conv_handler.h index 86baabf45afc10..3d41c274de24e6 100644 --- a/paddle/phi/kernels/onednn/conv_handler.h +++ b/paddle/phi/kernels/onednn/conv_handler.h @@ -14,10 +14,10 @@ #pragma once +#include "paddle/common/macros.h" #include "paddle/phi/backends/onednn/onednn_helper.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/core/expect.h" -#include "paddle/phi/core/macros.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/cpu/conv_util.h" @@ -68,7 +68,7 @@ class ConvOneDNNHandlerT onednn_engine, cpu_place, funcs::CreateKey( - dev_ctx, phi::vectorize(input->dims()), unique_name)) { + dev_ctx, common::vectorize(input->dims()), unique_name)) { if (unlikely(!this->isCached())) { PADDLE_ENFORCE_EQ( input->layout(), @@ -133,11 +133,12 @@ class ConvOneDNNHandlerT bias->dims().size())); } const auto input_dims = input->dims(); - const auto data_dims = phi::slice_ddim(input_dims, 2, input_dims.size()); + const auto data_dims = + common::slice_ddim(input_dims, 2, input_dims.size()); const auto filter_dims = filter->dims(); const auto filter_data_dims = - phi::slice_ddim(filter_dims, 2, filter_dims.size()); - const auto ksize = phi::vectorize(filter_data_dims); + common::slice_ddim(filter_dims, 2, filter_dims.size()); + const auto ksize = common::vectorize(filter_data_dims); std::vector strides(begin(strides_in), end(strides_in)); std::vector paddings(begin(paddings_in), end(paddings_in)); std::vector dilations(begin(dilations_in), end(dilations_in)); @@ -148,12 +149,12 @@ class ConvOneDNNHandlerT return i - 1; }); - const auto src_tz = phi::vectorize(input->dims()); + const auto src_tz = common::vectorize(input->dims()); - auto weights_tz = phi::vectorize(filter->dims()); + auto weights_tz = common::vectorize(filter->dims()); funcs::GetGroupConvWeightsTz(weights_tz, groups); - const auto dst_tz = phi::vectorize(output->dims()); + const auto dst_tz = common::vectorize(output->dims()); const dnnl::memory::dims stride_dims = strides; const auto onednn_paddings = funcs::ToOneDNNPadding(paddings); @@ -193,7 +194,7 @@ class ConvOneDNNHandlerT fuse_activation); if (bias) { - auto bias_tz = phi::vectorize(bias->dims()); + auto bias_tz = common::vectorize(bias->dims()); dnnl::memory::desc bias_md = funcs::OneDNNMemDesc(bias_tz, dnnl::memory::data_type::f32, @@ -251,7 +252,7 @@ class ConvOneDNNHandlerT dev_ctx.GetEngine(), cpu_place, funcs::CreateKey( - dev_ctx, phi::vectorize(in->dims()), unique_name)) { + dev_ctx, common::vectorize(in->dims()), unique_name)) { if (unlikely(!this->isBwdCached())) { 
PADDLE_ENFORCE_EQ( in->layout(), @@ -288,21 +289,21 @@ class ConvOneDNNHandlerT std::vector dilations(begin(dilations_in), end(dilations_in)); auto input_dims = in->dims(); - auto data_dims = phi::slice_ddim(input_dims, 2, input_dims.size()); + auto data_dims = common::slice_ddim(input_dims, 2, input_dims.size()); auto filter_dims = filter->dims(); auto filter_data_dims = - phi::slice_ddim(filter_dims, 2, filter_dims.size()); - auto ksize = phi::vectorize(filter_data_dims); + common::slice_ddim(filter_dims, 2, filter_dims.size()); + auto ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, data_dims, strides, ksize); - auto src_tz = phi::vectorize(in->dims()); - auto weights_tz = phi::vectorize(filter->dims()); + auto src_tz = common::vectorize(in->dims()); + auto weights_tz = common::vectorize(filter->dims()); int g = std::max(groups, 1); funcs::GetGroupConvWeightsTz(weights_tz, g); - auto dst_tz = phi::vectorize(out_grad->dims()); + auto dst_tz = common::vectorize(out_grad->dims()); /* create memory descriptor for conv backward without specified format * ('any') which lets a primitive (conv backward in this case) choose @@ -335,7 +336,7 @@ class ConvOneDNNHandlerT // Recreating FWD PD. For training there are no post ops in convolution dnnl::primitive_attr conv_attr; if (bias) { - auto bias_tz = phi::vectorize(bias->dims()); + auto bias_tz = common::vectorize(bias->dims()); dnnl::memory::desc bias_md = funcs::OneDNNMemDesc(bias_tz, dnnl::memory::data_type::f32, @@ -443,7 +444,7 @@ class ConvOneDNNHandlerT AcquireWeightsMemoryWithReorderFromDataPrimitive( const phi::DenseTensor* filter, const int groups, const bool is_conv3d) { const K* filter_data = filter->data(); - auto weights_tz = phi::vectorize(filter->dims()); + auto weights_tz = common::vectorize(filter->dims()); funcs::GetGroupConvWeightsTz(weights_tz, groups); auto user_src_md = @@ -538,7 +539,7 @@ class ConvOneDNNHandlerT return weights_mem_p; } else if (is_test) { const K* filter_data = filter->data(); - auto weights_tz = phi::vectorize(filter->dims()); + auto weights_tz = common::vectorize(filter->dims()); funcs::GetGroupConvWeightsTz(weights_tz, groups); auto user_src_md = @@ -556,7 +557,7 @@ class ConvOneDNNHandlerT mask); } else { const T* filter_data = filter->data(); - auto weights_tz = phi::vectorize(filter->dims()); + auto weights_tz = common::vectorize(filter->dims()); funcs::GetGroupConvWeightsTz(weights_tz, groups); auto user_src_md = diff --git a/paddle/phi/kernels/onednn/conv_kernel.cc b/paddle/phi/kernels/onednn/conv_kernel.cc index 8039dab862c66c..0007c717a4d9db 100644 --- a/paddle/phi/kernels/onednn/conv_kernel.cc +++ b/paddle/phi/kernels/onednn/conv_kernel.cc @@ -124,7 +124,7 @@ KernelKey ConvGetKernelTypeForVar(const GetKernelTypeForVarContext* ctx) { (tensor.layout() != phi::DataLayout::ONEDNN)) { auto it = attrs.find("data_format"); const std::string data_format = PADDLE_GET_CONST(std::string, it->second); - auto dl = phi::StringToDataLayout(data_format); + auto dl = common::StringToDataLayout(data_format); // Some models may have intentionally set "AnyLayout" for conv // op. 
Treat this as NCHW (default data_format value) if (dl != phi::DataLayout::kAnyLayout) { diff --git a/paddle/phi/kernels/onednn/conv_transpose_kernel.cc b/paddle/phi/kernels/onednn/conv_transpose_kernel.cc index 1a056a48859318..fcf13bda144cc1 100644 --- a/paddle/phi/kernels/onednn/conv_transpose_kernel.cc +++ b/paddle/phi/kernels/onednn/conv_transpose_kernel.cc @@ -26,7 +26,7 @@ namespace phi { inline dnnl::memory::dims GetWeightsTz(const phi::DenseTensor* filter, const int groups) { - auto weights_tz = phi::vectorize(filter->dims()); + auto weights_tz = common::vectorize(filter->dims()); int g = std::max(groups, 1); int g_dim = (g > 1) ? 1 : 0; funcs::GetGroupConvWeightsTz(weights_tz, g); @@ -119,11 +119,11 @@ class ConvTransposeOneDNNHandlerT "Now we only support 2d oneDNN convolution transpose op")); const auto x_dims = x->dims(); - const auto x_data_dims = phi::slice_ddim(x_dims, 2, x_dims.size()); + const auto x_data_dims = common::slice_ddim(x_dims, 2, x_dims.size()); const auto filter_dims = filter->dims(); const auto filter_data_dims = - phi::slice_ddim(filter_dims, 2, filter_dims.size()); - const auto ksize = phi::vectorize(filter_data_dims); + common::slice_ddim(filter_dims, 2, filter_dims.size()); + const auto ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, x_data_dims, strides, ksize); @@ -132,9 +132,9 @@ class ConvTransposeOneDNNHandlerT return i - 1; }); - const auto src_tz = phi::vectorize(x->dims()); + const auto src_tz = common::vectorize(x->dims()); const auto weights_tz = GetWeightsTz(filter, groups); - const auto dst_tz = phi::vectorize(out->dims()); + const auto dst_tz = common::vectorize(out->dims()); const auto onednn_paddings = funcs::ToOneDNNPadding(paddings); /* create memory descriptor for convolution without specified format @@ -164,7 +164,7 @@ class ConvTransposeOneDNNHandlerT : dnnl::prop_kind::forward_training; if (bias) { - std::vector bias_tz = phi::vectorize(bias->dims()); + std::vector bias_tz = common::vectorize(bias->dims()); const auto bias_md = funcs::OneDNNMemDesc( bias_tz, data_type, funcs::OneDNNMemoryFormat::x); this->AcquireForwardPrimitiveDescriptor( @@ -312,7 +312,7 @@ class ConvTransposeOneDNNHandlerT const std::string& key, const phi::DenseTensor* bias) { const K* bias_data = bias->data(); - auto user_bias_md = funcs::OneDNNMemDesc(phi::vectorize(bias->dims()), + auto user_bias_md = funcs::OneDNNMemDesc(common::vectorize(bias->dims()), funcs::OneDNNGetDataType(), funcs::OneDNNMemoryFormat::x); return this->AcquireMemoryWithReorder(dev_ctx, @@ -446,7 +446,7 @@ KernelKey ConvTransposeGetKernelTypeForVar( (tensor.layout() != phi::DataLayout::ONEDNN)) { auto it = attrs.find("data_format"); const std::string data_format = PADDLE_GET_CONST(std::string, it->second); - auto dl = phi::StringToDataLayout(data_format); + auto dl = common::StringToDataLayout(data_format); // Some models may have intentionally set "AnyLayout" for pool // op. 
Treat this as NCHW (default data_format value) if (dl != phi::DataLayout::kAnyLayout) { diff --git a/paddle/phi/kernels/onednn/dequantize_kernel.cc b/paddle/phi/kernels/onednn/dequantize_kernel.cc index 384ca7ea1e6383..9ce975733f3e46 100644 --- a/paddle/phi/kernels/onednn/dequantize_kernel.cc +++ b/paddle/phi/kernels/onednn/dequantize_kernel.cc @@ -44,7 +44,7 @@ void DeQuantKernel(const Context& dev_ctx, const bool with_shift = q_shift != 0; - auto x_tz = phi::vectorize(x.dims()); + auto x_tz = common::vectorize(x.dims()); auto x_type = phi::funcs::ToOneDNNDataType(x.dtype()); auto out_type = phi::funcs::ToOneDNNDataType(out->dtype()); diff --git a/paddle/phi/kernels/onednn/elementwise_grad_kernel.cc b/paddle/phi/kernels/onednn/elementwise_grad_kernel.cc index bec2aa8228c21b..c7a3a7ee93e84f 100644 --- a/paddle/phi/kernels/onednn/elementwise_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/elementwise_grad_kernel.cc @@ -25,8 +25,8 @@ namespace funcs { inline std::vector CalculateBroadcastedDims( const phi::DenseTensor* x, const phi::DenseTensor* y) { - const auto src_tz = phi::vectorize(x->dims()); - const auto dst_tz = phi::vectorize(y->dims()); + const auto src_tz = common::vectorize(x->dims()); + const auto dst_tz = common::vectorize(y->dims()); std::vector dst_tz_ex(src_tz.size(), 1); @@ -103,7 +103,7 @@ inline void BroadcastReduction(const Place& place, astream.wait(); auto grad_shape = grad_tensor->dims().size() == 0 ? std::vector{1} - : phi::vectorize(grad_tensor->dims()); + : common::vectorize(grad_tensor->dims()); grad_tensor->set_mem_desc(dst_memory->get_desc().reshape(grad_shape)); } @@ -135,7 +135,7 @@ void ElementwiseGradKernel(const OneDNNContext& dev_ctx, scale = (BINARY_OP == dnnl::algorithm::binary_add) ? 1 : -1; } - auto tz = phi::vectorize(dout.dims()); + auto tz = common::vectorize(dout.dims()); funcs::ReorderOneDNNHandler reorder_handler( tz, dout.dtype(), funcs::ToOneDNNDataType(dout.dtype()), onednn_engine); diff --git a/paddle/phi/kernels/onednn/expand_grad_kernel.cc b/paddle/phi/kernels/onednn/expand_grad_kernel.cc index 4f4ef1fd544e44..a8b1beb45832f8 100644 --- a/paddle/phi/kernels/onednn/expand_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/expand_grad_kernel.cc @@ -26,8 +26,8 @@ void ExpandGradKernel(const Context& dev_ctx, DenseTensor* in_grad) { const auto& onednn_engine = dev_ctx.GetEngine(); - auto in_grad_vec_dims = vectorize(in_grad->dims()); - auto out_grad_vec_dims = vectorize(out_grad.dims()); + auto in_grad_vec_dims = common::vectorize(in_grad->dims()); + auto out_grad_vec_dims = common::vectorize(out_grad.dims()); if (in_grad_vec_dims.size() != out_grad_vec_dims.size()) { in_grad_vec_dims.insert(in_grad_vec_dims.begin(), @@ -81,9 +81,10 @@ void ExpandGradKernel(const Context& dev_ctx, reduction_p->execute(astream, reduction_args); astream.wait(); in_grad->set_layout(DataLayout::ONEDNN); - const auto in_grad_md_dims = in_grad->dims().size() != 0 - ? vectorize(in_grad->dims()) - : std::vector{1}; + const auto in_grad_md_dims = + in_grad->dims().size() != 0 + ? 
common::vectorize(in_grad->dims()) + : std::vector{1}; in_grad->set_mem_desc(dst_memory_p->get_desc().reshape(in_grad_md_dims)); } } diff --git a/paddle/phi/kernels/onednn/expand_kernel.cc b/paddle/phi/kernels/onednn/expand_kernel.cc index 229a80c6b623bd..140fbbed6fc71d 100644 --- a/paddle/phi/kernels/onednn/expand_kernel.cc +++ b/paddle/phi/kernels/onednn/expand_kernel.cc @@ -36,7 +36,7 @@ void ExpandKernel(const Context& dev_ctx, DenseTensor* out) { const auto& onednn_engine = dev_ctx.GetEngine(); - auto x_vec_dims = vectorize(x.dims()); + auto x_vec_dims = common::vectorize(x.dims()); auto out_new_dims = shape.GetData(); @@ -48,7 +48,7 @@ void ExpandKernel(const Context& dev_ctx, x_vec_dims = GetExtendedXDims(x_vec_dims, out_new_dims.size()); } - out->Resize(make_ddim(out_new_dims)); + out->Resize(common::make_ddim(out_new_dims)); funcs::BroadcastDataOneDNNHandler handler(dnnl::algorithm::binary_add, onednn_engine, dev_ctx.GetPlace(), diff --git a/paddle/phi/kernels/onednn/full_kernel.cc b/paddle/phi/kernels/onednn/full_kernel.cc index 6ce5625c7f54cd..886c715693e9f7 100644 --- a/paddle/phi/kernels/onednn/full_kernel.cc +++ b/paddle/phi/kernels/onednn/full_kernel.cc @@ -61,7 +61,7 @@ void FullKernel(const Context& dev_ctx, const auto& onednn_engine = dev_ctx.GetEngine(); T fill_value = val.to(); - out->Resize(make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); funcs::FillConstantOneDNNHandler handler( out, onednn_engine, dev_ctx.GetPlace()); @@ -92,7 +92,7 @@ void FullKernel(const Context& dev_ctx, // src0_memory_p's md was just to allow the usage of a binary // primitive as a memset, and now we need to create a real one - out->set_mem_desc({vectorize(out->dims()), + out->set_mem_desc({common::vectorize(out->dims()), funcs::OneDNNGetDataType(), funcs::GetPlainOneDNNFormat(out->dims().size())}); } diff --git a/paddle/phi/kernels/onednn/gaussian_kernel.cc b/paddle/phi/kernels/onednn/gaussian_kernel.cc index a850aee10c31bb..98197961a9df6b 100644 --- a/paddle/phi/kernels/onednn/gaussian_kernel.cc +++ b/paddle/phi/kernels/onednn/gaussian_kernel.cc @@ -41,7 +41,7 @@ void GaussianKernel(const Context& ctx, data[i] = dist(*engine); } - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); dnnl::memory::desc out_mem_desc = phi::funcs::make_memory_desc(*out, DataLayout::NCHW); out->set_mem_desc(out_mem_desc); diff --git a/paddle/phi/kernels/onednn/interpolate_kernel.cc b/paddle/phi/kernels/onednn/interpolate_kernel.cc index 082e21bafa0e2e..be3e158cf384a9 100644 --- a/paddle/phi/kernels/onednn/interpolate_kernel.cc +++ b/paddle/phi/kernels/onednn/interpolate_kernel.cc @@ -33,7 +33,7 @@ KernelKey InterpolateGetKernelTypeForVar( (tensor.layout() != DataLayout::ONEDNN)) { auto it = attrs.find("data_layout"); const std::string data_layout = PADDLE_GET_CONST(std::string, it->second); - auto dl = StringToDataLayout(data_layout); + auto dl = common::StringToDataLayout(data_layout); // Some models may have intentionally set "AnyLayout" for pool // op. 
Treat this as NCHW (default data_format value) if (dl != DataLayout::kAnyLayout) { @@ -62,7 +62,7 @@ class InterpolateOneDNNHandler DenseTensor* out) : OneDNNHandlerNoCachingT(engine, cpu_place) { - const auto dst_tz = vectorize(out->dims()); + const auto dst_tz = common::vectorize(out->dims()); const auto dst_md = dnnl::memory::desc( dst_tz, OneDNNGetDataType(), OneDNNMemoryFormat::any); this->AcquireForwardPrimitiveDescriptor( @@ -126,7 +126,7 @@ std::vector ComputeOutputShape( if (scale.size() == 3 && scale[0] > 0.0f && scale[1] > 0.0f && scale[2] > 0.0f) { int j = 0; - std::vector in_dhw_vec = vectorize(in_dhw_dims); + std::vector in_dhw_vec = common::vectorize(in_dhw_dims); std::transform( in_dhw_vec.begin(), in_dhw_vec.end(), @@ -176,7 +176,7 @@ void InterpolateKernel( out_h, out_w, scale); - DDim dim_out = make_ddim(out_dims_vec); + DDim dim_out = common::make_ddim(out_dims_vec); out->Resize(dim_out); funcs::InterpolateOneDNNHandler handler( diff --git a/paddle/phi/kernels/onednn/matmul_grad_kernel.cc b/paddle/phi/kernels/onednn/matmul_grad_kernel.cc index 0dcc7195800c1d..3866a2d06ae45c 100644 --- a/paddle/phi/kernels/onednn/matmul_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/matmul_grad_kernel.cc @@ -74,9 +74,9 @@ void CalculateGradMatrixDims(const OneDNNContext &dev_ctx, } } - dx_tmp->Resize(make_ddim(*dx_bd_dims)); + dx_tmp->Resize(common::make_ddim(*dx_bd_dims)); dev_ctx.template Alloc(dx_tmp); - dy_tmp->Resize(make_ddim(*dy_bd_dims)); + dy_tmp->Resize(common::make_ddim(*dy_bd_dims)); dev_ctx.template Alloc(dy_tmp); } @@ -117,9 +117,9 @@ void MatmulGradKernel(const Context &dev_ctx, bool transpose_y, DenseTensor *dx, DenseTensor *dy) { - auto x_dims = vectorize(x.dims()); - auto y_dims = vectorize(y.dims()); - auto dout_dims = vectorize(dout.dims()); + auto x_dims = common::vectorize(x.dims()); + auto y_dims = common::vectorize(y.dims()); + auto dout_dims = common::vectorize(dout.dims()); size_t ndims = std::max(x_dims.size(), y_dims.size()); ndims = std::max(ndims, 3); diff --git a/paddle/phi/kernels/onednn/matmul_kernel.cc b/paddle/phi/kernels/onednn/matmul_kernel.cc index 4a7081dfac0254..d11cf70eaa0251 100644 --- a/paddle/phi/kernels/onednn/matmul_kernel.cc +++ b/paddle/phi/kernels/onednn/matmul_kernel.cc @@ -77,7 +77,7 @@ void CalculateMatrixDims(const std::vector &x_dims, } if (x_dims.size() > 2 && y_dims.size() > 2) { - auto out_dims = vectorize(out->dims()); + auto out_dims = common::vectorize(out->dims()); for (size_t i = 0; i < (*x_bd_dims).size() - 2; ++i) { PADDLE_ENFORCE_EQ( (*x_bd_dims)[i] == (*y_bd_dims)[i] || (*x_bd_dims)[i] == 1 || @@ -93,7 +93,7 @@ void CalculateMatrixDims(const std::vector &x_dims, (*y_bd_dims)[i])); (out_dims)[i] = std::max((*x_bd_dims)[i], (*y_bd_dims)[i]); } - out->Resize(make_ddim((out_dims))); + out->Resize(common::make_ddim((out_dims))); } } @@ -123,8 +123,8 @@ void MatmulKernel(const Context &dev_ctx, ? 
PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("force_fp32_output")) : false; - auto x_dims = vectorize(x.dims()); - auto y_dims = vectorize(y.dims()); + auto x_dims = common::vectorize(x.dims()); + auto y_dims = common::vectorize(y.dims()); int ndims = std::max(x_dims.size(), y_dims.size()); ndims = std::max(ndims, 3); @@ -374,7 +374,7 @@ class MulPrimitiveFactory { const DenseTensor *tensor, funcs::OneDNNMemoryFormat format, memory::data_type type = funcs::OneDNNGetDataType()) { - auto dims = vectorize(tensor->dims()); + auto dims = common::vectorize(tensor->dims()); return funcs::OneDNNMemDesc(dims, type, format); } @@ -423,7 +423,7 @@ class MulPrimitiveFactory { } memory TransposeInputY(const DenseTensor *input_y) { - auto dims = vectorize(input_y->dims()); + auto dims = common::vectorize(input_y->dims()); std::swap(dims[0], dims[1]); // Correct output dimensions auto src_desc = CreateMemDescriptor(dims, funcs::OneDNNMemoryFormat::io); @@ -451,9 +451,9 @@ std::shared_ptr> GetPrimitiveFactory( const engine &onednn_engine) { std::string key = funcs::CreateKey(dev_ctx, phi::TransToProtoVarType(input_x->dtype()), - vectorize(input_x->dims()), + common::vectorize(input_x->dims()), phi::TransToProtoVarType(input_y->dtype()), - vectorize(input_y->dims()), + common::vectorize(input_y->dims()), dev_ctx.GetOutputsName("Out")[0]); key = funcs::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key); @@ -527,8 +527,8 @@ void MatmulWithFlattenKernelINT8(const Context &dev_ctx, mul.get_primitive_desc(), dnnl_query_dst_md, 0); dnnl_memory_desc_t cloned_in_md = nullptr; dnnl_memory_desc_clone(&cloned_in_md, in_md); - out->set_mem_desc( - memory::desc(cloned_in_md).reshape(vectorize(out->dims()))); + out->set_mem_desc(memory::desc(cloned_in_md) + .reshape(common::vectorize(out->dims()))); } template diff --git a/paddle/phi/kernels/onednn/pad3d_kernel.cc b/paddle/phi/kernels/onednn/pad3d_kernel.cc index 2a489c258889a5..c184dd4cbf4a1e 100644 --- a/paddle/phi/kernels/onednn/pad3d_kernel.cc +++ b/paddle/phi/kernels/onednn/pad3d_kernel.cc @@ -30,7 +30,7 @@ KernelKey Pad3dGetKernelTypeForVar(const GetKernelTypeForVarContext* ctx) { auto it = attrs.find("data_format"); const std::string data_format = PADDLE_GET_CONST(std::string, it->second); return phi::KernelKey(tensor.place(), - phi::StringToDataLayout(data_format), + common::StringToDataLayout(data_format), expected_kernel_type.dtype()); } #endif diff --git a/paddle/phi/kernels/onednn/pad_kernel_impl.h b/paddle/phi/kernels/onednn/pad_kernel_impl.h index eabe18855b796e..0c360e1dabbc31 100644 --- a/paddle/phi/kernels/onednn/pad_kernel_impl.h +++ b/paddle/phi/kernels/onednn/pad_kernel_impl.h @@ -112,7 +112,7 @@ void PadOpKernel(const Context& dev_ctx, const auto& onednn_engine = dev_ctx.GetEngine(); auto& astream = OneDNNContext::tls().get_stream(); - std::vector x_tz = vectorize(x.dims()); + std::vector x_tz = common::vectorize(x.dims()); // due to the need of supporting NDHWC, inferring out shape // must be done inside the kernel std::vector out_tz(x_tz); @@ -120,7 +120,7 @@ void PadOpKernel(const Context& dev_ctx, for (size_t i = 0; i < paddings.size() / 2; ++i) { out_tz[out_tz.size() - 1 - i] += paddings[2 * i] + paddings[2 * i + 1]; } - out->Resize(make_ddim(out_tz)); + out->Resize(common::make_ddim(out_tz)); funcs::ReorderOneDNNHandler reorder_handler( x_tz, x.dtype(), funcs::ToOneDNNDataType(x.dtype()), onednn_engine); diff --git a/paddle/phi/kernels/onednn/pool_grad_kernel.cc b/paddle/phi/kernels/onednn/pool_grad_kernel.cc index 037c6f1b7f35c7..f5b10186a4ebc6 
100644 --- a/paddle/phi/kernels/onednn/pool_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/pool_grad_kernel.cc @@ -83,7 +83,7 @@ phi::KernelKey PoolOpGradGetKernelTypeForVar( auto it = attrs.find("data_format"); const std::string data_format = PADDLE_GET_CONST(std::string, it->second); return phi::KernelKey(tensor.place(), - phi::StringToDataLayout(data_format), + common::StringToDataLayout(data_format), expected_kernel_type.dtype()); } #endif diff --git a/paddle/phi/kernels/onednn/pool_kernel.cc b/paddle/phi/kernels/onednn/pool_kernel.cc index 4d853421267a13..655cd67ab52df8 100644 --- a/paddle/phi/kernels/onednn/pool_kernel.cc +++ b/paddle/phi/kernels/onednn/pool_kernel.cc @@ -81,7 +81,7 @@ phi::KernelKey PoolOpGetKernelTypeForVar( const AttributeMap& attrs = ctx->GetAttrs(); auto it = attrs.find("data_format"); const std::string data_format = PADDLE_GET_CONST(std::string, it->second); - auto dl = phi::StringToDataLayout(data_format); + auto dl = common::StringToDataLayout(data_format); // Some models may have intentionally set "AnyLayout" for pool // op. Treat this as NCHW (default data_format value) if (dl != phi::DataLayout::kAnyLayout) { diff --git a/paddle/phi/kernels/onednn/reduce_kernel_impl.h b/paddle/phi/kernels/onednn/reduce_kernel_impl.h index 1381b37d57107e..8542bce6437e2a 100644 --- a/paddle/phi/kernels/onednn/reduce_kernel_impl.h +++ b/paddle/phi/kernels/onednn/reduce_kernel_impl.h @@ -23,11 +23,11 @@ inline std::vector CalculateReducedDims( const std::vector& reduce_dims, // NOLINT bool reduce_all, bool keep_dim) { - if (keep_dim) return vectorize(output->dims()); + if (keep_dim) return common::vectorize(output->dims()); if (reduce_all) return std::vector(input->dims().size(), 1); - std::vector output_dims(vectorize(input->dims())); + std::vector output_dims(common::vectorize(input->dims())); for (size_t i = 0; i < reduce_dims.size(); ++i) { // handle negative dims, f.e. "-1" means rightmost dimension int index = (reduce_dims[i] >= 0) ? reduce_dims[i] @@ -48,7 +48,7 @@ void ReduceKernel(const Context& dev_ctx, dnnl::algorithm reduction_type) { reduce_all = recompute_reduce_all(x, dims, reduce_all); const auto& onednn_engine = dev_ctx.GetEngine(); - auto x_tz = vectorize(x.dims()); + auto x_tz = common::vectorize(x.dims()); auto out_tz = CalculateReducedDims(&x, out, dims.GetData(), reduce_all, keep_dim); @@ -78,7 +78,7 @@ void ReduceKernel(const Context& dev_ctx, astream.wait(); const auto reshape_dims = out->dims().size() != 0 - ? vectorize(out->dims()) + ? common::vectorize(out->dims()) : std::vector{1}; out->set_mem_desc(reorder_dst_memory_p->get_desc().reshape(reshape_dims)); } else { @@ -103,7 +103,7 @@ void ReduceKernel(const Context& dev_ctx, astream.wait(); const auto reshape_dims = out->dims().size() != 0 - ? vectorize(out->dims()) + ? 
common::vectorize(out->dims()) : std::vector{1}; out->set_mem_desc(dst_memory_p->get_desc().reshape(reshape_dims)); } @@ -125,7 +125,7 @@ void ReduceGradKernel(const Context& dev_ctx, const auto& onednn_engine = dev_ctx.GetEngine(); auto out_grad_tz = CalculateReducedDims( x_grad, &out_grad, dims.GetData(), reduce_all, keep_dim); - auto x_grad_tz = vectorize(x_grad->dims()); + auto x_grad_tz = common::vectorize(x_grad->dims()); funcs::BroadcastDataOneDNNHandler handler(binary_type, onednn_engine, diff --git a/paddle/phi/kernels/onednn/reduce_mean_grad_kernel.cc b/paddle/phi/kernels/onednn/reduce_mean_grad_kernel.cc index fd566782b182e7..e59a02c1b1a102 100644 --- a/paddle/phi/kernels/onednn/reduce_mean_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/reduce_mean_grad_kernel.cc @@ -26,7 +26,7 @@ void MeanGradKernel(const Context& dev_ctx, bool reduce_all, DenseTensor* x_grad) { reduce_all = recompute_reduce_all(x, dims, reduce_all); - auto input_dims = phi::vectorize(x.dims()); + auto input_dims = common::vectorize(x.dims()); std::vector reduce_dims = dims.GetData(); int number_of_elements = 1; if (reduce_all == false) { diff --git a/paddle/phi/kernels/onednn/reshape_kernel.cc b/paddle/phi/kernels/onednn/reshape_kernel.cc index 47e1fad0936b29..c7d83ba53569bd 100644 --- a/paddle/phi/kernels/onednn/reshape_kernel.cc +++ b/paddle/phi/kernels/onednn/reshape_kernel.cc @@ -17,7 +17,7 @@ namespace phi { static DDim ValidateShape(const std::vector& shape, const DDim& in_dims) { const int64_t in_size = product(in_dims); - auto in_dims_vec = vectorize(in_dims); + auto in_dims_vec = common::vectorize(in_dims); bool all_positive = std::all_of(in_dims_vec.cbegin(), in_dims_vec.cend(), [](int64_t i) { return i > 0; }); @@ -37,7 +37,7 @@ static DDim ValidateShape(const std::vector& shape, errors::InvalidArgument( "Only one dimension value of 'shape' in ReshapeOp can " "be -1. But received shape = [%s], shape[%d] is also -1.", - make_ddim(shape), + common::make_ddim(shape), i)); unk_dim_idx = i; } else if (shape[i] == copy_dim_val) { @@ -49,7 +49,7 @@ static DDim ValidateShape(const std::vector& shape, "the input tensor X's dimensions. " "But received shape = [%s], shape[%d] = 0, X's shape = [%s], " "X's dimensions = %d.", - make_ddim(shape), + common::make_ddim(shape), i, in_dims, in_dims.size())); @@ -61,7 +61,7 @@ static DDim ValidateShape(const std::vector& shape, "Each dimension value of 'shape' in ReshapeOp must not " "be negative except one unknown dimension. " "But received shape = [%s], shape[%d] = %d.", - make_ddim(shape), + common::make_ddim(shape), i, shape[i])); } @@ -88,7 +88,7 @@ static DDim ValidateShape(const std::vector& shape, "'shape' is [%s], known capacity of 'shape' is %d.", in_dims, in_size, - make_ddim(shape), + common::make_ddim(shape), capacity)); } else { output_shape[unk_dim_idx] = -1; @@ -106,11 +106,11 @@ static DDim ValidateShape(const std::vector& shape, "[%s], the capacity of 'shape' is %d.", in_dims, in_size, - make_ddim(shape), + common::make_ddim(shape), capacity)); } } - return make_ddim(output_shape); + return common::make_ddim(output_shape); } template @@ -143,8 +143,8 @@ void ExecuteReshape(const Context& dev_ctx, astream.wait(); out->Resize(out_dims); - const auto reshape_dims = - out_dims.size() != 0 ? vectorize(out_dims) : std::vector{1}; + const auto reshape_dims = out_dims.size() != 0 ? 
common::vectorize(out_dims) + : std::vector{1}; out->set_mem_desc(reorder_dst_memory_p->get_desc().reshape(reshape_dims)); } diff --git a/paddle/phi/kernels/onednn/shape_kernel.cc b/paddle/phi/kernels/onednn/shape_kernel.cc index 51b35ae4a3c0c7..0d3b6eda6700f2 100644 --- a/paddle/phi/kernels/onednn/shape_kernel.cc +++ b/paddle/phi/kernels/onednn/shape_kernel.cc @@ -30,9 +30,9 @@ void ShapeKernel(const Context& dev_ctx, // allocated if (OneDNNContext::tls().get_cur_paddle_data_layout() == DataLayout::kNHWC && x_dims.size() >= 3) { - auto rdims = vectorize(x_dims); + auto rdims = common::vectorize(x_dims); std::rotate(rdims.begin() + 1, rdims.begin() + 2, rdims.end()); - x_dims = make_ddim(rdims); + x_dims = common::make_ddim(rdims); } out->Resize({x_dims.size()}); @@ -42,7 +42,7 @@ void ShapeKernel(const Context& dev_ctx, } dnnl::memory::desc out_mem_desc( - vectorize(out->dims()), + common::vectorize(out->dims()), funcs::ToOneDNNDataType(out->dtype()), funcs::GetPlainOneDNNFormat(out->dims().size())); out->set_mem_desc(out_mem_desc); diff --git a/paddle/phi/kernels/onednn/slice_grad_kernel.cc b/paddle/phi/kernels/onednn/slice_grad_kernel.cc index bc8776362193a6..7f8f6b815b4f0e 100644 --- a/paddle/phi/kernels/onednn/slice_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/slice_grad_kernel.cc @@ -31,7 +31,7 @@ void SliceGradKernel(const Context& dev_ctx, DenseTensor* input_grad) { const auto& onednn_engine = dev_ctx.GetEngine(); - auto dx_dims = vectorize(input_grad->dims()); + auto dx_dims = common::vectorize(input_grad->dims()); auto starts_vec = starts.GetData(); auto ends_vec = ends.GetData(); diff --git a/paddle/phi/kernels/onednn/slice_kernel.cc b/paddle/phi/kernels/onednn/slice_kernel.cc index 9b098a3f14119e..bd59d61c17e794 100644 --- a/paddle/phi/kernels/onednn/slice_kernel.cc +++ b/paddle/phi/kernels/onednn/slice_kernel.cc @@ -30,7 +30,7 @@ void SliceKernel(const Context& dev_ctx, DenseTensor* out) { const auto& onednn_engine = dev_ctx.GetEngine(); - auto x_vec_dims = vectorize(x.dims()); + auto x_vec_dims = common::vectorize(x.dims()); auto starts_vec = starts.GetData(); auto ends_vec = ends.GetData(); @@ -48,7 +48,7 @@ void SliceKernel(const Context& dev_ctx, std::max(static_cast(0), ends_vec[i] - starts_vec[i]); } - out->Resize(make_ddim(slice_dims)); + out->Resize(common::make_ddim(slice_dims)); // Note(0x45f): To support slice Tensors with shapes like [0, 0, 0]. 
if (!x.initialized()) { @@ -93,7 +93,7 @@ void SliceKernel(const Context& dev_ctx, } astream.wait(); - out->Resize(make_ddim(new_out_dims)); + out->Resize(common::make_ddim(new_out_dims)); out->set_mem_desc(reorder_dst_memory_p->get_desc().reshape(new_out_dims)); } diff --git a/paddle/phi/kernels/onednn/split_kernel.cc b/paddle/phi/kernels/onednn/split_kernel.cc index a700bc016cd807..cf0cd1d62a0200 100644 --- a/paddle/phi/kernels/onednn/split_kernel.cc +++ b/paddle/phi/kernels/onednn/split_kernel.cc @@ -48,7 +48,7 @@ void SplitKernel(const Context& dev_ctx, auto outs_number = out.size(); const auto x_dims = x.dims(); - auto x_vec_dims = vectorize(x_dims); + auto x_vec_dims = common::vectorize(x_dims); dnnl::memory::data_type x_type = funcs::ToOneDNNDataType(x.dtype()); @@ -61,7 +61,7 @@ void SplitKernel(const Context& dev_ctx, x.mem_desc(), funcs::to_void_cast(x.data())); for (size_t i = 0; i < outs_number; ++i) { - auto out_vec_dims = vectorize(out[i]->dims()); + auto out_vec_dims = common::vectorize(out[i]->dims()); auto slice_mem_p = reorder_handler.AcquireSubmemory( out_vec_dims, offset, reorder_src_memory_p); diff --git a/paddle/phi/kernels/onednn/squeeze_grad_kernel.cc b/paddle/phi/kernels/onednn/squeeze_grad_kernel.cc index 724335b7e51af7..d8ff4e72c1b117 100644 --- a/paddle/phi/kernels/onednn/squeeze_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/squeeze_grad_kernel.cc @@ -25,7 +25,7 @@ void SqueezeGradKernel(const Context& dev_ctx, const DenseTensor& dout, const IntArray& axes UNUSED, DenseTensor* dx) { - auto dout_vec_dims = dout.dims().size() != 0 ? vectorize(dout.dims()) + auto dout_vec_dims = dout.dims().size() != 0 ? common::vectorize(dout.dims()) : std::vector{1}; auto dout_type = funcs::ToOneDNNDataType(dout.dtype()); @@ -48,7 +48,7 @@ void SqueezeGradKernel(const Context& dev_ctx, auto dx_dims = slice_ddim(xshape.dims(), 1, xshape.dims().size()); dx->Resize(dx_dims); - reorder_dst_memory_p->get_desc().reshape(vectorize(dx_dims)); + reorder_dst_memory_p->get_desc().reshape(common::vectorize(dx_dims)); } } // namespace phi diff --git a/paddle/phi/kernels/onednn/squeeze_kernel.cc b/paddle/phi/kernels/onednn/squeeze_kernel.cc index 9b86f9e1a9c74d..2de2cbb2ecbab8 100644 --- a/paddle/phi/kernels/onednn/squeeze_kernel.cc +++ b/paddle/phi/kernels/onednn/squeeze_kernel.cc @@ -26,7 +26,7 @@ void ExecuteSqueeze(const Context& dev_ctx, const DDim& x_dims, const DDim& out_dims, DenseTensor* out) { - auto x_vec_dims = vectorize(x_dims); + auto x_vec_dims = common::vectorize(x_dims); funcs::ReorderOneDNNHandler reorder_handler( x_vec_dims, @@ -48,8 +48,8 @@ void ExecuteSqueeze(const Context& dev_ctx, out->Resize(out_dims); - auto reshape_dims = - out_dims.size() != 0 ? vectorize(out_dims) : std::vector{1}; + auto reshape_dims = out_dims.size() != 0 ? 
common::vectorize(out_dims) + : std::vector{1}; out->set_mem_desc(reorder_dst_memory_p->get_desc().reshape(reshape_dims)); } diff --git a/paddle/phi/kernels/onednn/stack_kernel.cc b/paddle/phi/kernels/onednn/stack_kernel.cc index 130f91ddf3195e..c6a478c62183ac 100644 --- a/paddle/phi/kernels/onednn/stack_kernel.cc +++ b/paddle/phi/kernels/onednn/stack_kernel.cc @@ -36,7 +36,7 @@ class StackOneDNNHandler : public OneDNNHandlerNoCachingT { } // in stack op all inputs must have same dims - auto input_dims = vectorize(inputs[0]->dims()); + auto input_dims = common::vectorize(inputs[0]->dims()); dnnl::memory::data_type dt = ToOneDNNDataType(inputs[0]->dtype()); std::vector srcs_md; @@ -56,7 +56,7 @@ class StackOneDNNHandler : public OneDNNHandlerNoCachingT { input_dims[stack_axis] *= inputs.size(); dst_md = dnnl::memory::desc(input_dims, dt, OneDNNMemoryFormat::any); } else { - auto extended_input_dims = vectorize(output->dims()); + auto extended_input_dims = common::vectorize(output->dims()); extended_input_dims[stack_axis] = 1; for (auto input : inputs) { @@ -67,7 +67,8 @@ class StackOneDNNHandler : public OneDNNHandlerNoCachingT { // distinguish between f.e. abcd and abdc if last dim is equal to 1 so // enforcing is needed for better performance dst_fmt = GetPlainOneDNNFormat(extended_input_dims.size()); - dst_md = dnnl::memory::desc(vectorize(output->dims()), dt, dst_fmt); + dst_md = + dnnl::memory::desc(common::vectorize(output->dims()), dt, dst_fmt); } this->AcquireForwardPrimitiveDescriptor(dst_md, stack_axis, srcs_md); @@ -109,7 +110,8 @@ void StackKernel(const Context& dev_ctx, concat_p->execute(astream, args); astream.wait(); - output->set_mem_desc(dst_mem->get_desc().reshape(vectorize(output->dims()))); + output->set_mem_desc( + dst_mem->get_desc().reshape(common::vectorize(output->dims()))); } } // namespace phi diff --git a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc index ca969b5625b5af..24b63dd1dbce7e 100644 --- a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc @@ -36,7 +36,7 @@ void TransposeGradKernel(const Context& dev_ctx, return; } - std::vector out_grad_tz = vectorize(out_grad.dims()); + std::vector out_grad_tz = common::vectorize(out_grad.dims()); funcs::ReorderOneDNNHandler reorder_handler( out_grad_tz, out_grad.dtype(), diff --git a/paddle/phi/kernels/onednn/transpose_kernel.cc b/paddle/phi/kernels/onednn/transpose_kernel.cc index fe744ffef1ca43..44449fa0ea642e 100644 --- a/paddle/phi/kernels/onednn/transpose_kernel.cc +++ b/paddle/phi/kernels/onednn/transpose_kernel.cc @@ -40,7 +40,7 @@ void TransposeKernel(const Context& dev_ctx, formated_axis[i] = axis[i] + axis_size; } } - auto dims = phi::vectorize(x_dims); + auto dims = common::vectorize(x_dims); std::rotate(dims.begin() + 1, dims.begin() + 2, dims.end()); x_dims = x_dims.reshape(dims); @@ -65,7 +65,7 @@ void TransposeKernel(const Context& dev_ctx, return; } - auto x_vec_dims = vectorize(x.dims()); + auto x_vec_dims = common::vectorize(x.dims()); auto x_type = funcs::ToOneDNNDataType(x.dtype()); funcs::ReorderOneDNNHandler reorder_handler( x_vec_dims, x.dtype(), x_type, dev_ctx.GetEngine()); diff --git a/paddle/phi/kernels/primitive/datamover_primitives.h b/paddle/phi/kernels/primitive/datamover_primitives.h index 2a3579d99cfe67..a78045aa0dc7ca 100644 --- a/paddle/phi/kernels/primitive/datamover_primitives.h +++ b/paddle/phi/kernels/primitive/datamover_primitives.h @@ -20,7 +20,7 @@ #ifdef 
PADDLE_WITH_HIP #include #endif -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" namespace phi { namespace kps { diff --git a/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc b/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc index dccbba6947a1be..74d2bdc0a673fa 100644 --- a/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc +++ b/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc @@ -32,7 +32,7 @@ void MultiplyRawKernel(const Context& dev_ctx, int axis, SelectedRows* out) { PADDLE_ENFORCE_EQ( - phi::product(y.dims()), + common::product(y.dims()), 1, phi::errors::InvalidArgument("For MultiplyKernel, if X is Sparse, Y must " "contain only one element.")); diff --git a/paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h b/paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h index 3fd42fb53b5f76..941878bf419895 100644 --- a/paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h +++ b/paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h @@ -82,7 +82,7 @@ void AddNKernel(const Context &dev_ctx, // no data, just set an empty out tensor. auto *out_dense = out->mutable_value(); out_dense->clear(); - out_dense->Resize(phi::make_ddim({0})); + out_dense->Resize(common::make_ddim({0})); dev_ctx.template Alloc(out_dense); } } diff --git a/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h b/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h index 0437a48a4c8083..216ab6b55b5242 100644 --- a/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h +++ b/paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h @@ -293,11 +293,11 @@ void ComputeRowImpl(const Context& dev_ctx, // paddle/phi/kernels/impl/lamb_kernel_impl.h Please modify it together // DenseTensor p_norm_t; - // p_norm_t.Resize(phi::make_ddim({1})); + // p_norm_t.Resize(common::make_ddim({1})); // auto* p_norm_ptr = dev_ctx.template Alloc(&p_norm_t); // DenseTensor trust_ratio_div_norm_t; - // trust_ratio_div_norm_t.Resize(phi::make_ddim({1})); + // trust_ratio_div_norm_t.Resize(common::make_ddim({1})); // auto* trust_ratio_div_norm_ptr = // dev_ctx.template Alloc(&trust_ratio_div_norm_t); diff --git a/paddle/phi/kernels/sparse/cpu/coalesce_kernel.cc b/paddle/phi/kernels/sparse/cpu/coalesce_kernel.cc index 045b620b9ea957..7a2e2ef96b6e07 100644 --- a/paddle/phi/kernels/sparse/cpu/coalesce_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/coalesce_kernel.cc @@ -63,9 +63,9 @@ void CoalesceCooCPUKernel(const CPUContext& dev_ctx, out_indices.Resize({x_indices.dims()[0], out_nnz}); if (out_values.dims().size() == 1) { - out_values.Resize(phi::make_ddim({out_nnz})); + out_values.Resize(common::make_ddim({out_nnz})); } else { - out_values.Resize(phi::make_ddim({out_nnz, x_values.dims()[1]})); + out_values.Resize(common::make_ddim({out_nnz, x_values.dims()[1]})); } IntT* out_indices_ptr = out_indices.data(); diff --git a/paddle/phi/kernels/sparse/cpu/conv_kernel.cc b/paddle/phi/kernels/sparse/cpu/conv_kernel.cc index 15b1a54640dbc3..9f51885a94e1c0 100644 --- a/paddle/phi/kernels/sparse/cpu/conv_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/conv_kernel.cc @@ -53,7 +53,7 @@ void Conv3dCooCPUKernel(const CPUContext& dev_ctx, int count_tmp = is2D ?
4 : 5; std::vector out_dims_vec(count_tmp, 1); - DDim out_dims = make_ddim(out_dims_vec); + DDim out_dims = common::make_ddim(out_dims_vec); std::vector kernel_sizes(kernel_dims.size()); for (int i = 0; i < kernel_dims.size(); i++) { diff --git a/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc b/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc index 72e3d00962b5dc..daa29515dc77b8 100644 --- a/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc @@ -237,14 +237,14 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx, } else { DenseTensorMeta indices_meta( phi::CppTypeToDataType::Type(), - phi::make_ddim( + common::make_ddim( {static_cast(sparse_dim), static_cast(nnz)}), DataLayout::NCHW); - auto indeces_dim = - vectorize(slice_ddim(x.values().dims(), 1, x.values().dims().size())); + auto indeces_dim = common::vectorize( + slice_ddim(x.values().dims(), 1, x.values().dims().size())); indeces_dim.insert(indeces_dim.begin(), nnz); DenseTensorMeta values_meta( - x.dtype(), phi::make_ddim(indeces_dim), DataLayout::NCHW); + x.dtype(), common::make_ddim(indeces_dim), DataLayout::NCHW); phi::DenseTensor out_indices = phi::Empty(dev_ctx, std::move(indices_meta)); phi::DenseTensor out_values = phi::Empty(dev_ctx, std::move(values_meta)); diff --git a/paddle/phi/kernels/sparse/cpu/mask_kernel.cc b/paddle/phi/kernels/sparse/cpu/mask_kernel.cc index d4e240d5e82039..b92ebccbefbc80 100644 --- a/paddle/phi/kernels/sparse/cpu/mask_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/mask_kernel.cc @@ -14,8 +14,8 @@ limitations under the License. */ #include "paddle/phi/kernels/sparse/mask_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/api/ext/dispatch.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" diff --git a/paddle/phi/kernels/sparse/cpu/reshape_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/reshape_grad_kernel.cc index fc843f81c31ee1..8b3949badb77da 100644 --- a/paddle/phi/kernels/sparse/cpu/reshape_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/reshape_grad_kernel.cc @@ -29,7 +29,7 @@ void ReshapeCooGradKernel(const Context& dev_ctx, const SparseCooTensor& dout, SparseCooTensor* dx) { EmptyLikeCooKernel(dev_ctx, x, dx); - phi::IntArray x_shape(phi::vectorize(x.dims())); + phi::IntArray x_shape(common::vectorize(x.dims())); ReshapeCooKernel(dev_ctx, dout, x_shape, dx); } @@ -39,7 +39,7 @@ void ReshapeCsrGradKernel(const Context& dev_ctx, const SparseCsrTensor& dout, SparseCsrTensor* dx) { EmptyLikeCsrKernel(dev_ctx, x, dx); - phi::IntArray x_shape(phi::vectorize(x.dims())); + phi::IntArray x_shape(common::vectorize(x.dims())); ReshapeCsrKernel(dev_ctx, dout, x_shape, dx); } diff --git a/paddle/phi/kernels/sparse/cpu/reshape_kernel.cc b/paddle/phi/kernels/sparse/cpu/reshape_kernel.cc index e8badf3d6e8248..4d5d4285baceef 100644 --- a/paddle/phi/kernels/sparse/cpu/reshape_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/reshape_kernel.cc @@ -14,7 +14,7 @@ #include "paddle/phi/kernels/sparse/unary_kernel.h" -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" #include "paddle/phi/kernels/sparse/sparse_utils_kernel.h" #include "paddle/phi/backends/cpu/cpu_context.h" @@ -59,9 +59,9 @@ void ReshapeCooCPUKernel(const Context& dev_ctx, auto* out_indices_data = out_indices.data(); const phi::DDim& x_sparse_part_strides = - phi::stride(phi::make_ddim(x_sparse_part_dims)); + 
common::stride(common::make_ddim(x_sparse_part_dims)); const phi::DDim& out_sparse_part_strides = - phi::stride(phi::make_ddim(out_sparse_part_dims)); + common::stride(common::make_ddim(out_sparse_part_dims)); int64_t location = 0; for (int64_t j = 0; j < x_nnz; ++j) { location = 0; diff --git a/paddle/phi/kernels/sparse/cpu/slice_kernel.cc b/paddle/phi/kernels/sparse/cpu/slice_kernel.cc index c40be8a9b15799..81af8339f88a91 100644 --- a/paddle/phi/kernels/sparse/cpu/slice_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/slice_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/sparse/unary_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/funcs/slice_utils.h" diff --git a/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc index 3c55d12dcf051d..16362bcff8561c 100644 --- a/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc @@ -100,7 +100,7 @@ void SoftmaxCooGradCPUKernel(const Context& dev_ctx, auto out_values = out.values(); const auto out_dims = out.dims(); auto sparse_dim = out.sparse_dim(); - auto sizes = phi::vectorize(out_dims); + auto sizes = common::vectorize(out_dims); auto grad_indices = dout.indices(); auto grad_values = dout.values(); auto grad_nnz = dout.nnz(); @@ -136,13 +136,13 @@ void SoftmaxCooGradCPUKernel(const Context& dev_ctx, std::multiplies<>()); DenseTensor values_2(*values); - values_2.Resize(phi::make_ddim({nnz, nvalues})); + values_2.Resize(common::make_ddim({nnz, nvalues})); DenseTensor out_values_2(out_values); - out_values_2.Resize(phi::make_ddim({nnz, nvalues})); + out_values_2.Resize(common::make_ddim({nnz, nvalues})); DenseTensor grad_values_2(grad_values); - grad_values_2.Resize(phi::make_ddim({nnz, nvalues})); + grad_values_2.Resize(common::make_ddim({nnz, nvalues})); std::map> pools; phi::funcs::sparse::GetPoolsSoftmax(out_indices, sizes, dim, &pools); diff --git a/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc b/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc index 5ccf157a9dd40b..ea790508ab1679 100644 --- a/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc @@ -111,7 +111,7 @@ void SoftmaxCooCPUKernel(const Context& dev_ctx, return; } - const std::vector sizes = phi::vectorize(x_dims); + const std::vector sizes = common::vectorize(x_dims); std::map> pools; IntT nvalues = std::accumulate(sizes.begin() + sparse_dim, sizes.end(), diff --git a/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc b/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc index f3d26568f50682..af84f14458e616 100644 --- a/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc @@ -265,7 +265,7 @@ void CooToDenseCPUKernel(const CPUContext& dev_ctx, const auto dense_dims = x.dims(); const auto indices = x.indices(); const auto values = x.values(); - const auto indices_dims = phi::vectorize(indices.dims()); + const auto indices_dims = common::vectorize(indices.dims()); int64_t sparse_dim = indices_dims[0]; if (indices_dims.size() == 1) { sparse_dim = 1; diff --git a/paddle/phi/kernels/sparse/cpu/sum_kernel.cc b/paddle/phi/kernels/sparse/cpu/sum_kernel.cc index 2b4b11bea89e45..5b96203b7a2db3 100644 --- a/paddle/phi/kernels/sparse/cpu/sum_kernel.cc +++ 
b/paddle/phi/kernels/sparse/cpu/sum_kernel.cc @@ -44,10 +44,10 @@ void SumCooCPUKernel(const Context& dev_ctx, if (n_dim == 0) { std::vector out_indices_shape; if (keep_dim) { - out_dims = make_ddim(std::vector(x_dims.size(), 1)); + out_dims = common::make_ddim(std::vector(x_dims.size(), 1)); out_indices_shape = {sparse_dim, 1}; } else { - out_dims = make_ddim({1}); + out_dims = common::make_ddim({1}); out_indices_shape = {1}; } out_indices = Empty(dev_ctx, out_indices_shape); @@ -70,7 +70,7 @@ void SumCooCPUKernel(const Context& dev_ctx, dims.emplace_back(1); } } - out_dims = make_ddim(dims); + out_dims = common::make_ddim(dims); if (dim >= sparse_dim) { out_indices = x_indices; @@ -160,9 +160,9 @@ void SumCsrKernel(const Context& dev_ctx, DDim out_dims; if (n_dim == 0) { if (keep_dim && x.dims().size() == 3) { - out_dims = make_ddim({1, 1, 1}); + out_dims = common::make_ddim({1, 1, 1}); } else { - out_dims = make_ddim({1, 1}); + out_dims = common::make_ddim({1, 1}); } out_crows = Empty(dev_ctx, {2}); // crows = [0, 1] auto* out_crows_data = out_crows.data(); @@ -184,7 +184,7 @@ void SumCsrKernel(const Context& dev_ctx, std::vector out_data; if (x.dims().size() == 2) { out_crows_data[0] = 0; - out_dims = make_ddim({x.dims()[0], 1}); + out_dims = common::make_ddim({x.dims()[0], 1}); for (int i = 0; i < x.dims()[0]; ++i) { if (x_crows_data[i] != x_crows_data[i + 1]) { T sum_value = 0; @@ -199,9 +199,9 @@ void SumCsrKernel(const Context& dev_ctx, } } else { if (keep_dim) { - out_dims = make_ddim({x.dims()[0], x.dims()[1], 1}); + out_dims = common::make_ddim({x.dims()[0], x.dims()[1], 1}); } else { - out_dims = make_ddim({x.dims()[0], x.dims()[1]}); + out_dims = common::make_ddim({x.dims()[0], x.dims()[1]}); } int j = 0; for (int batch = 0; batch < x.dims()[0]; ++batch) { diff --git a/paddle/phi/kernels/sparse/gpu/addmm_kernel.cu b/paddle/phi/kernels/sparse/gpu/addmm_kernel.cu index 1a43009c519b6c..d668cd518e4c70 100644 --- a/paddle/phi/kernels/sparse/gpu/addmm_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/addmm_kernel.cu @@ -16,8 +16,8 @@ limitations under the License. */ #include +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" @@ -35,9 +35,9 @@ void AddmmKernelImpl(const Context& dev_ctx, float alpha, DenseTensor* out) { #if CUDA_VERSION >= 11000 - std::vector input_dim = phi::vectorize(input.dims()); - std::vector x_dim = phi::vectorize(x.dims()); - std::vector y_dim = phi::vectorize(y.dims()); + std::vector input_dim = common::vectorize(input.dims()); + std::vector x_dim = common::vectorize(x.dims()); + std::vector y_dim = common::vectorize(y.dims()); auto rank = input_dim.size(); PADDLE_ENFORCE_GE( diff --git a/paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu b/paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu index aaed804c926576..67785d89505b4f 100644 --- a/paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu @@ -120,9 +120,9 @@ void CoalesceCooGPUKernel(const GPUContext& dev_ctx, out_indices.Resize({x_indices.dims()[0], out_nnz}); if (out_values.dims().size() == 1) { - out_values.Resize(phi::make_ddim({out_nnz})); + out_values.Resize(common::make_ddim({out_nnz})); } else { - out_values.Resize(phi::make_ddim({out_nnz, x_values.dims()[1]})); + out_values.Resize(common::make_ddim({out_nnz, x_values.dims()[1]})); } // 5. 
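The renames in the hunks above are purely a namespace move: every call site keeps the helper's name and arguments and only requalifies it from phi:: (or unqualified lookup) to common::. A minimal standalone sketch of the assumed post-move API, with DDim mocked as a thin struct so the snippet compiles outside the Paddle tree (the real common::DDim in paddle/common/ddim.h is a fixed-capacity dimension holder, not a vector wrapper):

#include <cstdint>
#include <utility>
#include <vector>

namespace common {
// Mock stand-in for paddle/common/ddim.h, illustration only.
struct DDim {
  std::vector<int64_t> d;
  int size() const { return static_cast<int>(d.size()); }
  int64_t operator[](int i) const { return d[i]; }
};
inline DDim make_ddim(std::vector<int64_t> dims) {
  return DDim{std::move(dims)};
}
inline std::vector<int64_t> vectorize(const DDim& ddim) { return ddim.d; }
}  // namespace common

int main() {
  // Migrated call sites spell common:: explicitly instead of phi::.
  common::DDim dims = common::make_ddim({2, 3, 4});
  std::vector<int64_t> round_trip = common::vectorize(dims);
  return round_trip.size() == 3 ? 0 : 1;
}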
scatter the values diff --git a/paddle/phi/kernels/sparse/gpu/conv_kernel.cu b/paddle/phi/kernels/sparse/gpu/conv_kernel.cu index 048fef31d2fee4..8c05ae2f3ceabc 100644 --- a/paddle/phi/kernels/sparse/gpu/conv_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/conv_kernel.cu @@ -91,7 +91,7 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx, int rank = is2D ? 4 : 5; std::vector out_dims_vec(rank, 1); - DDim out_dims = make_ddim(out_dims_vec); + DDim out_dims = common::make_ddim(out_dims_vec); std::vector kernel_sizes(kernel_dims.size()); for (int i = 0; i < kernel_dims.size(); i++) { diff --git a/paddle/phi/kernels/sparse/gpu/full_kernel.cu b/paddle/phi/kernels/sparse/gpu/full_kernel.cu index b530c3323e330a..1bad453fea8d6f 100644 --- a/paddle/phi/kernels/sparse/gpu/full_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/full_kernel.cu @@ -34,7 +34,7 @@ void FullLikeCooKernel(const Context& dev_ctx, DenseTensor* values = out->mutable_values(); phi::Full( - dev_ctx, phi::vectorize(x.values().dims()), val, values); + dev_ctx, common::vectorize(x.values().dims()), val, values); out->set_dims(x.dims()); } @@ -52,7 +52,7 @@ void FullLikeCsrKernel(const Context& dev_ctx, DenseTensor* values = out->mutable_values(); phi::Full( - dev_ctx, phi::vectorize(x.values().dims()), val, values); + dev_ctx, common::vectorize(x.values().dims()), val, values); out->set_dims(x.dims()); } diff --git a/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu b/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu index cd8013b4ee8399..5834c525e388c6 100644 --- a/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu @@ -211,7 +211,8 @@ void FusedAttentionCsrKernel( q_dim[1], batch_nnz); - softmax->set_dims(phi::make_ddim({q_dim[0], q_dim[1], q_dim[2], q_dim[2]})); + softmax->set_dims( + common::make_ddim({q_dim[0], q_dim[1], q_dim[2], q_dim[2]})); MatmulCsrDenseKernel(dev_ctx, *softmax, value, out); #else PADDLE_THROW( diff --git a/paddle/phi/kernels/sparse/gpu/mask_kernel.cu b/paddle/phi/kernels/sparse/gpu/mask_kernel.cu index 3b93ff9638c052..ab367efb11fd6d 100644 --- a/paddle/phi/kernels/sparse/gpu/mask_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/mask_kernel.cu @@ -14,9 +14,9 @@ limitations under the License. 
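The mask_kernel.cu hunk that follows, like the earlier onednn and sparse ones, swaps #include "paddle/phi/core/ddim.h" for #include "paddle/common/ddim.h". The patch never shows what remains of the old phi header, so the shim below is only a guess at how such a move is usually kept source-compatible during a migration, not a quote from this PR:

// Hypothetical forwarding version of paddle/phi/core/ddim.h (assumed).
#pragma once

#include "paddle/common/ddim.h"

namespace phi {
// Re-export the relocated symbols so not-yet-migrated call sites keep
// compiling while new code includes paddle/common/ddim.h directly.
using DDim = common::DDim;
using common::make_ddim;
using common::product;
using common::slice_ddim;
using common::stride;
using common::vectorize;
}  // namespace phi

A forwarder like this lets files be converted one at a time, which would match the mix of already-migrated and still-pending call sites visible across these hunks.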
*/ #include "paddle/phi/kernels/sparse/mask_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" diff --git a/paddle/phi/kernels/sparse/gpu/matmul_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/matmul_grad_kernel.cu index 7dbdbe2acc9925..5878b6662f8771 100644 --- a/paddle/phi/kernels/sparse/gpu/matmul_grad_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/matmul_grad_kernel.cu @@ -164,7 +164,7 @@ void MaskedMatmulCsrGradKernel(const Context& dev_ctx, // dy{Dense} = x'{Dense} * dout{SparseCsr} // That is: dy'{Dense} = dout'{SparseCsr} * x{Dense} if (dy) { - std::vector trans_dim_vec = phi::vectorize(y.dims()); + std::vector trans_dim_vec = common::vectorize(y.dims()); size_t rank = trans_dim_vec.size(); std::swap(trans_dim_vec[rank - 1], trans_dim_vec[rank - 2]); DenseTensor trans_dy = phi::Empty(dev_ctx, trans_dim_vec); diff --git a/paddle/phi/kernels/sparse/gpu/matmul_kernel.cu b/paddle/phi/kernels/sparse/gpu/matmul_kernel.cu index f39209e9b8604d..9a808f5ddcc0b9 100644 --- a/paddle/phi/kernels/sparse/gpu/matmul_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/matmul_kernel.cu @@ -16,8 +16,8 @@ limitations under the License. */ #include +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/meta_tensor.h" @@ -38,8 +38,8 @@ void MatmulKernelImpl(const Context& dev_ctx, const DenseTensor& y, DenseTensor* out) { #if CUDA_VERSION >= 11000 || HIP_VERSION >= 402 - std::vector xdim_vec = phi::vectorize(x.dims()); - std::vector ydim_vec = phi::vectorize(y.dims()); + std::vector xdim_vec = common::vectorize(x.dims()); + std::vector ydim_vec = common::vectorize(y.dims()); auto x_ndims = xdim_vec.size(); auto y_ndims = ydim_vec.size(); PADDLE_ENFORCE_EQ( @@ -76,7 +76,7 @@ void MatmulKernelImpl(const Context& dev_ctx, out_dim_vec[y_ndims - 2] = xdim_vec[x_ndims - 2]; out_dim_vec[y_ndims - 1] = ydim_vec[y_ndims - 1]; MetaTensor meta_out(out); - meta_out.set_dims(phi::make_ddim(out_dim_vec)); + meta_out.set_dims(common::make_ddim(out_dim_vec)); meta_out.set_dtype(y.dtype()); dev_ctx.template Alloc(out); @@ -125,9 +125,9 @@ void MaskedMatmulCsrKernel(const Context& dev_ctx, const SparseCsrTensor& mask, SparseCsrTensor* out) { #if CUDA_VERSION >= 11030 - std::vector xdim_vec = phi::vectorize(x.dims()); - std::vector ydim_vec = phi::vectorize(y.dims()); - std::vector maskdim_vec = phi::vectorize(mask.dims()); + std::vector xdim_vec = common::vectorize(x.dims()); + std::vector ydim_vec = common::vectorize(y.dims()); + std::vector maskdim_vec = common::vectorize(mask.dims()); auto x_ndims = xdim_vec.size(); auto y_ndims = ydim_vec.size(); diff --git a/paddle/phi/kernels/sparse/gpu/mv_kernel.cu b/paddle/phi/kernels/sparse/gpu/mv_kernel.cu index 27f094fb0fa982..7b442eb8e1bc6e 100644 --- a/paddle/phi/kernels/sparse/gpu/mv_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/mv_kernel.cu @@ -16,8 +16,8 @@ limitations under the License. 
*/ #include +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/sparse/sparse_blas.h" @@ -30,8 +30,8 @@ void MvKernelImpl(const Context& dev_ctx, const DenseTensor& vec, DenseTensor* out) { #if CUDA_VERSION >= 11000 - std::vector x_dim = phi::vectorize(x.dims()); - std::vector vec_dim = phi::vectorize(vec.dims()); + std::vector x_dim = common::vectorize(x.dims()); + std::vector vec_dim = common::vectorize(vec.dims()); auto x_ndims = x_dim.size(); auto vec_ndims = vec_dim.size(); PADDLE_ENFORCE_EQ(x_ndims, @@ -49,7 +49,7 @@ void MvKernelImpl(const Context& dev_ctx, "suitable for mv opetation, " "x_dim[-1] must be eaqual to vec_dim[-1].")); std::vector out_dim = {x_dim[x_ndims - 2]}; - out->Resize(phi::make_ddim(out_dim)); + out->Resize(common::make_ddim(out_dim)); dev_ctx.template Alloc(out); auto sparse_blas = phi::funcs::sparse::GetSparseBlas(dev_ctx); sparse_blas.SPMV(false, static_cast(1), x, vec, static_cast(0), out); diff --git a/paddle/phi/kernels/sparse/gpu/reshape_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/reshape_grad_kernel.cu index bfc81676eb8041..a4523a82018f8d 100644 --- a/paddle/phi/kernels/sparse/gpu/reshape_grad_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/reshape_grad_kernel.cu @@ -30,7 +30,7 @@ void ReshapeCooGradKernel(const Context& dev_ctx, const SparseCooTensor& dout, SparseCooTensor* dx) { EmptyLikeCooKernel(dev_ctx, x, dx); - phi::IntArray x_shape(phi::vectorize(x.dims())); + phi::IntArray x_shape(common::vectorize(x.dims())); ReshapeCooKernel(dev_ctx, dout, x_shape, dx); } @@ -41,7 +41,7 @@ void ReshapeCsrGradKernel(const Context& dev_ctx, const SparseCsrTensor& dout, SparseCsrTensor* dx) { EmptyLikeCsrKernel(dev_ctx, x, dx); - phi::IntArray x_shape(phi::vectorize(x.dims())); + phi::IntArray x_shape(common::vectorize(x.dims())); ReshapeCsrKernel(dev_ctx, dout, x_shape, dx); } diff --git a/paddle/phi/kernels/sparse/gpu/reshape_kernel.cu b/paddle/phi/kernels/sparse/gpu/reshape_kernel.cu index 0d04bb2477f6be..33a11639b88058 100644 --- a/paddle/phi/kernels/sparse/gpu/reshape_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/reshape_kernel.cu @@ -75,9 +75,9 @@ void ReshapeCooGPUKernel(const Context& dev_ctx, const auto* x_indices_data = x.indices().data(); auto* out_indices_data = out_indices.data(); const phi::DDim& x_sparse_part_strides = - phi::stride(phi::make_ddim(x_sparse_part_dims)); + common::stride(common::make_ddim(x_sparse_part_dims)); const phi::DDim& out_sparse_part_strides = - phi::stride(phi::make_ddim(out_sparse_part_dims)); + common::stride(common::make_ddim(out_sparse_part_dims)); int64_t *destination_x_sparse_part_strides, *destination_out_sparse_part_strides; diff --git a/paddle/phi/kernels/sparse/gpu/slice_kernel.cu b/paddle/phi/kernels/sparse/gpu/slice_kernel.cu index f47accfc8eff81..b96883c0ea3e17 100644 --- a/paddle/phi/kernels/sparse/gpu/slice_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/slice_kernel.cu @@ -17,11 +17,11 @@ #include "paddle/phi/kernels/sparse/unary_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" #include "paddle/phi/common/memory_utils.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/visit_type.h" #include "paddle/phi/kernels/empty_kernel.h" diff --git 
a/paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu index cf3dc79c8edd0b..aeb09b3fc7c981 100644 --- a/paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu @@ -189,7 +189,7 @@ void SoftmaxCooGradGPUKernel(const Context& dev_ctx, const auto output_indices_dims = out.indices().dims(); const auto out_dims = out.dims(); auto sparse_dim = out.sparse_dim(); - auto sizes = phi::vectorize(out_dims); + auto sizes = common::vectorize(out_dims); auto grad_indices = dout.indices(); auto grad_values = dout.values(); auto grad_values_ptr = grad_values.data(); @@ -243,7 +243,7 @@ void SoftmaxCooGradGPUKernel(const Context& dev_ctx, std::multiplies<>()); DenseTensor values_2(*values); - values_2.Resize(phi::make_ddim({nnz, nvalues})); + values_2.Resize(common::make_ddim({nnz, nvalues})); DenseTensor sorted_indices; DenseTensor pool_offsets; diff --git a/paddle/phi/kernels/sparse/gpu/softmax_kernel.cu b/paddle/phi/kernels/sparse/gpu/softmax_kernel.cu index 253a5b2141dd03..8a510c6ed30a37 100644 --- a/paddle/phi/kernels/sparse/gpu/softmax_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/softmax_kernel.cu @@ -194,7 +194,7 @@ void SoftmaxCooGPUKernel(const Context& dev_ctx, auto indices = x.indices(); auto values = x.values(); const auto x_dims = x.dims(); - const std::vector sizes = phi::vectorize(x_dims); + const std::vector sizes = common::vectorize(x_dims); const auto sparse_dim = x.sparse_dim(); const IntT x_nnz = x.nnz(); DenseTensor out_indices(indices); diff --git a/paddle/phi/kernels/sparse/gpu/sum_kernel.cu b/paddle/phi/kernels/sparse/gpu/sum_kernel.cu index 594e1ec48b2e1f..4f53b8886f4936 100644 --- a/paddle/phi/kernels/sparse/gpu/sum_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/sum_kernel.cu @@ -175,10 +175,10 @@ void SumCooGPU0Kernel(const Context& dev_ctx, DenseTensor out_indices; DenseTensor out_values; if (keep_dim) { - out_dims = make_ddim(std::vector(x_dims.size(), 1)); + out_dims = common::make_ddim(std::vector(x_dims.size(), 1)); out_indices = Empty(dev_ctx, {sparse_dim, 1}); } else { - out_dims = make_ddim({1}); + out_dims = common::make_ddim({1}); out_indices = Empty(dev_ctx, {1, 1}); } phi::funcs::SetConstant set_out_indices; @@ -213,7 +213,7 @@ void SumCooGPU1Kernel(const Context& dev_ctx, dims.emplace_back(1); } } - out_dims = make_ddim(dims); + out_dims = common::make_ddim(dims); if (dim >= sparse_dim) { out_indices = x_indices; @@ -308,9 +308,9 @@ void SumCsr0Kernel(const Context& dev_ctx, DenseTensor out_crows, out_cols, out_values; DDim out_dims; if (keep_dim && x.dims().size() == 3) { - out_dims = make_ddim({1, 1, 1}); + out_dims = common::make_ddim({1, 1, 1}); } else { - out_dims = make_ddim({1, 1}); + out_dims = common::make_ddim({1, 1}); } out_crows = Empty(dev_ctx, {2}); // crows = [0, 1] out_cols = Empty(dev_ctx, {1}); // crows = [0] @@ -351,7 +351,7 @@ void SumCsr1Kernel(const Context& dev_ctx, out_values = Empty(dev_ctx, {x_dim0}); auto* out_cols_data = out_cols.data(); auto* out_values_data = out_values.data(); - out_dims = make_ddim({x_dim0, 1}); + out_dims = common::make_ddim({x_dim0, 1}); auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, x_dim0 + 1, 1); SumCsr2DCudaKernel<<(); auto* out_values_data = out_values.data(); if (keep_dim) { - out_dims = make_ddim({x_dim0, x_dim1, 1}); + out_dims = common::make_ddim({x_dim0, x_dim1, 1}); } else { - out_dims = make_ddim({x_dim0, x_dim1}); + out_dims = common::make_ddim({x_dim0, x_dim1}); } DenseTensor 
x_crows_reshape = diff --git a/paddle/phi/kernels/sparse/sparse_utils_kernel.h b/paddle/phi/kernels/sparse/sparse_utils_kernel.h index 8d92b312bd857a..a072e721887b1a 100644 --- a/paddle/phi/kernels/sparse/sparse_utils_kernel.h +++ b/paddle/phi/kernels/sparse/sparse_utils_kernel.h @@ -170,7 +170,7 @@ void SparseCooTensorKernel(const Context& dev_ctx UNUSED, const DenseTensor& indices, const std::vector& shape, SparseCooTensor* out) { - *out = SparseCooTensor(indices, values, phi::make_ddim(shape)); + *out = SparseCooTensor(indices, values, common::make_ddim(shape)); } } // namespace sparse diff --git a/paddle/phi/kernels/sparse/unary_kernel.h b/paddle/phi/kernels/sparse/unary_kernel.h index 24bf4f131f6101..dff8742f5afc79 100644 --- a/paddle/phi/kernels/sparse/unary_kernel.h +++ b/paddle/phi/kernels/sparse/unary_kernel.h @@ -14,8 +14,8 @@ #pragma once +#include "paddle/common/ddim.h" #include "paddle/phi/common/int_array.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" diff --git a/paddle/phi/kernels/squeeze_grad_kernel.cc b/paddle/phi/kernels/squeeze_grad_kernel.cc index 75294557ace259..d39bd0c4952b4c 100644 --- a/paddle/phi/kernels/squeeze_grad_kernel.cc +++ b/paddle/phi/kernels/squeeze_grad_kernel.cc @@ -26,7 +26,7 @@ void SqueezeGradKernel(const Context& dev_ctx, const IntArray& axes UNUSED, DenseTensor* dx) { auto xshape_dims = xshape.dims(); - auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size()); + auto x_dims = common::slice_ddim(xshape_dims, 1, xshape_dims.size()); dev_ctx.template Alloc(dx); phi::Copy(dev_ctx, dout, dev_ctx.GetPlace(), false, dx); diff --git a/paddle/phi/kernels/stride/as_strided_grad_kernel.cc b/paddle/phi/kernels/stride/as_strided_grad_kernel.cc index 8cbcc5dfedc106..edf72e5da026cf 100644 --- a/paddle/phi/kernels/stride/as_strided_grad_kernel.cc +++ b/paddle/phi/kernels/stride/as_strided_grad_kernel.cc @@ -42,8 +42,8 @@ void AsStridedGradKernel(const Context& dev_ctx, phi::StridedCopyKernel( dev_ctx, out_grad, - phi::vectorize(tmp.dims()), - phi::vectorize(tmp.strides()), + common::vectorize(tmp.dims()), + common::vectorize(tmp.strides()), tmp.offset(), &tmp); })); diff --git a/paddle/phi/kernels/stride/complex_grad_kernel.cc b/paddle/phi/kernels/stride/complex_grad_kernel.cc index 91640c70a39297..800e484ea7eb88 100644 --- a/paddle/phi/kernels/stride/complex_grad_kernel.cc +++ b/paddle/phi/kernels/stride/complex_grad_kernel.cc @@ -37,8 +37,8 @@ void RealGradStridedKernel(const Context& dev_ctx, phi::StridedCopyKernel( dev_ctx, dout, - phi::vectorize(tmp.dims()), - phi::vectorize(tmp.strides()), + common::vectorize(tmp.dims()), + common::vectorize(tmp.strides()), tmp.offset(), &tmp); })); @@ -61,8 +61,8 @@ void ImagGradStridedKernel(const Context& dev_ctx, phi::StridedCopyKernel( dev_ctx, dout, - phi::vectorize(tmp.dims()), - phi::vectorize(tmp.strides()), + common::vectorize(tmp.dims()), + common::vectorize(tmp.strides()), tmp.offset(), &tmp); })); diff --git a/paddle/phi/kernels/stride/diagonal_grad_kernel.cc b/paddle/phi/kernels/stride/diagonal_grad_kernel.cc index d5ebcd6f4ab8a5..fc44c09118fad8 100644 --- a/paddle/phi/kernels/stride/diagonal_grad_kernel.cc +++ b/paddle/phi/kernels/stride/diagonal_grad_kernel.cc @@ -46,8 +46,8 @@ void DiagonalGradStridedKernel(const Context& dev_ctx, phi::StridedCopyKernel( dev_ctx, out_grad, - phi::vectorize(tmp.dims()), - phi::vectorize(tmp.strides()), + common::vectorize(tmp.dims()), + common::vectorize(tmp.strides()), 
tmp.offset(), &tmp); })); diff --git a/paddle/phi/kernels/stride/diagonal_kernel.cc b/paddle/phi/kernels/stride/diagonal_kernel.cc index 31b2aa97e96fc6..f21ea6c24ac6f9 100644 --- a/paddle/phi/kernels/stride/diagonal_kernel.cc +++ b/paddle/phi/kernels/stride/diagonal_kernel.cc @@ -54,8 +54,8 @@ void DiagonalStridedKernel(const Context& dev_ctx, } } - std::vector shape = phi::vectorize(x.dims()); - std::vector stride = phi::vectorize(x.strides()); + std::vector shape = common::vectorize(x.dims()); + std::vector stride = common::vectorize(x.strides()); shape.erase(shape.begin() + std::max(axis1, axis2)); stride.erase(stride.begin() + std::max(axis1, axis2)); shape.erase(shape.begin() + std::min(axis1, axis2)); diff --git a/paddle/phi/kernels/stride/flatten_grad_kernel.cc b/paddle/phi/kernels/stride/flatten_grad_kernel.cc index a4cfe5b3d3941e..be7ed0721fdd2f 100644 --- a/paddle/phi/kernels/stride/flatten_grad_kernel.cc +++ b/paddle/phi/kernels/stride/flatten_grad_kernel.cc @@ -24,10 +24,10 @@ void FlattenGradStridedKernel(const Context& dev_ctx, const DenseTensor& out_grad, DenseTensor* x_grad) { auto xshape_dims = xshape.dims(); - auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size()); + auto x_dims = common::slice_ddim(xshape_dims, 1, xshape_dims.size()); ReshapeStridedKernel(dev_ctx, out_grad, - IntArray(phi::vectorize(x_dims)), + IntArray(common::vectorize(x_dims)), x_grad, nullptr); } diff --git a/paddle/phi/kernels/stride/flatten_kernel.cc b/paddle/phi/kernels/stride/flatten_kernel.cc index 3bba86123708d5..94b4ae0a89890f 100644 --- a/paddle/phi/kernels/stride/flatten_kernel.cc +++ b/paddle/phi/kernels/stride/flatten_kernel.cc @@ -25,7 +25,11 @@ void FlattenInferStridedKernel(const Context& dev_ctx, int stop_axis UNUSED, DenseTensor* out) { ReshapeStridedKernel( - dev_ctx, x, IntArray(phi::vectorize(out->dims())), out, nullptr); + dev_ctx, + x, + IntArray(common::vectorize(out->dims())), + out, + nullptr); } template diff --git a/paddle/phi/kernels/stride/index_select_grad_kernel.cc b/paddle/phi/kernels/stride/index_select_grad_kernel.cc index 15ab602fe5304d..99705b396f19ef 100644 --- a/paddle/phi/kernels/stride/index_select_grad_kernel.cc +++ b/paddle/phi/kernels/stride/index_select_grad_kernel.cc @@ -44,8 +44,8 @@ void IndexSelectGradStridedKernel(const Context& dev_ctx, phi::StridedCopyKernel( dev_ctx, out_grad, - phi::vectorize(tmp.dims()), - phi::vectorize(tmp.strides()), + common::vectorize(tmp.dims()), + common::vectorize(tmp.strides()), tmp.offset(), &tmp); })); diff --git a/paddle/phi/kernels/stride/index_select_kernel.cc b/paddle/phi/kernels/stride/index_select_kernel.cc index b7f96be147532a..ea278226ee6c2c 100644 --- a/paddle/phi/kernels/stride/index_select_kernel.cc +++ b/paddle/phi/kernels/stride/index_select_kernel.cc @@ -30,8 +30,8 @@ void IndexSelectStridedKernel(const Context& ctx, auto input_dim = x.dims(); dim = dim >= 0 ? 
dim : dim + input_dim.size(); - std::vector shape = phi::vectorize(x.dims()); - std::vector stride = phi::vectorize(x.strides()); + std::vector shape = common::vectorize(x.dims()); + std::vector stride = common::vectorize(x.strides()); int64_t offset = static_cast(x.offset()); offset = static_cast(offset + diff --git a/paddle/phi/kernels/stride/reshape_grad_kernel.cc b/paddle/phi/kernels/stride/reshape_grad_kernel.cc index 817baf7287c140..4d55c67fbcf0b0 100644 --- a/paddle/phi/kernels/stride/reshape_grad_kernel.cc +++ b/paddle/phi/kernels/stride/reshape_grad_kernel.cc @@ -26,7 +26,7 @@ void ReshapeGradStridedKernel(const Context& dev_ctx, ReshapeStridedKernel( dev_ctx, out_grad, - IntArray(phi::vectorize(x_grad->dims())), + IntArray(common::vectorize(x_grad->dims())), x_grad, nullptr); } diff --git a/paddle/phi/kernels/stride/slice_grad_kernel.cc b/paddle/phi/kernels/stride/slice_grad_kernel.cc index 32ec2c75974d3c..171c20b3b83acd 100644 --- a/paddle/phi/kernels/stride/slice_grad_kernel.cc +++ b/paddle/phi/kernels/stride/slice_grad_kernel.cc @@ -51,8 +51,8 @@ void SliceGradStridedKernel(const Context& dev_ctx, phi::StridedCopyKernel( dev_ctx, out_grad, - phi::vectorize(tmp.dims()), - phi::vectorize(tmp.strides()), + common::vectorize(tmp.dims()), + common::vectorize(tmp.strides()), tmp.offset(), &tmp); })); diff --git a/paddle/phi/kernels/stride/slice_kernel.cc b/paddle/phi/kernels/stride/slice_kernel.cc index 998bc2700df4fb..4e693ab4b0d32d 100644 --- a/paddle/phi/kernels/stride/slice_kernel.cc +++ b/paddle/phi/kernels/stride/slice_kernel.cc @@ -47,8 +47,9 @@ void SliceStridedKernel(const Context& ctx, phi::funcs::CheckAndUpdateSliceAttrs( in_dims, new_axes, &starts, &ends, nullptr, nullptr); - std::vector output_dims = phi::vectorize(input.dims()); - std::vector output_stride = phi::vectorize(input.strides()); + std::vector output_dims = common::vectorize(input.dims()); + std::vector output_stride = + common::vectorize(input.strides()); int64_t output_offset = static_cast(input.offset()); for (size_t i = 0; i < new_axes.size(); ++i) { diff --git a/paddle/phi/kernels/stride/squeeze_grad_kernel.cc b/paddle/phi/kernels/stride/squeeze_grad_kernel.cc index c472c67e651ab5..27361211e8fc02 100644 --- a/paddle/phi/kernels/stride/squeeze_grad_kernel.cc +++ b/paddle/phi/kernels/stride/squeeze_grad_kernel.cc @@ -25,9 +25,9 @@ void SqueezeGradStridedKernel(const Context& dev_ctx, const IntArray& axes UNUSED, DenseTensor* dx) { auto xshape_dims = xshape.dims(); - auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size()); + auto x_dims = common::slice_ddim(xshape_dims, 1, xshape_dims.size()); ReshapeStridedKernel( - dev_ctx, dout, IntArray(phi::vectorize(x_dims)), dx, nullptr); + dev_ctx, dout, IntArray(common::vectorize(x_dims)), dx, nullptr); } } // namespace phi diff --git a/paddle/phi/kernels/stride/squeeze_kernel.cc b/paddle/phi/kernels/stride/squeeze_kernel.cc index 33895dfcf8e66b..b03652baee624c 100644 --- a/paddle/phi/kernels/stride/squeeze_kernel.cc +++ b/paddle/phi/kernels/stride/squeeze_kernel.cc @@ -36,7 +36,7 @@ void SqueezeInferStridedKernel(const Context& dev_ctx, auto input_stride = input.strides(); if (input.Holder() == out->Holder() && input.meta() == out->meta()) { - output_dims = phi::vectorize(out->dims()); + output_dims = common::vectorize(out->dims()); if (axes.empty()) { for (int i = input_stride.size() - 1; i > 0; --i) { if (input_stride[i] != input_stride[i - 1]) { diff --git a/paddle/phi/kernels/stride/strided_slice_grad_kernel.cc 
b/paddle/phi/kernels/stride/strided_slice_grad_kernel.cc index 9b2d03a00e86eb..f0cd2d53bc8238 100644 --- a/paddle/phi/kernels/stride/strided_slice_grad_kernel.cc +++ b/paddle/phi/kernels/stride/strided_slice_grad_kernel.cc @@ -56,8 +56,8 @@ void StridedSliceRawGradStridedKernel(const Context& dev_ctx, phi::StridedCopyKernel( dev_ctx, out_grad, - phi::vectorize(tmp.dims()), - phi::vectorize(tmp.strides()), + common::vectorize(tmp.dims()), + common::vectorize(tmp.strides()), tmp.offset(), &tmp); })); diff --git a/paddle/phi/kernels/stride/strided_slice_kernel.cc b/paddle/phi/kernels/stride/strided_slice_kernel.cc index a57ed98d119a99..77919f8d000a00 100644 --- a/paddle/phi/kernels/stride/strided_slice_kernel.cc +++ b/paddle/phi/kernels/stride/strided_slice_kernel.cc @@ -35,8 +35,9 @@ void StridedSliceRawStridedKernel(const Context& dev_ctx, std::vector ends = ends_arr.GetData(); std::vector strides = strides_arr.GetData(); - std::vector output_dims = phi::vectorize(input.dims()); - std::vector output_stride = phi::vectorize(input.strides()); + std::vector output_dims = common::vectorize(input.dims()); + std::vector output_stride = + common::vectorize(input.strides()); int64_t output_offset = static_cast(input.offset()); for (size_t i = 0; i < axes.size(); ++i) { int64_t axis_size = input.dims()[axes[i]]; diff --git a/paddle/phi/kernels/stride/tensor_unfold_grad_kernel.cc b/paddle/phi/kernels/stride/tensor_unfold_grad_kernel.cc index 620d7bbb46ddc4..7dc3e6e46361ba 100644 --- a/paddle/phi/kernels/stride/tensor_unfold_grad_kernel.cc +++ b/paddle/phi/kernels/stride/tensor_unfold_grad_kernel.cc @@ -50,8 +50,8 @@ void TensorUnfoldGradKernel(const Context& dev_ctx, phi::StridedCopyKernel( dev_ctx, out_grad, - phi::vectorize(tmp.dims()), - phi::vectorize(tmp.strides()), + common::vectorize(tmp.dims()), + common::vectorize(tmp.strides()), tmp.offset(), &tmp); })); diff --git a/paddle/phi/kernels/stride/unsqueeze_grad_kernel.cc b/paddle/phi/kernels/stride/unsqueeze_grad_kernel.cc index 34a52f4659b274..c6c5c117cd94e4 100644 --- a/paddle/phi/kernels/stride/unsqueeze_grad_kernel.cc +++ b/paddle/phi/kernels/stride/unsqueeze_grad_kernel.cc @@ -24,9 +24,9 @@ void UnsqueezeGradStridedKernel(const Context& dev_ctx, const DenseTensor& dout, DenseTensor* dx) { auto xshape_dims = x_shape.dims(); - auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size()); + auto x_dims = common::slice_ddim(xshape_dims, 1, xshape_dims.size()); ReshapeStridedKernel( - dev_ctx, dout, IntArray(phi::vectorize(x_dims)), dx, nullptr); + dev_ctx, dout, IntArray(common::vectorize(x_dims)), dx, nullptr); } } // namespace phi diff --git a/paddle/phi/kernels/stride/unsqueeze_kernel.cc b/paddle/phi/kernels/stride/unsqueeze_kernel.cc index b97a0222e6cd62..bd1a200ea0eaae 100644 --- a/paddle/phi/kernels/stride/unsqueeze_kernel.cc +++ b/paddle/phi/kernels/stride/unsqueeze_kernel.cc @@ -28,11 +28,12 @@ void UnsqueezeInferStridedKernel(const Context& dev_ctx, const IntArray& axes_arr, DenseTensor* out) { std::vector axes = axes_arr.GetData(); - std::vector input_dims = phi::vectorize(input.dims()); - std::vector input_stride = phi::vectorize(input.strides()); + std::vector input_dims = common::vectorize(input.dims()); + std::vector input_stride = + common::vectorize(input.strides()); if (input.Holder() == out->Holder() && input.meta() == out->meta()) { - input_dims = phi::vectorize(out->dims()); + input_dims = common::vectorize(out->dims()); for (int64_t i = static_cast(axes.size() - 1); i >= 0; --i) { axes[i] = static_cast(axes[i] < 0 ? 
axes[i] + input_dims.size() : axes[i]); diff --git a/paddle/phi/kernels/stride/view_grad_kernel.cc b/paddle/phi/kernels/stride/view_grad_kernel.cc index e63598b5b58735..d04998c95622e5 100644 --- a/paddle/phi/kernels/stride/view_grad_kernel.cc +++ b/paddle/phi/kernels/stride/view_grad_kernel.cc @@ -25,7 +25,7 @@ void ViewShapeGradKernel(const Context& dev_ctx, const std::vector& dims, DenseTensor* input_grad) { ViewShapeKernel( - dev_ctx, out_grad, phi::vectorize(input.dims()), input_grad); + dev_ctx, out_grad, common::vectorize(input.dims()), input_grad); } template diff --git a/paddle/phi/kernels/strings/gpu/copy_utils.h b/paddle/phi/kernels/strings/gpu/copy_utils.h index 36cad026184242..6e413ef73098dd 100644 --- a/paddle/phi/kernels/strings/gpu/copy_utils.h +++ b/paddle/phi/kernels/strings/gpu/copy_utils.h @@ -112,7 +112,7 @@ void SerializeOnCPU(const Context& dev_ctx, for (int64_t i = 0; i < numel; ++i) { num += src_str[i].length() + 1; } - dst->Resize(phi::make_ddim({num})); + dst->Resize(common::make_ddim({num})); uint8_t* strings_data = dev_ctx.template HostAlloc(dst); auto* strings_offset = reinterpret_cast(strings_data); int start_offset = sizeof(int) * (numel + 1); @@ -137,7 +137,7 @@ void DeserializeOnCPU(const Context& dev_ctx, auto* strings_data = reinterpret_cast(src.data()); auto* strings_offset = reinterpret_cast(strings_data); int numel = strings_offset[0] / sizeof(int) - 1; - dst->Resize(phi::make_ddim({numel})); + dst->Resize(common::make_ddim({numel})); dtype::pstring* dst_str = dev_ctx.template HostAlloc(dst); for (int i = 0; i < numel; ++i) { // -1 not include '\0' @@ -156,7 +156,7 @@ void SerializeOnGPU(const phi::GPUContext& dev_ctx, auto strings_size = GetAllStringsSize(dev_ctx, src_str, numel); strings_size += sizeof(int32_t) * (numel + 1); - dst->Resize(phi::make_ddim({strings_size})); + dst->Resize(common::make_ddim({strings_size})); uint8_t* strings_data = dev_ctx.template Alloc(dst); auto* strings_offset = reinterpret_cast(strings_data); @@ -184,7 +184,7 @@ void DeserializeOnGPU(const phi::GPUContext& dev_ctx, &numel, strings_data, sizeof(numel), cudaMemcpyDeviceToHost); #endif numel = numel / sizeof(int) - 1; - dst->Resize(phi::make_ddim({numel})); + dst->Resize(common::make_ddim({numel})); dtype::pstring* dst_str = dev_ctx.template Alloc(dst); dim3 block_size = dim3(PREDEFINED_BLOCK_SIZE, 1); diff --git a/paddle/phi/kernels/strings/strings_empty_kernel.cc b/paddle/phi/kernels/strings/strings_empty_kernel.cc index 22a43ceaff1c17..10d958f354e2d3 100644 --- a/paddle/phi/kernels/strings/strings_empty_kernel.cc +++ b/paddle/phi/kernels/strings/strings_empty_kernel.cc @@ -24,7 +24,7 @@ template void EmptyKernel(const Context& dev_ctx, const IntArray& shape, StringTensor* out) { - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); dev_ctx.template Alloc(out); } diff --git a/paddle/phi/kernels/strings/unicode.h b/paddle/phi/kernels/strings/unicode.h index 45e41b72d086c0..410543c27d68fc 100644 --- a/paddle/phi/kernels/strings/unicode.h +++ b/paddle/phi/kernels/strings/unicode.h @@ -17,8 +17,8 @@ limitations under the License. 
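The slice_ddim/product call sites in the squeeze, unsqueeze, and argsort hunks nearby share two idioms: an XShape tensor stores a leading 0 followed by the real input dims, so the true shape is the tail slice from index 1; and reductions around an axis factor the element count into before/after products. A sketch with vector stand-ins (the real common::slice_ddim and common::product in paddle/common/ddim.h take a DDim):

#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

// Stand-ins for common::slice_ddim and common::product, illustration only.
std::vector<int64_t> slice_ddim(const std::vector<int64_t>& d, int begin,
                                int end) {
  return {d.begin() + begin, d.begin() + end};
}
int64_t product(const std::vector<int64_t>& d) {
  return std::accumulate(d.begin(), d.end(), int64_t{1}, std::multiplies<>());
}

int main() {
  // XShape convention: leading 0, then the real input dims.
  std::vector<int64_t> xshape{0, 4, 6, 5};
  auto x_dims = slice_ddim(xshape, 1, static_cast<int>(xshape.size()));

  // Argsort-style factorization around axis 1: 4 rows before, 5 cols after.
  int axis = 1;
  int64_t len_before = product(slice_ddim(x_dims, 0, axis));
  int64_t len_after =
      product(slice_ddim(x_dims, axis + 1, static_cast<int>(x_dims.size())));
  return (len_before == 4 && len_after == 5) ? 0 : 1;
}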
*/ #include #include +#include "paddle/common/macros.h" #include "paddle/phi/core/hostdevice.h" -#include "paddle/phi/core/macros.h" namespace phi { namespace strings { diff --git a/paddle/phi/kernels/strings/unicode_flag.h b/paddle/phi/kernels/strings/unicode_flag.h index 7e97b80c2c642a..c09104f6bfb8a8 100644 --- a/paddle/phi/kernels/strings/unicode_flag.h +++ b/paddle/phi/kernels/strings/unicode_flag.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/phi/core/macros.h" +#include "paddle/common/macros.h" namespace phi { namespace strings { diff --git a/paddle/phi/kernels/transfer_layout_kernel.cc b/paddle/phi/kernels/transfer_layout_kernel.cc index d001822b21fc8f..dbaf74f8c0c984 100644 --- a/paddle/phi/kernels/transfer_layout_kernel.cc +++ b/paddle/phi/kernels/transfer_layout_kernel.cc @@ -69,7 +69,7 @@ void TransferLayoutGeneral(const Context& dev_ctx, dst_dim[i] = src_dim[axis[i]]; } - out->Resize(phi::make_ddim(dst_dim)); + out->Resize(common::make_ddim(dst_dim)); dev_ctx.Alloc(out, x.dtype()); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) // In GPU fp16 model, we will insert many transfer_layout ops in diff --git a/paddle/phi/kernels/triangular_solve_grad_kernel.h b/paddle/phi/kernels/triangular_solve_grad_kernel.h index eb5a5ab461a1dc..1b51ad50d3246a 100644 --- a/paddle/phi/kernels/triangular_solve_grad_kernel.h +++ b/paddle/phi/kernels/triangular_solve_grad_kernel.h @@ -14,9 +14,9 @@ #pragma once +#include "paddle/common/ddim.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" namespace phi { diff --git a/paddle/phi/kernels/unsqueeze_grad_kernel.cc b/paddle/phi/kernels/unsqueeze_grad_kernel.cc index a281bb66b4c671..1603b1e2f63987 100644 --- a/paddle/phi/kernels/unsqueeze_grad_kernel.cc +++ b/paddle/phi/kernels/unsqueeze_grad_kernel.cc @@ -26,7 +26,7 @@ void UnsqueezeGradKernel(const Context& dev_ctx, const DenseTensor& dout, DenseTensor* dx) { auto xshape_dims = x_shape.dims(); - auto x_dims = phi::slice_ddim(xshape_dims, 1, xshape_dims.size()); + auto x_dims = common::slice_ddim(xshape_dims, 1, xshape_dims.size()); dev_ctx.template Alloc(dx); phi::Copy(dev_ctx, dout, dev_ctx.GetPlace(), true, dx); dx->Resize(x_dims); diff --git a/paddle/phi/kernels/xpu/activation_grad_kernel.cc b/paddle/phi/kernels/xpu/activation_grad_kernel.cc index 89732be24cc918..7cada9005c33eb 100644 --- a/paddle/phi/kernels/xpu/activation_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/activation_grad_kernel.cc @@ -195,7 +195,7 @@ struct XPULogGradFunctor : public funcs::BaseActivationFunctor { dev_ctx.x_context(), tmp, x->numel(), static_cast(1.0)); PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant"); - auto x_dims = vectorize(x->dims()); + auto x_dims = common::vectorize(x->dims()); // use [1] to replace [], because xpu not support [] if (x_dims.size() == 0) { @@ -471,9 +471,9 @@ void PowGradKernel(const Context& dev_ctx, T* x_grad = dx->data(); // check dims: all dims should equal - auto x_dims = vectorize(x.dims()); - auto dy_dims = vectorize(dout.dims()); - auto dx_dims = vectorize(dx->dims()); + auto x_dims = common::vectorize(x.dims()); + auto dy_dims = common::vectorize(dout.dims()); + auto dx_dims = common::vectorize(dx->dims()); PADDLE_ENFORCE_EQ(x_dims, dy_dims, errors::PreconditionNotMet("x_dims should match dy_dims.")); diff --git 
a/paddle/phi/kernels/xpu/activation_kernel.cc b/paddle/phi/kernels/xpu/activation_kernel.cc index 54064e90b82829..0608225281f10e 100644 --- a/paddle/phi/kernels/xpu/activation_kernel.cc +++ b/paddle/phi/kernels/xpu/activation_kernel.cc @@ -212,7 +212,7 @@ void PowKernel(const Context& dev_ctx, static_cast(&pow_factor), sizeof(T)); - auto x_dims = vectorize(x.dims()); + auto x_dims = common::vectorize(x.dims()); // use [1] to replace [], because xpu not support [] if (x_dims.size() == 0) { x_dims = std::vector({1}); diff --git a/paddle/phi/kernels/xpu/arange_kernel.cc b/paddle/phi/kernels/xpu/arange_kernel.cc index 7afdfcd60daf87..5c9ba973340b7e 100644 --- a/paddle/phi/kernels/xpu/arange_kernel.cc +++ b/paddle/phi/kernels/xpu/arange_kernel.cc @@ -32,7 +32,7 @@ void ArangeTensorKernel(const Context& dev_ctx, int64_t size = 0; phi::funcs::GetSize(start_value, end_value, step_value, &size); - out->Resize(phi::make_ddim({size})); + out->Resize(common::make_ddim({size})); auto* out_data = dev_ctx.template Alloc(out); int ret = xpu::range( diff --git a/paddle/phi/kernels/xpu/arg_min_max_kernel.cc b/paddle/phi/kernels/xpu/arg_min_max_kernel.cc index b5b2ed7d328884..dda71ebe46120b 100644 --- a/paddle/phi/kernels/xpu/arg_min_max_kernel.cc +++ b/paddle/phi/kernels/xpu/arg_min_max_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/arg_min_max_kernel.h" +#include "paddle/common/ddim.h" #include "paddle/phi/backends/xpu/xpu_context.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -46,14 +46,14 @@ void ArgMaxKernel(const Context& dev_ctx, DDim x_dims; int axis_val = axis.to(); if (flatten) { - x_dims = phi::make_ddim({x.numel()}); + x_dims = common::make_ddim({x.numel()}); // if flatten, the axis just as 0 axis_val = 0; } else { x_dims = x.dims(); if (axis_val < 0) axis_val += x_dims.size(); } - auto xdims_vec = phi::vectorize(x_dims); + auto xdims_vec = common::vectorize(x_dims); int r = 0; if (dtype != DataType::INT32) { dev_ctx.template Alloc(out); diff --git a/paddle/phi/kernels/xpu/argsort_grad_kernel.cc b/paddle/phi/kernels/xpu/argsort_grad_kernel.cc index 4ebab7b37fc301..a96c3ade04163f 100644 --- a/paddle/phi/kernels/xpu/argsort_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/argsort_grad_kernel.cc @@ -50,9 +50,9 @@ void ArgsortGradKernel(const Context& dev_ctx, if (axis == -1 || axis + 1 == in_dims.size()) { is_need_transpose = false; } - int len_before = phi::product(phi::slice_ddim(in_dims, 0, axis)); + int len_before = common::product(common::slice_ddim(in_dims, 0, axis)); int len_after = - phi::product(phi::slice_ddim(in_dims, axis + 1, in_dims.size())); + common::product(common::slice_ddim(in_dims, axis + 1, in_dims.size())); int m = len_before * len_after; int n = in_dims[axis]; int len = m * n; diff --git a/paddle/phi/kernels/xpu/argsort_kernel.cc b/paddle/phi/kernels/xpu/argsort_kernel.cc index e1875b8f52c788..1158045a2e602b 100644 --- a/paddle/phi/kernels/xpu/argsort_kernel.cc +++ b/paddle/phi/kernels/xpu/argsort_kernel.cc @@ -186,9 +186,9 @@ void ArgsortKernel(const Context& dev_ctx, return; } - int len_before = phi::product(phi::slice_ddim(in_dims, 0, axis)); + int len_before = common::product(common::slice_ddim(in_dims, 0, axis)); int len_after = - phi::product(phi::slice_ddim(in_dims, axis + 1, in_dims.size())); + common::product(common::slice_ddim(in_dims, axis + 1, in_dims.size())); std::vector permute_vec{0, 2, 1}; std::vector 
data_shape{len_before, n, len_after}; diff --git a/paddle/phi/kernels/xpu/batch_norm_grad_kernel.cc b/paddle/phi/kernels/xpu/batch_norm_grad_kernel.cc index 863bc2759b39a3..454141ff4c3ea4 100644 --- a/paddle/phi/kernels/xpu/batch_norm_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/batch_norm_grad_kernel.cc @@ -99,7 +99,7 @@ void BatchNormGradKernel(const Context &dev_ctx, "But recevived 'data_layout' is [%s].", data_layout)); - const auto data_layout_val = phi::StringToDataLayout(data_layout); + const auto data_layout_val = common::StringToDataLayout(data_layout); use_global_stats = is_test || use_global_stats; diff --git a/paddle/phi/kernels/xpu/batch_norm_kernel.cc b/paddle/phi/kernels/xpu/batch_norm_kernel.cc index 2abb1686daed98..8427c49b43d42f 100644 --- a/paddle/phi/kernels/xpu/batch_norm_kernel.cc +++ b/paddle/phi/kernels/xpu/batch_norm_kernel.cc @@ -43,7 +43,7 @@ void BatchNormKernel(const Context& dev_ctx, using XPUType = typename XPUTypeTrait::Type; bool test_mode = is_test && (!trainable_statistics); bool global_stats = test_mode || use_global_stats; - const auto data_layout = phi::StringToDataLayout(data_layout_str); + const auto data_layout = common::StringToDataLayout(data_layout_str); PADDLE_ENFORCE_EQ(data_layout_str == "NCHW" || data_layout_str == "NHWC", true, phi::errors::InvalidArgument( diff --git a/paddle/phi/kernels/xpu/c_split_kernel.cc b/paddle/phi/kernels/xpu/c_split_kernel.cc index f330323059e2b7..1d0a6ca31f66ec 100644 --- a/paddle/phi/kernels/xpu/c_split_kernel.cc +++ b/paddle/phi/kernels/xpu/c_split_kernel.cc @@ -56,8 +56,8 @@ void CSplitKernel(const Context& dev_ctx, int64_t end_size = dims[dims_size - 1]; // remain dim - auto remain_ddim = phi::slice_ddim(dims, 0, dims_size - 1); - int64_t remain_numel = phi::product(remain_ddim); + auto remain_ddim = common::slice_ddim(dims, 0, dims_size - 1); + int64_t remain_numel = common::product(remain_ddim); dims[dims_size - 1] /= nranks; out->Resize(dims); diff --git a/paddle/phi/kernels/xpu/compare_kernel.cc b/paddle/phi/kernels/xpu/compare_kernel.cc index 51b392b7144ae2..2732823fd94282 100644 --- a/paddle/phi/kernels/xpu/compare_kernel.cc +++ b/paddle/phi/kernels/xpu/compare_kernel.cc @@ -33,8 +33,8 @@ void XPUCompareKernelImpl(const Context& dev_ctx, bool*, const std::vector&, const std::vector&)> func) { - auto x_shape = vectorize(x.dims()); - auto y_shape = vectorize(y.dims()); + auto x_shape = common::vectorize(x.dims()); + auto y_shape = common::vectorize(y.dims()); if (x.dims().size() == 0) { x_shape = std::vector({1}); diff --git a/paddle/phi/kernels/xpu/contiguous_kernel.cc b/paddle/phi/kernels/xpu/contiguous_kernel.cc index 8e3c0a95954527..922bda579cbdbe 100644 --- a/paddle/phi/kernels/xpu/contiguous_kernel.cc +++ b/paddle/phi/kernels/xpu/contiguous_kernel.cc @@ -39,8 +39,8 @@ void ContiguousKernel(const Context& dev_ctx, r = xpu::as_strided(dev_ctx.x_context(), input_data, output_data, - phi::vectorize(input.dims()), - phi::vectorize(input.strides()), + common::vectorize(input.dims()), + common::vectorize(input.strides()), 0); } } else if (std::is_same::value) { @@ -53,8 +53,8 @@ void ContiguousKernel(const Context& dev_ctx, r = xpu::as_strided(dev_ctx.x_context(), input_data, output_data, - phi::vectorize(input.dims()), - phi::vectorize(input.strides()), + common::vectorize(input.dims()), + common::vectorize(input.strides()), 0); } } else if (std::is_same::value) { @@ -66,12 +66,13 @@ void ContiguousKernel(const Context& dev_ctx, r = xpu::copy( dev_ctx.x_context(), input_data, output_data, 1); } else { - r = 
xpu::as_strided(dev_ctx.x_context(), - input_data, - output_data, - phi::vectorize(input.dims()), - phi::vectorize(input.strides()), - 0); + r = xpu::as_strided( + dev_ctx.x_context(), + input_data, + output_data, + common::vectorize(input.dims()), + common::vectorize(input.strides()), + 0); } } else if (std::is_same::value) { using XPUFLOAT16 = typename XPUTypeTrait::Type; @@ -82,12 +83,13 @@ void ContiguousKernel(const Context& dev_ctx, r = xpu::copy( dev_ctx.x_context(), input_data, output_data, 1); } else { - r = xpu::as_strided(dev_ctx.x_context(), - input_data, - output_data, - phi::vectorize(input.dims()), - phi::vectorize(input.strides()), - 0); + r = xpu::as_strided( + dev_ctx.x_context(), + input_data, + output_data, + common::vectorize(input.dims()), + common::vectorize(input.strides()), + 0); } } else if (std::is_same::value) { using XPUFLOAT16 = typename XPUTypeTrait::Type; @@ -98,12 +100,13 @@ void ContiguousKernel(const Context& dev_ctx, r = xpu::copy( dev_ctx.x_context(), input_data, output_data, 1); } else { - r = xpu::as_strided(dev_ctx.x_context(), - input_data, - output_data, - phi::vectorize(input.dims()), - phi::vectorize(input.strides()), - 0); + r = xpu::as_strided( + dev_ctx.x_context(), + input_data, + output_data, + common::vectorize(input.dims()), + common::vectorize(input.strides()), + 0); } } else if (std::is_same::value) { auto input_data = reinterpret_cast(input.data()); @@ -115,8 +118,8 @@ void ContiguousKernel(const Context& dev_ctx, r = xpu::as_strided(dev_ctx.x_context(), input_data, output_data, - phi::vectorize(input.dims()), - phi::vectorize(input.strides()), + common::vectorize(input.dims()), + common::vectorize(input.strides()), 0); } } else if (std::is_same::value) { @@ -129,8 +132,8 @@ void ContiguousKernel(const Context& dev_ctx, r = xpu::as_strided(dev_ctx.x_context(), input_data, output_data, - phi::vectorize(input.dims()), - phi::vectorize(input.strides()), + common::vectorize(input.dims()), + common::vectorize(input.strides()), 0); } } else if (std::is_same::value) { @@ -143,8 +146,8 @@ void ContiguousKernel(const Context& dev_ctx, r = xpu::as_strided(dev_ctx.x_context(), input_data, output_data, - phi::vectorize(input.dims()), - phi::vectorize(input.strides()), + common::vectorize(input.dims()), + common::vectorize(input.strides()), 0); } } else if (std::is_same::value) { @@ -157,8 +160,8 @@ void ContiguousKernel(const Context& dev_ctx, r = xpu::as_strided(dev_ctx.x_context(), input_data, output_data, - phi::vectorize(input.dims()), - phi::vectorize(input.strides()), + common::vectorize(input.dims()), + common::vectorize(input.strides()), 0); } } else if (std::is_same::value) { @@ -170,8 +173,8 @@ void ContiguousKernel(const Context& dev_ctx, r = xpu::as_strided(dev_ctx.x_context(), input_data, output_data, - phi::vectorize(input.dims()), - phi::vectorize(input.strides()), + common::vectorize(input.dims()), + common::vectorize(input.strides()), 0); } } else { diff --git a/paddle/phi/kernels/xpu/conv_grad_kernel.cc b/paddle/phi/kernels/xpu/conv_grad_kernel.cc index 0c40e09d2202f4..03276ebd53b5f1 100644 --- a/paddle/phi/kernels/xpu/conv_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/conv_grad_kernel.cc @@ -48,11 +48,11 @@ void ConvGradKernel(const Context& dev_ctx, ("XPU doesn't support data_format is NDHWC in conv grad op."))); phi::DDim in_data_dims = - phi::slice_ddim(input.dims(), 2, input.dims().size()); + common::slice_ddim(input.dims(), 2, input.dims().size()); phi::DDim filter_data_dims = - phi::slice_ddim(filter.dims(), 2, 
filter.dims().size()); - std::vector ksize = phi::vectorize(filter_data_dims); - std::vector filter_shape = phi::vectorize(filter.dims()); + common::slice_ddim(filter.dims(), 2, filter.dims().size()); + std::vector ksize = common::vectorize(filter_data_dims); + std::vector filter_shape = common::vectorize(filter.dims()); UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); @@ -269,11 +269,11 @@ void Conv3DGradKernel(const Context& dev_ctx, if (!input_grad && !filter_grad) return; phi::DDim in_data_dims = - phi::slice_ddim(input.dims(), 2, input.dims().size()); + common::slice_ddim(input.dims(), 2, input.dims().size()); phi::DDim filter_data_dims = - phi::slice_ddim(filter.dims(), 2, filter.dims().size()); - std::vector ksize = phi::vectorize(filter_data_dims); - std::vector filter_shape = phi::vectorize(filter.dims()); + common::slice_ddim(filter.dims(), 2, filter.dims().size()); + std::vector ksize = common::vectorize(filter_data_dims); + std::vector filter_shape = common::vectorize(filter.dims()); UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); diff --git a/paddle/phi/kernels/xpu/conv_kernel.cc b/paddle/phi/kernels/xpu/conv_kernel.cc index 7a699225f3b01b..0dc93d676186bf 100644 --- a/paddle/phi/kernels/xpu/conv_kernel.cc +++ b/paddle/phi/kernels/xpu/conv_kernel.cc @@ -47,10 +47,10 @@ void ConvKernel(const Context& dev_ctx, ("XPU does not support data_format is NDHWC in conv op."))); phi::DDim in_data_dims = - phi::slice_ddim(input.dims(), 2, input.dims().size()); + common::slice_ddim(input.dims(), 2, input.dims().size()); phi::DDim filter_data_dims = - phi::slice_ddim(filter.dims(), 2, filter.dims().size()); - std::vector ksize = phi::vectorize(filter_data_dims); + common::slice_ddim(filter.dims(), 2, filter.dims().size()); + std::vector ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); @@ -78,7 +78,7 @@ void ConvKernel(const Context& dev_ctx, if (data_format == "NHWC") { filter_data_tmp = RAII_GUARD.alloc(filter.numel()); PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp); - std::vector filter_shape = phi::vectorize(filter.dims()); + std::vector filter_shape = common::vectorize(filter.dims()); int r = xpu::transpose(dev_ctx.x_context(), filter_data, filter_data_tmp, @@ -215,10 +215,10 @@ void Conv3DKernel(const Context& dev_ctx, dev_ctx.template Alloc(out); phi::DDim in_data_dims = - phi::slice_ddim(input.dims(), 2, input.dims().size()); + common::slice_ddim(input.dims(), 2, input.dims().size()); phi::DDim filter_data_dims = - phi::slice_ddim(filter.dims(), 2, filter.dims().size()); - std::vector ksize = phi::vectorize(filter_data_dims); + common::slice_ddim(filter.dims(), 2, filter.dims().size()); + std::vector ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); @@ -248,7 +248,7 @@ void Conv3DKernel(const Context& dev_ctx, if (data_format == "NDHWC") { filter_data_tmp = RAII_GUARD.alloc(filter.numel()); PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp); - std::vector filter_shape = phi::vectorize(filter.dims()); + std::vector filter_shape = common::vectorize(filter.dims()); int r = xpu::transpose(dev_ctx.x_context(), filter_data, filter_data_tmp, diff --git a/paddle/phi/kernels/xpu/conv_transpose_grad_kernel.cc b/paddle/phi/kernels/xpu/conv_transpose_grad_kernel.cc index f6090980745bf8..296e02c28016d1 100644 --- 
a/paddle/phi/kernels/xpu/conv_transpose_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/conv_transpose_grad_kernel.cc @@ -52,7 +52,7 @@ void Conv2dTransposeGradKernel(const Context& ctx, DDim in_data_dims = slice_ddim(x.dims(), 2, x.dims().size()); DDim filter_data_dims = slice_ddim(filter_.dims(), 2, filter_.dims().size()); - std::vector ksize = vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings_, &dilations_, padding_algorithm, in_data_dims, strides, ksize); diff --git a/paddle/phi/kernels/xpu/conv_transpose_kernel.cc b/paddle/phi/kernels/xpu/conv_transpose_kernel.cc index ae8d71a68300f8..2a1195e48c1f00 100644 --- a/paddle/phi/kernels/xpu/conv_transpose_kernel.cc +++ b/paddle/phi/kernels/xpu/conv_transpose_kernel.cc @@ -63,7 +63,7 @@ void Conv2dTransposeKernel(const Context& ctx, DDim in_data_dims = slice_ddim(x.dims(), 2, x.dims().size()); DDim filter_data_dims = slice_ddim(filter.dims(), 2, filter.dims().size()); - std::vector ksize = vectorize(filter_data_dims); + std::vector ksize = common::vectorize(filter_data_dims); std::vector paddings_ = paddings; std::vector dilations_ = dilations; diff --git a/paddle/phi/kernels/xpu/cross_entropy_kernel.cc b/paddle/phi/kernels/xpu/cross_entropy_kernel.cc index b678fde9a882b8..4a3e68169c6b2b 100644 --- a/paddle/phi/kernels/xpu/cross_entropy_kernel.cc +++ b/paddle/phi/kernels/xpu/cross_entropy_kernel.cc @@ -43,7 +43,7 @@ void CrossEntropyWithSoftmaxKernel(const Context& dev_ctx, dev_ctx.template Alloc(loss); const int n = phi::funcs::SizeToAxis(axis, logits.dims()); const int d = phi::funcs::SizeFromAxis(axis, logits.dims()); - std::vector logits_dims = phi::vectorize(logits.dims()); + std::vector logits_dims = common::vectorize(logits.dims()); int t = logits_dims[axis]; diff --git a/paddle/phi/kernels/xpu/cum_kernel.cc b/paddle/phi/kernels/xpu/cum_kernel.cc index cadacf102a8576..64750f9fc54709 100644 --- a/paddle/phi/kernels/xpu/cum_kernel.cc +++ b/paddle/phi/kernels/xpu/cum_kernel.cc @@ -40,7 +40,7 @@ void CumsumKernel(const Context& dev_ctx, } // prepare for call xdnn api - std::vector x_shape = phi::vectorize(x.dims()); + std::vector x_shape = common::vectorize(x.dims()); int axis_as_int = axis.to(); if (flatten) { diff --git a/paddle/phi/kernels/xpu/cumprod_kernel.cc b/paddle/phi/kernels/xpu/cumprod_kernel.cc index c9b771c7bd3ef4..da9cdf4cfa6acf 100644 --- a/paddle/phi/kernels/xpu/cumprod_kernel.cc +++ b/paddle/phi/kernels/xpu/cumprod_kernel.cc @@ -29,7 +29,7 @@ void CumprodKernel(const Context& dev_ctx, auto* x_data = x->data(); auto* out_data = dev_ctx.template Alloc(out); DDim shape = x->dims(); - std::vector xshape = phi::vectorize(shape); + std::vector xshape = common::vectorize(shape); if (dim < 0) dim += xshape.size(); if (shape.size() == 0) { diff --git a/paddle/phi/kernels/xpu/deformable_conv_grad_kernel.cc b/paddle/phi/kernels/xpu/deformable_conv_grad_kernel.cc index 9b975698e9a99c..45b1d33a9f7ffd 100644 --- a/paddle/phi/kernels/xpu/deformable_conv_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/deformable_conv_grad_kernel.cc @@ -75,7 +75,7 @@ void DeformableConvGradKernel(const Context& dev_ctx, "in deformable_conv_grad op.")); const int batch_size = static_cast(x.dims()[0]); - std::vector output_shape_vec(phi::vectorize(out_grad.dims())); + std::vector output_shape_vec(common::vectorize(out_grad.dims())); const T* output_grad_ptr = out_grad.data(); const T* input_ptr = x.data(); const T* filter_ptr = filter.data(); diff --git 
a/paddle/phi/kernels/xpu/deformable_conv_kernel.cc b/paddle/phi/kernels/xpu/deformable_conv_kernel.cc index 895af9486024da..29c5d6896f3ed1 100644 --- a/paddle/phi/kernels/xpu/deformable_conv_kernel.cc +++ b/paddle/phi/kernels/xpu/deformable_conv_kernel.cc @@ -54,7 +54,7 @@ void DeformableConvKernel(const Context& dev_ctx, "in deformable_conv op.")); const int batch_size = static_cast(x.dims()[0]); - std::vector output_shape_vec(phi::vectorize(out->dims())); + std::vector output_shape_vec(common::vectorize(out->dims())); const T* input_ptr = x.data(); const T* filter_ptr = filter.data(); diff --git a/paddle/phi/kernels/xpu/diag_kernel.cc b/paddle/phi/kernels/xpu/diag_kernel.cc index fe7495f471d09f..89c991742e83cc 100644 --- a/paddle/phi/kernels/xpu/diag_kernel.cc +++ b/paddle/phi/kernels/xpu/diag_kernel.cc @@ -31,8 +31,8 @@ void DiagKernel(const Context& dev_ctx, dev_ctx.template Alloc(out); auto* out_data = reinterpret_cast(out->data()); - auto x_shape = vectorize(x.dims()); - auto out_shape = vectorize(out->dims()); + auto x_shape = common::vectorize(x.dims()); + auto out_shape = common::vectorize(out->dims()); if (x.dims().size() == 0) { x_shape = std::vector({1}); diff --git a/paddle/phi/kernels/xpu/diagonal_kernel.cc b/paddle/phi/kernels/xpu/diagonal_kernel.cc index 708e4d0bd8c88d..eabed011deb5c3 100644 --- a/paddle/phi/kernels/xpu/diagonal_kernel.cc +++ b/paddle/phi/kernels/xpu/diagonal_kernel.cc @@ -28,8 +28,8 @@ void DiagonalKernel(const Context& dev_ctx, DenseTensor* out) { using XPUType = typename XPUTypeTrait::Type; T* out_data = dev_ctx.template Alloc(out); - std::vector xshape = phi::vectorize(x.dims()); - std::vector yshape = phi::vectorize(out->dims()); + std::vector xshape = common::vectorize(x.dims()); + std::vector yshape = common::vectorize(out->dims()); int r = xpu::diagonal(dev_ctx.x_context(), reinterpret_cast(x.data()), diff --git a/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc index 4bb12980ec9e31..14a8ad6d34634b 100644 --- a/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/elementwise_add_grad_kernel.cc @@ -61,7 +61,7 @@ void AddGradKernel(const Context& dev_ctx, } std::vector reduce_dims = funcs::GetReduceDim(dx->dims(), dz_dims, axis); - std::vector dz_vector = phi::vectorize(dz_dims); + std::vector dz_vector = common::vectorize(dz_dims); int ret = xpu::reduce_sum(dev_ctx.x_context(), @@ -86,7 +86,7 @@ void AddGradKernel(const Context& dev_ctx, } else { std::vector reduce_dims = funcs::GetReduceDim(dy->dims(), dz_dims, axis); - std::vector dz_vector = phi::vectorize(dz_dims); + std::vector dz_vector = common::vectorize(dz_dims); int ret = xpu::reduce_sum(dev_ctx.x_context(), reinterpret_cast(dz_data), diff --git a/paddle/phi/kernels/xpu/elementwise_add_kernel.cc b/paddle/phi/kernels/xpu/elementwise_add_kernel.cc index ad6796f81c5c45..569d967c1379b0 100644 --- a/paddle/phi/kernels/xpu/elementwise_add_kernel.cc +++ b/paddle/phi/kernels/xpu/elementwise_add_kernel.cc @@ -55,8 +55,8 @@ void GradAddXPUKernel(const Context& dev_ctx, using XPUType = typename XPUTypeTrait::Type; dev_ctx.template Alloc(out); - auto x_shape = phi::vectorize(x.dims()); - auto y_shape = phi::vectorize(y.dims()); + auto x_shape = common::vectorize(x.dims()); + auto y_shape = common::vectorize(y.dims()); int r = xpu::broadcast_add(dev_ctx.x_context(), reinterpret_cast(x.data()), reinterpret_cast(y.data()), diff --git a/paddle/phi/kernels/xpu/expand_as_kernel.cc 
b/paddle/phi/kernels/xpu/expand_as_kernel.cc
index ae72b85bf06b15..0701294217f412 100644
--- a/paddle/phi/kernels/xpu/expand_as_kernel.cc
+++ b/paddle/phi/kernels/xpu/expand_as_kernel.cc
@@ -28,7 +28,7 @@ void ExpandAs(const Context& context,
               DenseTensor* out) {
   using XPUType = typename XPUTypeTrait::Type;
   auto in_dims = x.dims();
-  auto vec_in_dims = phi::vectorize(in_dims);
+  auto vec_in_dims = common::vectorize(in_dims);
   auto diff = target_shape.size() - vec_in_dims.size();
   vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
   for (size_t i = 0; i < vec_in_dims.size(); ++i) {
@@ -49,7 +49,7 @@ void ExpandAs(const Context& context,
     }
   }
   if (target_shape.size() == 0) {
-    phi::DDim out_dims = phi::make_ddim(target_shape);
+    phi::DDim out_dims = common::make_ddim(target_shape);
     out->Resize(out_dims);
     context.template Alloc(out);
@@ -61,11 +61,11 @@ void ExpandAs(const Context& context,
     return;
   }
-  phi::DDim out_dims = phi::make_ddim(target_shape);
+  phi::DDim out_dims = common::make_ddim(target_shape);
   out->Resize(out_dims);
   context.template Alloc(out);
   auto& x_shape = vec_in_dims;
-  auto out_shape = phi::vectorize(out_dims);
+  auto out_shape = common::vectorize(out_dims);
   int r = XPU_SUCCESS;
diff --git a/paddle/phi/kernels/xpu/expand_grad_kernel.cc b/paddle/phi/kernels/xpu/expand_grad_kernel.cc
index 1665b8e31926c8..a346b07064fb7b 100644
--- a/paddle/phi/kernels/xpu/expand_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/expand_grad_kernel.cc
@@ -30,8 +30,8 @@ void ExpandGradKernel(const Context& ctx,
                       DenseTensor* in_grad) {
   using XPUType = typename XPUTypeTrait::Type;
   auto in_grad_data = ctx.template Alloc(in_grad);
-  auto out_grad_dims = phi::vectorize(out_grad.dims());
-  auto in_grad_dims = phi::vectorize(in_grad->dims());
+  auto out_grad_dims = common::vectorize(out_grad.dims());
+  auto in_grad_dims = common::vectorize(in_grad->dims());
   in_grad_dims.insert(
       in_grad_dims.begin(), out_grad.dims().size() - in_grad->dims().size(), 1);
diff --git a/paddle/phi/kernels/xpu/expand_kernel.cc b/paddle/phi/kernels/xpu/expand_kernel.cc
index d8808d3c3aae3a..f1bac016a17f14 100644
--- a/paddle/phi/kernels/xpu/expand_kernel.cc
+++ b/paddle/phi/kernels/xpu/expand_kernel.cc
@@ -27,7 +27,7 @@ void ExpandKernel(const Context& ctx,
   using XPUType = typename XPUTypeTrait::Type;
   auto in_dims = x.dims();
   auto expand_shape = shape.GetData();
-  auto vec_in_dims = phi::vectorize(in_dims);
+  auto vec_in_dims = common::vectorize(in_dims);
   auto diff = expand_shape.size() - vec_in_dims.size();
   vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
   std::vector final_expand_shape(vec_in_dims.size());
@@ -94,11 +94,11 @@ void ExpandKernel(const Context& ctx,
                         shape_size,
                         rank));
-  DDim out_dims = phi::make_ddim(final_expand_shape);
+  DDim out_dims = common::make_ddim(final_expand_shape);
   out->Resize(out_dims);
   ctx.template Alloc(out);
   auto& x_shape = vec_in_dims;
-  auto out_shape = phi::vectorize(out_dims);
+  auto out_shape = common::vectorize(out_dims);
   if (shape_size == 0) {
     x_shape = {1};
     out_shape = {1};
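(Aside: all three Expand kernels above share one shape idiom — vectorize the DDim, then left-pad the vector with 1s until its rank matches the broadcast target. A minimal standalone sketch of that step; AlignRank is a hypothetical helper name, not code from this patch.)

// Sketch of the rank-alignment step used by ExpandAs/ExpandKernel, where
// common::vectorize plays the role of the DDim-to-vector conversion.
#include <cstdint>
#include <vector>

std::vector<int64_t> AlignRank(std::vector<int64_t> vec_in_dims,
                               size_t target_rank) {
  // Left-pad with 1s: {32, 32} aligned to rank 4 -> {1, 1, 32, 32}.
  size_t diff = target_rank - vec_in_dims.size();
  vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
  return vec_in_dims;
}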
diff --git a/paddle/phi/kernels/xpu/fill_diagonal_tensor_kernel.cc b/paddle/phi/kernels/xpu/fill_diagonal_tensor_kernel.cc
index 216205f251046f..9d7f435d583a06 100644
--- a/paddle/phi/kernels/xpu/fill_diagonal_tensor_kernel.cc
+++ b/paddle/phi/kernels/xpu/fill_diagonal_tensor_kernel.cc
@@ -35,8 +35,8 @@ void FillDiagonalTensorKernel(const Context &ctx,
                     x.numel());
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
-  std::vector xshape = phi::vectorize(x.dims());
-  std::vector yshape = phi::vectorize(y.dims());
+  std::vector xshape = common::vectorize(x.dims());
+  std::vector yshape = common::vectorize(y.dims());
   r = xpu::fill_diagonal_tensor(ctx.x_context(),
                                 reinterpret_cast(x.data()),
diff --git a/paddle/phi/kernels/xpu/flip_kernel.cc b/paddle/phi/kernels/xpu/flip_kernel.cc
index 3311fce88bc1d0..56a31197e56c79 100644
--- a/paddle/phi/kernels/xpu/flip_kernel.cc
+++ b/paddle/phi/kernels/xpu/flip_kernel.cc
@@ -40,7 +40,7 @@ void FlipKernel(const Context& dev_ctx,
     phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out);
     return;
   }
-  std::vector x_shape = phi::vectorize(x.dims());
+  std::vector x_shape = common::vectorize(x.dims());
   auto x_data = reinterpret_cast(x.data());
   auto out_data = reinterpret_cast(out->data());
   auto numel = x.numel();
diff --git a/paddle/phi/kernels/xpu/full_kernel.cc b/paddle/phi/kernels/xpu/full_kernel.cc
index abbd28f74db709..1a780f132016d0 100644
--- a/paddle/phi/kernels/xpu/full_kernel.cc
+++ b/paddle/phi/kernels/xpu/full_kernel.cc
@@ -34,7 +34,7 @@ void FullKernel(const Context& dev_ctx,
                 DataType dtype,
                 DenseTensor* out) {
   using XPUInTDType = typename XPUTypeTrait::Type;
-  out->Resize(phi::make_ddim(shape.GetData()));
+  out->Resize(common::make_ddim(shape.GetData()));
   int numel = out->numel();
   dev_ctx.template Alloc(out);
   auto out_data = reinterpret_cast(out->data());
@@ -109,7 +109,7 @@ void FullBatchSizeLikeKernel(const Context& dev_ctx,
     // set the correct batch size for the LoDTensor.
     auto odims = out->dims();
     odims[out_batch_size_dim] = static_cast(x.lod().back().size()) - 1;
-    FullKernel(dev_ctx, phi::vectorize(odims), val, dtype, out);
+    FullKernel(dev_ctx, common::vectorize(odims), val, dtype, out);
   }
   FullLikeKernel(dev_ctx, x, val, dtype, out);
 }
diff --git a/paddle/phi/kernels/xpu/gather_nd_grad_kernel.cc b/paddle/phi/kernels/xpu/gather_nd_grad_kernel.cc
index d260e97ce30204..bab6e86ec1cde3 100644
--- a/paddle/phi/kernels/xpu/gather_nd_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/gather_nd_grad_kernel.cc
@@ -48,8 +48,8 @@ void GatherNdGradKernel(const Context &ctx,
       0,
       phi::errors::InvalidArgument("end_size[%d] should be 0", end_size));
   // remain dim
-  auto remain_ddim = phi::slice_ddim(index_dims, 0, index_dims_size - 1);
-  int64_t remain_numel = phi::product(remain_ddim);
+  auto remain_ddim = common::slice_ddim(index_dims, 0, index_dims_size - 1);
+  int64_t remain_numel = common::product(remain_ddim);
   int64_t x_numel = x.numel();
   int64_t out_grad_numel = out_grad.numel();
@@ -85,8 +85,8 @@ void GatherNdGradKernel(const Context &ctx,
                                        phi::DataType::INT32,
                                        phi::DataType::INT64));
-  auto x_shape = phi::vectorize(x_grad->dims());
-  auto index_shape = phi::vectorize(index.dims());
+  auto x_shape = common::vectorize(x_grad->dims());
+  auto index_shape = common::vectorize(index.dims());
   if (index_shape.size() == 1) {
     index_shape.insert(index_shape.begin(), 1);
   }
diff --git a/paddle/phi/kernels/xpu/gather_nd_kernel.cc b/paddle/phi/kernels/xpu/gather_nd_kernel.cc
index d7250678ffdc42..8d9ca774088c6a 100644
--- a/paddle/phi/kernels/xpu/gather_nd_kernel.cc
+++ b/paddle/phi/kernels/xpu/gather_nd_kernel.cc
@@ -41,8 +41,8 @@ void GatherNdKernel(const Context &ctx,
       0,
       phi::errors::InvalidArgument("end_size[%d] should be 0", end_size));
   // remain dim
-  auto remain_ddim = phi::slice_ddim(index_dims, 0, index_dims_size - 1);
-  int64_t remain_numel = phi::product(remain_ddim);
+  auto remain_ddim = common::slice_ddim(index_dims, 0, index_dims_size - 1);
+  int64_t remain_numel = common::product(remain_ddim);
   int64_t x_numel = x.numel();
   int64_t y_numel = out->numel();
@@ -78,8 +78,8 @@ void GatherNdKernel(const Context &ctx,
                                           DataType::INT32,
                                           DataType::INT64));
-  auto x_shape = phi::vectorize(x.dims());
-  auto index_shape = phi::vectorize(index.dims());
+  auto x_shape = common::vectorize(x.dims());
+  auto index_shape = common::vectorize(index.dims());
   if (index_shape.size() == 1) {
     index_shape.insert(index_shape.begin(), 1);
   }
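(Aside: both GatherNd kernels treat the index tensor's last axis as coordinates and every axis before it as the count of gathered slices, hence remain_numel = product(slice_ddim(index_dims, 0, rank - 1)). A toy sketch of that computation under the same assumption; RemainNumel is a hypothetical helper, not code from this patch.)

// Toy model of remain_numel; assumes the index tensor has rank >= 1.
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

int64_t RemainNumel(const std::vector<int64_t>& index_dims) {
  // Drop the last axis and multiply the rest:
  // {5, 7, 2} -> remain dims {5, 7} -> 35 gathered slices.
  return std::accumulate(index_dims.begin(), index_dims.end() - 1,
                         static_cast<int64_t>(1), std::multiplies<int64_t>());
}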
diff --git a/paddle/phi/kernels/xpu/gaussian_kernel.cc b/paddle/phi/kernels/xpu/gaussian_kernel.cc
index 2c4a29b6bfe515..99bde3096f6c11 100644
--- a/paddle/phi/kernels/xpu/gaussian_kernel.cc
+++ b/paddle/phi/kernels/xpu/gaussian_kernel.cc
@@ -28,7 +28,7 @@ void GaussianKernel(const Context& ctx,
                     int seed,
                     DataType dtype,
                     DenseTensor* out) {
-  out->Resize(phi::make_ddim(shape.GetData()));
+  out->Resize(common::make_ddim(shape.GetData()));
   T* data = ctx.template Alloc(out);
   using XPUType = typename XPUTypeTrait::Type;
   int64_t real_seed = seed != 0 ? seed : ctx.GetGenerator()->Random64();
diff --git a/paddle/phi/kernels/xpu/generate_proposals_kernel.cc b/paddle/phi/kernels/xpu/generate_proposals_kernel.cc
index 367ebfde95ae37..1b05c6e55c0016 100644
--- a/paddle/phi/kernels/xpu/generate_proposals_kernel.cc
+++ b/paddle/phi/kernels/xpu/generate_proposals_kernel.cc
@@ -88,16 +88,16 @@ std::pair ProposalForOneImage(
   SortDescending(dev_ctx, scores_slice, &index_sort, pre_nms_top_n);
   DenseTensor scores_sel, bbox_sel, anchor_sel, var_sel;
-  scores_sel.Resize(phi::make_ddim({index_sort.numel(), 1}));
+  scores_sel.Resize(common::make_ddim({index_sort.numel(), 1}));
   dev_ctx.template Alloc(&scores_sel);
-  bbox_sel.Resize(phi::make_ddim({index_sort.numel(), 4}));
+  bbox_sel.Resize(common::make_ddim({index_sort.numel(), 4}));
   dev_ctx.template Alloc(&bbox_sel);
-  anchor_sel.Resize(phi::make_ddim({index_sort.numel(), 4}));
+  anchor_sel.Resize(common::make_ddim({index_sort.numel(), 4}));
   dev_ctx.template Alloc(&anchor_sel);
-  var_sel.Resize(phi::make_ddim({index_sort.numel(), 4}));
+  var_sel.Resize(common::make_ddim({index_sort.numel(), 4}));
   dev_ctx.template Alloc(&var_sel);
   int r = xpu::gather(dev_ctx.x_context(),
@@ -145,7 +145,7 @@ std::pair ProposalForOneImage(
   // 2. box decode and clipping
   DenseTensor proposals;
-  proposals.Resize(phi::make_ddim({index_sort.numel(), 4}));
+  proposals.Resize(common::make_ddim({index_sort.numel(), 4}));
   dev_ctx.template Alloc(&proposals);
   r = xpu::box_decoder(dev_ctx.x_context(),
@@ -161,10 +161,10 @@ std::pair ProposalForOneImage(
   // 3.
filter DenseTensor keep_index, keep_num_t; - keep_index.Resize(phi::make_ddim({pre_nms_num})); + keep_index.Resize(common::make_ddim({pre_nms_num})); dev_ctx.template Alloc(&keep_index); - keep_num_t.Resize(phi::make_ddim({1})); + keep_num_t.Resize(common::make_ddim({1})); dev_ctx.template Alloc(&keep_num_t); min_size = std::max(min_size, 1.0f); r = xpu::remove_small_boxes(dev_ctx.x_context(), @@ -191,17 +191,17 @@ std::pair ProposalForOneImage( // Handle the case when there is no keep index left if (keep_num == 0) { phi::funcs::SetConstant set_zero; - proposals_filter.Resize(phi::make_ddim({1, 4})); + proposals_filter.Resize(common::make_ddim({1, 4})); dev_ctx.template Alloc(&proposals_filter); - scores_filter.Resize(phi::make_ddim({1, 1})); + scores_filter.Resize(common::make_ddim({1, 1})); dev_ctx.template Alloc(&scores_filter); set_zero(dev_ctx, &proposals_filter, static_cast(0)); set_zero(dev_ctx, &scores_filter, static_cast(0)); return std::make_pair(proposals_filter, scores_filter); } - proposals_filter.Resize(phi::make_ddim({keep_num, 4})); + proposals_filter.Resize(common::make_ddim({keep_num, 4})); dev_ctx.template Alloc(&proposals_filter); - scores_filter.Resize(phi::make_ddim({keep_num, 1})); + scores_filter.Resize(common::make_ddim({keep_num, 1})); dev_ctx.template Alloc(&scores_filter); r = xpu::gather(dev_ctx.x_context(), proposals.data(), @@ -245,9 +245,9 @@ std::pair ProposalForOneImage( } DenseTensor scores_nms, proposals_nms; - proposals_nms.Resize(phi::make_ddim({keep_index.numel(), 4})); + proposals_nms.Resize(common::make_ddim({keep_index.numel(), 4})); dev_ctx.template Alloc(&proposals_nms); - scores_nms.Resize(phi::make_ddim({keep_index.numel(), 1})); + scores_nms.Resize(common::make_ddim({keep_index.numel(), 1})); dev_ctx.template Alloc(&scores_nms); r = xpu::gather(dev_ctx.x_context(), proposals_filter.data(), @@ -307,10 +307,10 @@ void GenerateProposalsKernel(const Context& dev_ctx, int w_bbox = bbox_dim[3]; DenseTensor bbox_deltas_swap, scores_swap; - bbox_deltas_swap.Resize(phi::make_ddim({num, h_bbox, w_bbox, c_bbox})); + bbox_deltas_swap.Resize(common::make_ddim({num, h_bbox, w_bbox, c_bbox})); dev_ctx.template Alloc(&bbox_deltas_swap); - scores_swap.Resize(phi::make_ddim({num, h_score, w_score, c_score})); + scores_swap.Resize(common::make_ddim({num, h_score, w_score, c_score})); dev_ctx.template Alloc(&scores_swap); std::vector axis = {0, 2, 3, 1}; @@ -330,14 +330,14 @@ void GenerateProposalsKernel(const Context& dev_ctx, DenseTensor tmp_anchors = anchors; DenseTensor tmp_variances = variances; - tmp_anchors.Resize(phi::make_ddim({tmp_anchors.numel() / 4, 4})); - tmp_variances.Resize(phi::make_ddim({tmp_variances.numel() / 4, 4})); + tmp_anchors.Resize(common::make_ddim({tmp_anchors.numel() / 4, 4})); + tmp_variances.Resize(common::make_ddim({tmp_variances.numel() / 4, 4})); // output - rpn_rois->Resize(phi::make_ddim({bbox_deltas.numel() / 4, 4})); + rpn_rois->Resize(common::make_ddim({bbox_deltas.numel() / 4, 4})); dev_ctx.template Alloc(rpn_rois); - rpn_roi_probs->Resize(phi::make_ddim({scores.numel(), 1})); + rpn_roi_probs->Resize(common::make_ddim({scores.numel(), 1})); dev_ctx.template Alloc(rpn_roi_probs); auto place = dev_ctx.GetPlace(); @@ -352,8 +352,9 @@ void GenerateProposalsKernel(const Context& dev_ctx, DenseTensor bbox_deltas_slice = bbox_deltas_swap.Slice(i, i + 1); DenseTensor scores_slice = scores_swap.Slice(i, i + 1); - bbox_deltas_slice.Resize(phi::make_ddim({h_bbox * w_bbox * c_bbox / 4, 4})); - 
scores_slice.Resize(phi::make_ddim({h_score * w_score * c_score, 1})); + bbox_deltas_slice.Resize( + common::make_ddim({h_bbox * w_bbox * c_bbox / 4, 4})); + scores_slice.Resize(common::make_ddim({h_score * w_score * c_score, 1})); std::pair tensor_pair = ProposalForOneImage(dev_ctx, @@ -392,7 +393,7 @@ void GenerateProposalsKernel(const Context& dev_ctx, } if (rpn_rois_num != nullptr) { - rpn_rois_num->Resize(phi::make_ddim({num})); + rpn_rois_num->Resize(common::make_ddim({num})); dev_ctx.template Alloc(rpn_rois_num); int* num_data = rpn_rois_num->data(); memory_utils::Copy( @@ -403,8 +404,8 @@ void GenerateProposalsKernel(const Context& dev_ctx, lod.emplace_back(offset); rpn_rois->set_lod(lod); rpn_roi_probs->set_lod(lod); - rpn_rois->Resize(phi::make_ddim({num_proposals, 4})); - rpn_roi_probs->Resize(phi::make_ddim({num_proposals, 1})); + rpn_rois->Resize(common::make_ddim({num_proposals, 4})); + rpn_roi_probs->Resize(common::make_ddim({num_proposals, 1})); } } // namespace phi diff --git a/paddle/phi/kernels/xpu/grid_sample_kernel.cc b/paddle/phi/kernels/xpu/grid_sample_kernel.cc index c374b2cc9dce68..5f6d4f31f67e77 100644 --- a/paddle/phi/kernels/xpu/grid_sample_kernel.cc +++ b/paddle/phi/kernels/xpu/grid_sample_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/grid_sample_kernel.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { @@ -31,7 +31,7 @@ void GridSampleKernel(const Context& dev_ctx, // attrs // paddle.nn.functional.grid_sample(x, grid, mode='bilinear', // padding_mode='zeros', align_corners=True, name=None) - const std::string data_format = phi::DataLayoutToString(x.layout()); + const std::string data_format = common::DataLayoutToString(x.layout()); // attr to real param bool is_nearest_bool; @@ -85,7 +85,7 @@ void GridSampleKernel(const Context& dev_ctx, data_format)); } - out->Resize(make_ddim({n, c, out_h, out_w})); + out->Resize(common::make_ddim({n, c, out_h, out_w})); T* output_data = dev_ctx.template Alloc(out); int r = xpu::grid_sample(dev_ctx.x_context(), @@ -111,7 +111,7 @@ void GridSampleKernel(const Context& dev_ctx, int out_h = grid.dims()[2]; int out_w = grid.dims()[3]; - out->Resize(make_ddim({n, c, out_d, out_h, out_w})); + out->Resize(common::make_ddim({n, c, out_d, out_h, out_w})); T* output_data = dev_ctx.template Alloc(out); int r = xpu::grid_sample3d(dev_ctx.x_context(), diff --git a/paddle/phi/kernels/xpu/group_norm_grad_kernel.cc b/paddle/phi/kernels/xpu/group_norm_grad_kernel.cc index 08532e22d86c96..428b2699dc2753 100644 --- a/paddle/phi/kernels/xpu/group_norm_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/group_norm_grad_kernel.cc @@ -19,8 +19,8 @@ #include #include +#include "paddle/common/layout.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -42,10 +42,10 @@ void GroupNormGradKernel(const Context& dev_ctx, DenseTensor* d_scale, DenseTensor* d_bias) { using XPUType = typename XPUTypeTrait::Type; - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); const auto scale_ptr = scale.get_ptr(); const auto bias_ptr = bias.get_ptr(); - const auto x_dims = phi::vectorize(x.dims()); + const auto x_dims = common::vectorize(x.dims()); const int N = x_dims[0]; const bool channel_first 
= data_layout == DataLayout::kNCHW || data_layout == DataLayout::kNCDHW; diff --git a/paddle/phi/kernels/xpu/group_norm_kernel.cc b/paddle/phi/kernels/xpu/group_norm_kernel.cc index 7d82a5d18fee7e..01435f82b2cef8 100644 --- a/paddle/phi/kernels/xpu/group_norm_kernel.cc +++ b/paddle/phi/kernels/xpu/group_norm_kernel.cc @@ -19,8 +19,8 @@ #include #include +#include "paddle/common/layout.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { @@ -38,11 +38,11 @@ void GroupNormKernel(const Context& dev_ctx, DenseTensor* var) { using XPUType = typename XPUTypeTrait::Type; - const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); + const DataLayout data_layout = common::StringToDataLayout(data_layout_str); const auto scale_ptr = scale.get_ptr(); const auto bias_ptr = bias.get_ptr(); - const auto x_dims = phi::vectorize(x.dims()); + const auto x_dims = common::vectorize(x.dims()); const int N = x_dims[0]; const bool channel_first = data_layout == DataLayout::kNCHW || data_layout == DataLayout::kNCDHW; diff --git a/paddle/phi/kernels/xpu/index_put_kernel.cc b/paddle/phi/kernels/xpu/index_put_kernel.cc index 4197b9698cb3c1..60c91a8e5c83c7 100644 --- a/paddle/phi/kernels/xpu/index_put_kernel.cc +++ b/paddle/phi/kernels/xpu/index_put_kernel.cc @@ -43,20 +43,21 @@ void XPUDealWithIndices(const Context& dev_ctx, expanded_index = casted_index; } else { expanded_index.Resize(bd_dim); - ExpandKernel(dev_ctx, - casted_index, - IntArray(vectorize(bd_dim)), - &expanded_index); + ExpandKernel( + dev_ctx, + casted_index, + IntArray(common::vectorize(bd_dim)), + &expanded_index); } tmp_indices_v.emplace_back(expanded_index); } - auto bd_dim_vec = vectorize(bd_dim); + auto bd_dim_vec = common::vectorize(bd_dim); std::vector stacked_dim_vec(bd_dim.size() + 1); std::copy(bd_dim_vec.begin(), bd_dim_vec.end(), stacked_dim_vec.begin()); stacked_dim_vec.back() = int_indices_v.size(); - out->Resize(make_ddim(stacked_dim_vec)); + out->Resize(common::make_ddim(stacked_dim_vec)); std::vector tmp_indices_ptr(tmp_indices_v.size(), nullptr); @@ -109,13 +110,13 @@ void IndexPutKernel(const Context& dev_ctx, DenseTensor res_indices(DataType::INT64); // Broadcast and merge indices XPUDealWithIndices(dev_ctx, int_indices_v, bd_dims, &res_indices); - auto index_shape = vectorize(res_indices.dims()); - auto x_shape = vectorize(x.dims()); + auto index_shape = common::vectorize(res_indices.dims()); + auto x_shape = common::vectorize(x.dims()); const T* value_data = value.data(); // Broadcast value - auto value_shape = vectorize(value.dims()); + auto value_shape = common::vectorize(value.dims()); int64_t value_rank = bd_dims.size() + (x_shape.size() - int_indices_v.size()); std::vector value_shape_bd(value_rank); std::copy(index_shape.begin(), index_shape.end() - 1, value_shape_bd.begin()); @@ -126,7 +127,7 @@ void IndexPutKernel(const Context& dev_ctx, DenseTensor value_bd(value.dtype()); if (value_shape != value_shape_bd) { - value_bd.Resize(make_ddim(value_shape_bd)); + value_bd.Resize(common::make_ddim(value_shape_bd)); ExpandKernel( dev_ctx, value, IntArray(value_shape_bd), &value_bd); value_data = value_bd.data(); diff --git a/paddle/phi/kernels/xpu/index_sample_grad_kernel.cc b/paddle/phi/kernels/xpu/index_sample_grad_kernel.cc index 22c35ef46840fc..537d50701cd01f 100644 --- a/paddle/phi/kernels/xpu/index_sample_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/index_sample_grad_kernel.cc @@ -41,9 +41,9 @@ void 
IndexSampleGradKernel(const Context& ctx, XPUType* in_grad_data = ctx.template Alloc(in_grad); const XPUType* out_grad_data = out_grad.data(); - auto in_grad_shape = phi::vectorize(in_grad->dims()); - auto out_grad_shape = phi::vectorize(out_grad.dims()); - auto index_shape = phi::vectorize(index.dims()); + auto in_grad_shape = common::vectorize(in_grad->dims()); + auto out_grad_shape = common::vectorize(out_grad.dims()); + auto index_shape = common::vectorize(index.dims()); int r = xpu::constant( ctx.x_context(), in_grad_data, in_grad->numel(), static_cast(0)); diff --git a/paddle/phi/kernels/xpu/index_select_grad_kernel.cc b/paddle/phi/kernels/xpu/index_select_grad_kernel.cc index 14bfce38799f0c..7a3ef41b8261a1 100644 --- a/paddle/phi/kernels/xpu/index_select_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/index_select_grad_kernel.cc @@ -45,8 +45,8 @@ void IndexSelectGradKernel(const Context& ctx, T* x_grad_data = ctx.template Alloc(x_grad); const T* out_grad_data = out_grad.data(); - auto out_grad_shape = phi::vectorize(out_grad.dims()); - auto x_grad_shape = phi::vectorize(x_grad->dims()); + auto out_grad_shape = common::vectorize(out_grad.dims()); + auto x_grad_shape = common::vectorize(x_grad->dims()); int r = xpu::Error_t::SUCCESS; if (index_type == phi::DataType::INT32) { diff --git a/paddle/phi/kernels/xpu/index_select_kernel.cc b/paddle/phi/kernels/xpu/index_select_kernel.cc index 75c19aa028bce7..12395387eccf26 100644 --- a/paddle/phi/kernels/xpu/index_select_kernel.cc +++ b/paddle/phi/kernels/xpu/index_select_kernel.cc @@ -41,7 +41,7 @@ void IndexSelectKernel(const Context& ctx, phi::DataType::INT32, phi::DataType::INT64)); auto* in_data = x.data(); - std::vector in_shape = phi::vectorize(input_dim); + std::vector in_shape = common::vectorize(input_dim); int index_len = output->dims()[dim]; T* out_data = ctx.template Alloc(output); int r = 0; diff --git a/paddle/phi/kernels/xpu/interpolate_grad_kernel.cc b/paddle/phi/kernels/xpu/interpolate_grad_kernel.cc index 0c0570475f7de4..054856862bc15f 100644 --- a/paddle/phi/kernels/xpu/interpolate_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/interpolate_grad_kernel.cc @@ -13,9 +13,9 @@ // limitations under the License. 
 #include "paddle/phi/kernels/interpolate_grad_kernel.h"
+#include "paddle/common/layout.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
-#include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/interpolate_function.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -39,7 +39,7 @@ void InterpolateGradKernel(
     bool align_corners,
     int align_mode,
     DenseTensor* x_grad) {
-  const DataLayout data_layout = phi::StringToDataLayout(data_layout_str);
+  const DataLayout data_layout = common::StringToDataLayout(data_layout_str);
   int n, c, in_d, in_h, in_w;
   funcs::ExtractNCDWH(x.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w);
diff --git a/paddle/phi/kernels/xpu/interpolate_kernel.cc b/paddle/phi/kernels/xpu/interpolate_kernel.cc
index 0bf7f6e1113fb8..712897ee90079c 100644
--- a/paddle/phi/kernels/xpu/interpolate_kernel.cc
+++ b/paddle/phi/kernels/xpu/interpolate_kernel.cc
@@ -14,9 +14,9 @@
 #include "paddle/phi/kernels/interpolate_kernel.h"
+#include "paddle/common/layout.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
-#include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/interpolate_function.h"
@@ -39,7 +39,7 @@ void InterpolateKernel(
     int align_mode,
     DenseTensor* output) {
   using XPUType = typename XPUTypeTrait::Type;
-  const DataLayout data_layout = phi::StringToDataLayout(data_layout_str);
+  const DataLayout data_layout = common::StringToDataLayout(data_layout_str);
   int n, c, in_d, in_h, in_w;
   phi::funcs::ExtractNCDWH(x.dims(), data_layout, &n, &c, &in_d, &in_h, &in_w);
diff --git a/paddle/phi/kernels/xpu/layer_norm_grad_kernel.cc b/paddle/phi/kernels/xpu/layer_norm_grad_kernel.cc
index 6218db6ae2b4e4..35220636dffb68 100644
--- a/paddle/phi/kernels/xpu/layer_norm_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/layer_norm_grad_kernel.cc
@@ -34,7 +34,7 @@ void LayerNormGradKernel(const Context& ctx,
                          DenseTensor* bias_grad) {
   using XPUType = typename XPUTypeTrait::Type;
   const auto& x_dims = x.dims();
-  auto matrix_dim = phi::flatten_to_2d(x_dims, begin_norm_axis);
+  auto matrix_dim = common::flatten_to_2d(x_dims, begin_norm_axis);
   int left = static_cast(matrix_dim[0]);
   int right = static_cast(matrix_dim[1]);
   const auto* x_data = x.data();
diff --git a/paddle/phi/kernels/xpu/layer_norm_kernel.cc b/paddle/phi/kernels/xpu/layer_norm_kernel.cc
index 9c6c2ef727fd2c..7c9727bc121999 100644
--- a/paddle/phi/kernels/xpu/layer_norm_kernel.cc
+++ b/paddle/phi/kernels/xpu/layer_norm_kernel.cc
@@ -31,7 +31,7 @@ void LayerNormKernel(const Context& ctx,
                      DenseTensor* variance) {
   using XPUType = typename XPUTypeTrait::Type;
   const auto& x_dims = x.dims();
-  auto matrix_dim = phi::flatten_to_2d(x_dims, begin_norm_axis);
+  auto matrix_dim = common::flatten_to_2d(x_dims, begin_norm_axis);
   int left = static_cast(matrix_dim[0]);
   int right = static_cast(matrix_dim[1]);
   const auto* x_data = x.data();
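(Aside: the two LayerNorm kernels reduce normalization to a 2-D problem: flatten_to_2d collapses the axes before begin_norm_axis into "left" rows and the remaining axes into "right" columns. A self-contained sketch of those assumed semantics — not the library implementation:)

// Sketch of matrix_dim = flatten_to_2d(x_dims, begin_norm_axis).
// Example: x_dims = {8, 16, 32}, begin_norm_axis = 1 -> {8, 512}.
#include <cstdint>
#include <utility>
#include <vector>

std::pair<int64_t, int64_t> FlattenTo2d(const std::vector<int64_t>& dims,
                                        size_t begin_norm_axis) {
  int64_t left = 1, right = 1;
  for (size_t i = 0; i < dims.size(); ++i) {
    (i < begin_norm_axis ? left : right) *= dims[i];
  }
  return {left, right};  // rows normalized independently, cols reduced over
}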
diff --git a/paddle/phi/kernels/xpu/linspace_kernel.cc b/paddle/phi/kernels/xpu/linspace_kernel.cc
index 15799b7d40a1af..38a6b176f796d1 100644
--- a/paddle/phi/kernels/xpu/linspace_kernel.cc
+++ b/paddle/phi/kernels/xpu/linspace_kernel.cc
@@ -67,7 +67,7 @@ void LinspaceKernel(const Context& ctx,
                         "than 0, but received num is %d",
                         num));
-  out->Resize(phi::make_ddim({num}));
+  out->Resize(common::make_ddim({num}));
   T* out_data = ctx.template Alloc(out);
   int r = xpu::linspace(ctx.x_context(),
diff --git a/paddle/phi/kernels/xpu/log_softmax_grad_kernel.cc b/paddle/phi/kernels/xpu/log_softmax_grad_kernel.cc
index 949e40474c7351..bcf69f3966e587 100644
--- a/paddle/phi/kernels/xpu/log_softmax_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/log_softmax_grad_kernel.cc
@@ -38,7 +38,7 @@ void LogSoftmaxGradKernel(const Context& dev_ctx,
     return;
   }
-  auto out_shape = phi::vectorize(out.dims());
+  auto out_shape = common::vectorize(out.dims());
   dev_ctx.template Alloc(x_grad);
   int r = xpu::log_softmax_grad(
       dev_ctx.x_context(),
diff --git a/paddle/phi/kernels/xpu/log_softmax_kernel.cc b/paddle/phi/kernels/xpu/log_softmax_kernel.cc
index 2ee093dbad44c5..429b53e717cffd 100644
--- a/paddle/phi/kernels/xpu/log_softmax_kernel.cc
+++ b/paddle/phi/kernels/xpu/log_softmax_kernel.cc
@@ -37,7 +37,7 @@ void LogSoftmaxKernel(const Context& dev_ctx,
     return;
   }
   if (x.numel() != 0) {
-    auto x_shape = phi::vectorize(x.dims());
+    auto x_shape = common::vectorize(x.dims());
     dev_ctx.template Alloc(out);
     if (axis < 0) axis += rank;
     int r =
diff --git a/paddle/phi/kernels/xpu/logical_kernel.cc b/paddle/phi/kernels/xpu/logical_kernel.cc
index 57dc8b4387489b..4f8b1b75d4cfd1 100644
--- a/paddle/phi/kernels/xpu/logical_kernel.cc
+++ b/paddle/phi/kernels/xpu/logical_kernel.cc
@@ -91,7 +91,7 @@ void LogicalBinaryKernel(
   bool is_x_need_broadcast = false;
   bool is_y_need_broadcast = false;
-  auto out_vec = phi::vectorize(out->dims());
+  auto out_vec = common::vectorize(out->dims());
   for (int i = 0; i < max_dim; i++) {
     if (x_dims_vec[i] != out_vec[i]) {
       is_x_need_broadcast = true;
diff --git a/paddle/phi/kernels/xpu/masked_select_grad_kernel.cc b/paddle/phi/kernels/xpu/masked_select_grad_kernel.cc
index 8e2f56adfa1414..6eed8cb524a752 100644
--- a/paddle/phi/kernels/xpu/masked_select_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/masked_select_grad_kernel.cc
@@ -31,8 +31,8 @@ void MaskedSelectGradKernel(const Context& dev_ctx,
   auto* out_data =
       reinterpret_cast(dev_ctx.template Alloc(x_grad));
-  auto mask_shape = phi::vectorize(mask.dims());
-  auto xshape = phi::vectorize(x_grad->dims());
+  auto mask_shape = common::vectorize(mask.dims());
+  auto xshape = common::vectorize(x_grad->dims());
   if (mask.dims().size() == 0) {
     mask_shape = std::vector({1});
   }
diff --git a/paddle/phi/kernels/xpu/masked_select_kernel.cc b/paddle/phi/kernels/xpu/masked_select_kernel.cc
index e4af5b5a970970..62803fde27aa5c 100644
--- a/paddle/phi/kernels/xpu/masked_select_kernel.cc
+++ b/paddle/phi/kernels/xpu/masked_select_kernel.cc
@@ -59,8 +59,8 @@ void MaskedSelectKernel(const Context& dev_ctx,
   out->Resize(out_dim);
   auto out_data = reinterpret_cast(dev_ctx.template Alloc(out));
-  auto input_shape = vectorize(input_dim);
-  auto mask_shape = vectorize(mask_dim);
+  auto input_shape = common::vectorize(input_dim);
+  auto mask_shape = common::vectorize(mask_dim);
   if (input_dim.size() == 0) {
     input_shape = std::vector({1});
   }
diff --git a/paddle/phi/kernels/xpu/matmul_grad_kernel.cc b/paddle/phi/kernels/xpu/matmul_grad_kernel.cc
index c4fb311cbe5f0e..f94abe63000178 100644
--- a/paddle/phi/kernels/xpu/matmul_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/matmul_grad_kernel.cc
@@ -114,8 +114,8 @@ void MatmulWithFlattenGradKernel(const Context& dev_ctx,
   auto y_matrix = y.dims().size() > 2 ?
phi::ReshapeToMatrix(y, y_num_col_dims) : static_cast(y); DenseTensor dout_mat; - dout_mat.Resize({phi::flatten_to_2d(x.dims(), x_num_col_dims)[0], - phi::flatten_to_2d(y.dims(), y_num_col_dims)[1]}); + dout_mat.Resize({common::flatten_to_2d(x.dims(), x_num_col_dims)[0], + common::flatten_to_2d(y.dims(), y_num_col_dims)[1]}); if (x_grad != nullptr) { x_grad->set_lod(x.lod()); diff --git a/paddle/phi/kernels/xpu/meshgrid_kernel.cc b/paddle/phi/kernels/xpu/meshgrid_kernel.cc index 4a26a1a946f6dd..4e86e360e1c1d0 100644 --- a/paddle/phi/kernels/xpu/meshgrid_kernel.cc +++ b/paddle/phi/kernels/xpu/meshgrid_kernel.cc @@ -32,7 +32,7 @@ void MeshgridKernel(const Context& ctx, for (const auto& x : inputs) { x_list.push_back(reinterpret_cast(x->data())); - xshape_list.emplace_back(phi::vectorize(x->dims())); + xshape_list.emplace_back(common::vectorize(x->dims())); } for (auto& x : outputs) { ctx.template Alloc(x); diff --git a/paddle/phi/kernels/xpu/multiclass_nms3_kernel.cc b/paddle/phi/kernels/xpu/multiclass_nms3_kernel.cc index 442d972691f5a4..17746e4eeff0af 100644 --- a/paddle/phi/kernels/xpu/multiclass_nms3_kernel.cc +++ b/paddle/phi/kernels/xpu/multiclass_nms3_kernel.cc @@ -46,7 +46,7 @@ void MultiClassNMSKernel(const Context& ctx, bool return_index = index != nullptr; bool has_rois_num = rois_num.get_ptr() != nullptr; bool return_rois_num = nms_rois_num != nullptr; - auto score_dims = phi::vectorize(scores.dims()); + auto score_dims = common::vectorize(scores.dims()); auto score_size = score_dims.size(); bool is_lod = score_size == 2 ? true : false; diff --git a/paddle/phi/kernels/xpu/nll_loss_grad_kernel.cc b/paddle/phi/kernels/xpu/nll_loss_grad_kernel.cc index 1dbe679e67498e..ca7b1b23273b1e 100644 --- a/paddle/phi/kernels/xpu/nll_loss_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/nll_loss_grad_kernel.cc @@ -45,7 +45,7 @@ void NllLossGradKernel(const Context& dev_ctx, auto d_x_data = dev_ctx.template Alloc(d_x); auto d_x_dims = d_x->dims(); - std::vector d_x_shape = phi::vectorize(d_x_dims); + std::vector d_x_shape = common::vectorize(d_x_dims); auto weight_data = weight.get_ptr() ? 
weight.get_ptr()->data() : nullptr; diff --git a/paddle/phi/kernels/xpu/nll_loss_kernel.cc b/paddle/phi/kernels/xpu/nll_loss_kernel.cc index 2d9bf5baf57670..8bd95efe02b329 100644 --- a/paddle/phi/kernels/xpu/nll_loss_kernel.cc +++ b/paddle/phi/kernels/xpu/nll_loss_kernel.cc @@ -49,7 +49,7 @@ void NllLossRawKernel(const Context& dev_ctx, auto total_weight_data = dev_ctx.template Alloc(total_weight); auto x_dims = x.dims(); - std::vector x_shape = phi::vectorize(x_dims); + std::vector x_shape = common::vectorize(x_dims); int64_t reduction_id = 0; if (reduction == "none") { diff --git a/paddle/phi/kernels/xpu/nonzero_kernel.cc b/paddle/phi/kernels/xpu/nonzero_kernel.cc index fe241965fb5c69..f3d665afaa6643 100644 --- a/paddle/phi/kernels/xpu/nonzero_kernel.cc +++ b/paddle/phi/kernels/xpu/nonzero_kernel.cc @@ -41,14 +41,14 @@ void NonZeroKernel(const Context& dev_ctx, static_cast(true_num), sizeof(int32_t)); - out->Resize(phi::make_ddim({static_cast(true_num_cpu), rank})); + out->Resize(common::make_ddim({static_cast(true_num_cpu), rank})); auto* out_data = dev_ctx.template Alloc(out); if (true_num_cpu == 0) { return; } - auto condition_shape = phi::vectorize(dims); + auto condition_shape = common::vectorize(dims); ret = xpu::where( dev_ctx.x_context(), cond_data, out_data, condition_shape, true_num_cpu); PADDLE_ENFORCE_XDNN_SUCCESS(ret, "where"); diff --git a/paddle/phi/kernels/xpu/p_norm_grad_kernel.cc b/paddle/phi/kernels/xpu/p_norm_grad_kernel.cc index 883e3262a64876..083cf33d0600ac 100644 --- a/paddle/phi/kernels/xpu/p_norm_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/p_norm_grad_kernel.cc @@ -134,7 +134,7 @@ void PNormGradKernel(const Context& dev_ctx, PADDLE_ENFORCE_XDNN_SUCCESS(r, "abs"); DenseTensor porder_tensor; - phi::DDim pdim = phi::make_ddim({1}); + phi::DDim pdim = common::make_ddim({1}); porder_tensor.Resize(pdim); dev_ctx.template Alloc(&porder_tensor); r = xpu::constant( diff --git a/paddle/phi/kernels/xpu/p_norm_kernel.cc b/paddle/phi/kernels/xpu/p_norm_kernel.cc index 60abc59517b786..722fc4cc3aba05 100644 --- a/paddle/phi/kernels/xpu/p_norm_kernel.cc +++ b/paddle/phi/kernels/xpu/p_norm_kernel.cc @@ -125,7 +125,7 @@ void PNormKernel(const Context& dev_ctx, } else { DenseTensor porder_tensor; - phi::DDim pdim = phi::make_ddim({1}); + phi::DDim pdim = common::make_ddim({1}); porder_tensor.Resize(pdim); dev_ctx.template Alloc(&porder_tensor); r = xpu::constant( diff --git a/paddle/phi/kernels/xpu/pad3d_grad_kernel.cc b/paddle/phi/kernels/xpu/pad3d_grad_kernel.cc index d5960b02cf91ac..2599458e44733a 100644 --- a/paddle/phi/kernels/xpu/pad3d_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/pad3d_grad_kernel.cc @@ -34,7 +34,7 @@ void Pad3dGradKernel(const Context& dev_ctx, auto* d_out = &out_grad; auto* d_in = x_grad; - auto d_in_dims = vectorize(d_in->dims()); + auto d_in_dims = common::vectorize(d_in->dims()); const T* d_out_data = d_out->data(); T* d_in_data = dev_ctx.template Alloc(d_in); diff --git a/paddle/phi/kernels/xpu/pad_grad_kernel.cc b/paddle/phi/kernels/xpu/pad_grad_kernel.cc index 45fc3393412cd8..6eb2741ac5521d 100644 --- a/paddle/phi/kernels/xpu/pad_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/pad_grad_kernel.cc @@ -26,7 +26,7 @@ void PadGradKernel(const Context& dev_ctx, DenseTensor* d_x) { using XPUType = typename XPUTypeTrait::Type; std::vector pad_left, pad_right; - std::vector out_shape = vectorize(d_out.dims()); + std::vector out_shape = common::vectorize(d_out.dims()); dev_ctx.template Alloc(d_x); for (size_t i = 0; i < paddings.size() / 2; ++i) { diff --git 
a/paddle/phi/kernels/xpu/pad_kernel.cc b/paddle/phi/kernels/xpu/pad_kernel.cc
index 899503e328607b..63906b8cb09351 100644
--- a/paddle/phi/kernels/xpu/pad_kernel.cc
+++ b/paddle/phi/kernels/xpu/pad_kernel.cc
@@ -27,7 +27,7 @@ void PadKernel(const Context& dev_ctx,
   using XPUType = typename XPUTypeTrait::Type;
   dev_ctx.template Alloc(out);
   std::vector pad_left, pad_right;
-  std::vector xshape = vectorize(x.dims());
+  std::vector xshape = common::vectorize(x.dims());
   for (size_t i = 0; i < paddings.size() / 2; ++i) {
     pad_left.push_back(paddings[i * 2]);
diff --git a/paddle/phi/kernels/xpu/pow2_decay_with_linear_warmup_kernel.cc b/paddle/phi/kernels/xpu/pow2_decay_with_linear_warmup_kernel.cc
index bfda5688bb3407..9195613c315f49 100644
--- a/paddle/phi/kernels/xpu/pow2_decay_with_linear_warmup_kernel.cc
+++ b/paddle/phi/kernels/xpu/pow2_decay_with_linear_warmup_kernel.cc
@@ -14,10 +14,10 @@
 #include "paddle/phi/kernels/pow2_decay_with_linear_warmup_kernel.h"
+#include "paddle/common/macros.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/core/macros.h"
 namespace phi {
diff --git a/paddle/phi/kernels/xpu/randint_kernel.cc b/paddle/phi/kernels/xpu/randint_kernel.cc
index ce86d7e77a9fd5..f284846637f09f 100644
--- a/paddle/phi/kernels/xpu/randint_kernel.cc
+++ b/paddle/phi/kernels/xpu/randint_kernel.cc
@@ -32,7 +32,7 @@ void RandintKernel(const Context& dev_ctx,
                    DenseTensor* out) {
   int seed = 0;
   int64_t size = out->numel();
-  out->Resize(phi::make_ddim(shape.GetData()));
+  out->Resize(common::make_ddim(shape.GetData()));
   T* data = dev_ctx.template Alloc(out);
   auto numel = out->numel();
   std::shared_ptr engine;
diff --git a/paddle/phi/kernels/xpu/randperm_kernel.cc b/paddle/phi/kernels/xpu/randperm_kernel.cc
index b5ba469b837813..a90691c14e7028 100644
--- a/paddle/phi/kernels/xpu/randperm_kernel.cc
+++ b/paddle/phi/kernels/xpu/randperm_kernel.cc
@@ -43,7 +43,7 @@ void RandpermKernel(const Context& dev_ctx,
   } else {
     dev_ctx.template Alloc(out);
     phi::DenseTensor tmp_tensor;
-    tmp_tensor.Resize(phi::make_ddim({n}));
+    tmp_tensor.Resize(common::make_ddim({n}));
     T* tmp_data = dev_ctx.template HostAlloc(&tmp_tensor);
     for (int i = 0; i < n; ++i) {
       tmp_data[i] = static_cast(i);
diff --git a/paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc b/paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc
index afe84e43d99d14..c5b0950552629d 100644
--- a/paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc
@@ -39,8 +39,8 @@ void ReduceMeanGradKernel(const Context& dev_ctx,
   auto reduce_dims = dims_arr.GetData();
-  std::vector xdims = vectorize(x.dims());
-  std::vector ydims = vectorize(out_grad.dims());
+  std::vector xdims = common::vectorize(x.dims());
+  std::vector ydims = common::vectorize(out_grad.dims());
   int reduce_numel = 1;
   if (reduce_all) {
diff --git a/paddle/phi/kernels/xpu/scatter_kernel.cc b/paddle/phi/kernels/xpu/scatter_kernel.cc
index 9052cd5b5f5f0d..fc13bd92b90dec 100644
--- a/paddle/phi/kernels/xpu/scatter_kernel.cc
+++ b/paddle/phi/kernels/xpu/scatter_kernel.cc
@@ -79,8 +79,8 @@ void ScatterKernel(const Context &ctx,
   }
   int dim0 = static_cast(x.dims()[0]);
-  int dim1 =
-      static_cast(phi::product(phi::slice_ddim(x_dims, 1, x_dims.size())));
+  int dim1 = static_cast(
+      common::product(common::slice_ddim(x_dims, 1, x_dims.size())));
   DenseTensor indices_cpu(index.type());
   phi::Copy(ctx, index, phi::CPUPlace(), true, &indices_cpu);
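(Aside: ScatterKernel above views x as a dim0-by-dim1 matrix — dim0 from the first axis, dim1 as the product of all remaining axes, which is exactly the product(slice_ddim(x_dims, 1, rank)) expression. The same idiom sketched standalone; RowWidth is a hypothetical helper, not code from this patch.)

// Sketch of dim1 = product(slice_ddim(x_dims, 1, x_dims.size())).
#include <cstdint>
#include <vector>

int64_t RowWidth(const std::vector<int64_t>& x_dims) {
  int64_t dim1 = 1;
  for (size_t i = 1; i < x_dims.size(); ++i) {  // skip axis 0 (the rows)
    dim1 *= x_dims[i];  // e.g. {100, 4, 8} -> 32 elements per scattered row
  }
  return dim1;
}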
diff --git a/paddle/phi/kernels/xpu/scatter_nd_add_grad_kernel.cc b/paddle/phi/kernels/xpu/scatter_nd_add_grad_kernel.cc
index a0fd86fcc3208d..37e6e91ea779e3 100644
--- a/paddle/phi/kernels/xpu/scatter_nd_add_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/scatter_nd_add_grad_kernel.cc
@@ -52,8 +52,8 @@ void ScatterNdAddGradKernel(const Context &ctx,
       errors::InvalidArgument(
           "Size of the last dim of the index tensor [%d] should be 0",
           end_size));
-  auto remain_dims = phi::slice_ddim(index_dims, 0, index_dims_size - 1);
-  int64_t remain_numel = phi::product(remain_dims);
+  auto remain_dims = common::slice_ddim(index_dims, 0, index_dims_size - 1);
+  int64_t remain_numel = common::product(remain_dims);
   int64_t updates_grad_numel = updates_grad->numel();
   int64_t out_grad_numel = out_grad.numel();
   PADDLE_ENFORCE_EQ(
@@ -73,11 +73,11 @@ void ScatterNdAddGradKernel(const Context &ctx,
     return;
   }
-  auto index_shape_vec = vectorize(index.dims());
+  auto index_shape_vec = common::vectorize(index.dims());
   if (index_shape_vec.size() == 1) {
     index_shape_vec.insert(index_shape_vec.begin(), 1);
   }
-  auto out_grad_shape_vec = vectorize(out_grad.dims());
+  auto out_grad_shape_vec = common::vectorize(out_grad.dims());
   xpu::VectorParam out_grad_shape_param = {
       out_grad_shape_vec.data(),
       static_cast(out_grad_shape_vec.size()),
diff --git a/paddle/phi/kernels/xpu/scatter_nd_add_kernel.cc b/paddle/phi/kernels/xpu/scatter_nd_add_kernel.cc
index 69e40994eb92de..9b826aecdb8b39 100644
--- a/paddle/phi/kernels/xpu/scatter_nd_add_kernel.cc
+++ b/paddle/phi/kernels/xpu/scatter_nd_add_kernel.cc
@@ -37,7 +37,7 @@ void ScatterNdAddKernel(const Context &ctx,
   int64_t index_dims_size = index.dims().size();
   int loop_time = static_cast(
       index_dims_size == 0 ? 1
-                           : phi::product(phi::slice_ddim(
+                           : common::product(common::slice_ddim(
                                  index.dims(), 0, index_dims_size - 1)));
   for (int i = 0; i < loop_time; i++) {
@@ -64,8 +64,8 @@ void ScatterNdAddKernel(const Context &ctx,
                                        phi::DataType::INT32,
                                        phi::DataType::INT64));
-  auto x_shape = phi::vectorize(x.dims());
-  auto index_shape = phi::vectorize(index.dims());
+  auto x_shape = common::vectorize(x.dims());
+  auto index_shape = common::vectorize(index.dims());
   if (index_shape.size() == 1) {
     index_shape.insert(index_shape.begin(), 1);
   }
diff --git a/paddle/phi/kernels/xpu/set_value_grad_kernel.cc b/paddle/phi/kernels/xpu/set_value_grad_kernel.cc
index d80a2a97da8cfe..d1ad332cd626c5 100644
--- a/paddle/phi/kernels/xpu/set_value_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/set_value_grad_kernel.cc
@@ -67,7 +67,7 @@ void SetValueGradImpl(const Context& dev_ctx,
           "The input of `set_value_grad`(out_grad) has not been initialized"));
   auto in_dims = out_grad.dims();
-  auto in_dims_vector = phi::vectorize(in_dims);
+  auto in_dims_vector = common::vectorize(in_dims);
   std::vector decrease_axis_int32(decrease_axes.begin(),
                                   decrease_axes.end());
@@ -88,7 +88,7 @@ void SetValueGradImpl(const Context& dev_ctx,
                                  axes.size(),
                                  false);
-  DDim out_dims(phi::make_ddim(out_dims_vector));
+  DDim out_dims(common::make_ddim(out_dims_vector));
   std::vector reverse_vector(starts_local.size(), 0);
   funcs::StridedSliceFunctor(starts_local.data(),
@@ -159,7 +159,7 @@ void SetValueGradImpl(const Context& dev_ctx,
         reinterpret_cast(tmp.data()),
         reinterpret_cast(x_grad->data()),
         out_dims_vector,
-        phi::vectorize(x_grad->dims()),
+        common::vectorize(x_grad->dims()),
         starts_indices,
         ends_indices,
         steps_indices);
@@ -265,11 +265,11 @@ void SetValueGradImpl(const Context& dev_ctx,
         Full(dev_ctx,
              {fake_value_grad_dims.Get(),
fake_value_grad_dims.size()}, static_cast(0)); - auto value_grad_dims_vec = phi::vectorize(value_grad_dims); + auto value_grad_dims_vec = common::vectorize(value_grad_dims); // for value is a 0-D Tensor if (value_grad_dims.size() == 0) { - value_grad_dims_vec = - phi::vectorize(phi::make_ddim(std::vector({1}))); + value_grad_dims_vec = common::vectorize( + common::make_ddim(std::vector({1}))); } for (auto offset : offsets) { for (int i = 0; i < out_dims_size; i++) { @@ -279,7 +279,7 @@ void SetValueGradImpl(const Context& dev_ctx, reinterpret_cast(tmp.data()), reinterpret_cast(tmp2.data()), out_dims_vector, - phi::vectorize(offset), + common::vectorize(offset), slice_end); PADDLE_ENFORCE_XDNN_SUCCESS(r, "slice"); r = xpu::broadcast_add( diff --git a/paddle/phi/kernels/xpu/set_value_kernel.cc b/paddle/phi/kernels/xpu/set_value_kernel.cc index a706ef00b9a41d..c457a6d21fd8a1 100644 --- a/paddle/phi/kernels/xpu/set_value_kernel.cc +++ b/paddle/phi/kernels/xpu/set_value_kernel.cc @@ -122,7 +122,7 @@ void SetValueImpl(const Context& dev_ctx, none_axes_cur++; } - slice_dims_for_assign = phi::make_ddim(slice_dims_with_none); + slice_dims_for_assign = common::make_ddim(slice_dims_with_none); } // Here copy data from input to avoid data loss at PE and Graph level. @@ -146,7 +146,7 @@ void SetValueImpl(const Context& dev_ctx, PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy"); xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); - int64_t slice_numels = phi::product(slice_dims); + int64_t slice_numels = common::product(slice_dims); XPUType* slice_data = RAII_GUARD.alloc_l3_or_gm(slice_numels); int in_size = in_dims.size(); @@ -227,8 +227,8 @@ void SetValueImpl(const Context& dev_ctx, } } - auto out_shape = phi::vectorize(out->dims()); - auto slice_shape = phi::vectorize(slice_dims); + auto out_shape = common::vectorize(out->dims()); + auto slice_shape = common::vectorize(slice_dims); if (need_flip) { r = xpu::flip(dev_ctx.x_context(), @@ -407,7 +407,7 @@ void SetValueKernel(const Context& dev_ctx, phi::CPUPlace(), value_data_uint8_cpu, values_length); - auto value_dims = phi::make_ddim(shape); + auto value_dims = common::make_ddim(shape); SetValueKernelImpl(dev_ctx, x, diff --git a/paddle/phi/kernels/xpu/split_kernel.cc b/paddle/phi/kernels/xpu/split_kernel.cc index e3aeb7ffdfbe32..eded1bc67c2fec 100644 --- a/paddle/phi/kernels/xpu/split_kernel.cc +++ b/paddle/phi/kernels/xpu/split_kernel.cc @@ -28,7 +28,7 @@ void SplitKernel(const Context& dev_ctx, using XPUType = typename XPUTypeTrait::Type; int axis = axis_scalar.to(); auto in_dims = x.dims(); - auto input_shape = vectorize(in_dims); + auto input_shape = common::vectorize(in_dims); std::vector out_ptrs; std::vector split_lists; for (size_t j = 0; j < outs.size(); ++j) { diff --git a/paddle/phi/kernels/xpu/stack_grad_kernel.cc b/paddle/phi/kernels/xpu/stack_grad_kernel.cc index cbc91e13dfc64e..be82010d696ce5 100644 --- a/paddle/phi/kernels/xpu/stack_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/stack_grad_kernel.cc @@ -29,7 +29,7 @@ void StackGradKernel(const Context& dev_ctx, auto dy_dims = out.dims(); if (axis < 0) axis += dy_dims.size(); - auto dy_shape = phi::vectorize(dy_dims); + auto dy_shape = common::vectorize(dy_dims); std::vector dx_dims_list(x_grad.size(), 1); std::vector dx_lists; diff --git a/paddle/phi/kernels/xpu/stride_slice_kernel.cc b/paddle/phi/kernels/xpu/stride_slice_kernel.cc index 2f026bae02fe45..5aee59729b52ef 100644 --- a/paddle/phi/kernels/xpu/stride_slice_kernel.cc +++ b/paddle/phi/kernels/xpu/stride_slice_kernel.cc @@ -49,7 +49,7 @@ void 
StridedSliceRawKernel(const Context& dev_ctx, out_dims_vector.data(), axes.size(), false); - DDim out_dims(phi::make_ddim(out_dims_vector)); + DDim out_dims(common::make_ddim(out_dims_vector)); out->Resize(out_dims); dev_ctx.template Alloc(out); diff --git a/paddle/phi/kernels/xpu/strided_copy_kernel.cc b/paddle/phi/kernels/xpu/strided_copy_kernel.cc index 4721d24f4e1194..4e37d439123c63 100644 --- a/paddle/phi/kernels/xpu/strided_copy_kernel.cc +++ b/paddle/phi/kernels/xpu/strided_copy_kernel.cc @@ -27,8 +27,8 @@ void StridedCopyKernel(const Context& dev_ctx, int64_t offset, DenseTensor* out) { phi::DenseTensorMeta meta = input.meta(); - meta.strides = phi::make_ddim(out_stride); - meta.dims = phi::make_ddim(dims); + meta.strides = common::make_ddim(out_stride); + meta.dims = common::make_ddim(dims); meta.offset = offset; out->set_meta(meta); @@ -61,10 +61,10 @@ void StridedCopyKernel(const Context& dev_ctx, r = xpu::strided_copy(dev_ctx.x_context(), input_data, output_data, - phi::vectorize(input.dims()), - phi::vectorize(out->dims()), - phi::vectorize(input.strides()), - phi::vectorize(out->strides())); + common::vectorize(input.dims()), + common::vectorize(out->dims()), + common::vectorize(input.strides()), + common::vectorize(out->strides())); } } else if (std::is_same::value) { auto input_data = reinterpret_cast(input.data()); @@ -76,13 +76,14 @@ void StridedCopyKernel(const Context& dev_ctx, if (input.numel() == 1) { r = xpu::copy(dev_ctx.x_context(), input_data, output_data, 1); } else { - r = xpu::strided_copy(dev_ctx.x_context(), - input_data, - output_data, - phi::vectorize(input.dims()), - phi::vectorize(out->dims()), - phi::vectorize(input.strides()), - phi::vectorize(out->strides())); + r = xpu::strided_copy( + dev_ctx.x_context(), + input_data, + output_data, + common::vectorize(input.dims()), + common::vectorize(out->dims()), + common::vectorize(input.strides()), + common::vectorize(out->strides())); } } else if (std::is_same::value) { using XPUFLOAT16 = typename XPUTypeTrait::Type; @@ -100,10 +101,10 @@ void StridedCopyKernel(const Context& dev_ctx, dev_ctx.x_context(), input_data, output_data, - phi::vectorize(input.dims()), - phi::vectorize(out->dims()), - phi::vectorize(input.strides()), - phi::vectorize(out->strides())); + common::vectorize(input.dims()), + common::vectorize(out->dims()), + common::vectorize(input.strides()), + common::vectorize(out->strides())); } } else if (std::is_same::value) { using XPUFLOAT16 = typename XPUTypeTrait::Type; @@ -121,10 +122,10 @@ void StridedCopyKernel(const Context& dev_ctx, dev_ctx.x_context(), input_data, output_data, - phi::vectorize(input.dims()), - phi::vectorize(out->dims()), - phi::vectorize(input.strides()), - phi::vectorize(out->strides())); + common::vectorize(input.dims()), + common::vectorize(out->dims()), + common::vectorize(input.strides()), + common::vectorize(out->strides())); } } else if (std::is_same::value) { using XPUFLOAT16 = typename XPUTypeTrait::Type; @@ -142,10 +143,10 @@ void StridedCopyKernel(const Context& dev_ctx, dev_ctx.x_context(), input_data, output_data, - phi::vectorize(input.dims()), - phi::vectorize(out->dims()), - phi::vectorize(input.strides()), - phi::vectorize(out->strides())); + common::vectorize(input.dims()), + common::vectorize(out->dims()), + common::vectorize(input.strides()), + common::vectorize(out->strides())); } } else if (std::is_same::value) { auto input_data = reinterpret_cast(input.data()); @@ -160,10 +161,10 @@ void StridedCopyKernel(const Context& dev_ctx, r = 
xpu::strided_copy(dev_ctx.x_context(), input_data, output_data, - phi::vectorize(input.dims()), - phi::vectorize(out->dims()), - phi::vectorize(input.strides()), - phi::vectorize(out->strides())); + common::vectorize(input.dims()), + common::vectorize(out->dims()), + common::vectorize(input.strides()), + common::vectorize(out->strides())); } } else if (std::is_same::value) { auto input_data = reinterpret_cast(input.data()); @@ -178,10 +179,10 @@ void StridedCopyKernel(const Context& dev_ctx, r = xpu::strided_copy(dev_ctx.x_context(), input_data, output_data, - phi::vectorize(input.dims()), - phi::vectorize(out->dims()), - phi::vectorize(input.strides()), - phi::vectorize(out->strides())); + common::vectorize(input.dims()), + common::vectorize(out->dims()), + common::vectorize(input.strides()), + common::vectorize(out->strides())); } } else if (std::is_same::value) { auto input_data = reinterpret_cast(input.data()); @@ -193,13 +194,14 @@ void StridedCopyKernel(const Context& dev_ctx, if (input.numel() == 1) { r = xpu::copy(dev_ctx.x_context(), input_data, output_data, 1); } else { - r = xpu::strided_copy(dev_ctx.x_context(), - input_data, - output_data, - phi::vectorize(input.dims()), - phi::vectorize(out->dims()), - phi::vectorize(input.strides()), - phi::vectorize(out->strides())); + r = xpu::strided_copy( + dev_ctx.x_context(), + input_data, + output_data, + common::vectorize(input.dims()), + common::vectorize(out->dims()), + common::vectorize(input.strides()), + common::vectorize(out->strides())); } } else if (std::is_same::value) { auto input_data = reinterpret_cast(input.data()); @@ -211,13 +213,14 @@ void StridedCopyKernel(const Context& dev_ctx, if (input.numel() == 1) { r = xpu::copy(dev_ctx.x_context(), input_data, output_data, 1); } else { - r = xpu::strided_copy(dev_ctx.x_context(), - input_data, - output_data, - phi::vectorize(input.dims()), - phi::vectorize(out->dims()), - phi::vectorize(input.strides()), - phi::vectorize(out->strides())); + r = xpu::strided_copy( + dev_ctx.x_context(), + input_data, + output_data, + common::vectorize(input.dims()), + common::vectorize(out->dims()), + common::vectorize(input.strides()), + common::vectorize(out->strides())); } } else if (std::is_same::value) { auto input_data = reinterpret_cast(input.data()); @@ -233,10 +236,10 @@ void StridedCopyKernel(const Context& dev_ctx, r = xpu::strided_copy(dev_ctx.x_context(), input_data, output_data, - phi::vectorize(input.dims()), - phi::vectorize(out->dims()), - phi::vectorize(input.strides()), - phi::vectorize(out->strides())); + common::vectorize(input.dims()), + common::vectorize(out->dims()), + common::vectorize(input.strides()), + common::vectorize(out->strides())); } } else { PADDLE_THROW(phi::errors::InvalidArgument( diff --git a/paddle/phi/kernels/xpu/take_along_axis_kernel.cc b/paddle/phi/kernels/xpu/take_along_axis_kernel.cc index db98be61206742..e55604e768b9af 100644 --- a/paddle/phi/kernels/xpu/take_along_axis_kernel.cc +++ b/paddle/phi/kernels/xpu/take_along_axis_kernel.cc @@ -16,8 +16,8 @@ #include "glog/logging.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/xpu/temporal_shift_grad_kernel.cc b/paddle/phi/kernels/xpu/temporal_shift_grad_kernel.cc index bba2442fdc9e40..37ecad59285ee3 100644 --- a/paddle/phi/kernels/xpu/temporal_shift_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/temporal_shift_grad_kernel.cc @@ -14,8 
+14,8 @@ #include "paddle/phi/kernels/temporal_shift_grad_kernel.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/axis_utils.h" @@ -31,7 +31,7 @@ void TemporalShiftGradKernel(const Context& dev_ctx, auto* input_grad = x_grad; auto* output_grad = &out_grad; int t = seg_num; - const DataLayout data_layout = phi::StringToDataLayout(data_format_str); + const DataLayout data_layout = common::StringToDataLayout(data_format_str); const int nt = output_grad->dims()[0]; const int n = nt / t; @@ -43,8 +43,8 @@ void TemporalShiftGradKernel(const Context& dev_ctx, : output_grad->dims()[2]); DDim in_grad_dims = - (data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w}) - : phi::make_ddim({nt, h, w, c})); + (data_layout == DataLayout::kNCHW ? common::make_ddim({nt, c, h, w}) + : common::make_ddim({nt, h, w, c})); const T* output_grad_data = output_grad->data<T>(); input_grad->Resize(in_grad_dims); T* input_grad_data = dev_ctx.template Alloc<T>(input_grad); diff --git a/paddle/phi/kernels/xpu/temporal_shift_kernel.cc b/paddle/phi/kernels/xpu/temporal_shift_kernel.cc index 3da9873d2c01ac..61ee3b555f0dee 100644 --- a/paddle/phi/kernels/xpu/temporal_shift_kernel.cc +++ b/paddle/phi/kernels/xpu/temporal_shift_kernel.cc @@ -14,8 +14,8 @@ #include "paddle/phi/kernels/temporal_shift_kernel.h" +#include "paddle/common/layout.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/axis_utils.h" @@ -31,7 +31,7 @@ void TemporalShiftKernel(const Context& dev_ctx, auto* input = &x; auto* output = out; int t = seg_num; - const DataLayout data_layout = phi::StringToDataLayout(data_format_str); + const DataLayout data_layout = common::StringToDataLayout(data_format_str); const int nt = input->dims()[0]; const int n = nt / t; @@ -43,8 +43,8 @@ void TemporalShiftKernel(const Context& dev_ctx, (data_layout == DataLayout::kNCHW ? input->dims()[3] : input->dims()[2]); DDim out_dims = - (data_layout == DataLayout::kNCHW ? 
common::make_ddim({nt, c, h, w}) + : common::make_ddim({nt, h, w, c})); const T* input_data = input->data(); output->Resize(out_dims); T* output_data = dev_ctx.template Alloc(output); diff --git a/paddle/phi/kernels/xpu/tile_grad_kernel.cc b/paddle/phi/kernels/xpu/tile_grad_kernel.cc index c9dce98d192343..b131c168549607 100644 --- a/paddle/phi/kernels/xpu/tile_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/tile_grad_kernel.cc @@ -26,7 +26,7 @@ void TileGradKernel(const Context& dev_ctx, const IntArray& repeat_times, DenseTensor* x_grad) { auto x_dims = x.dims(); - auto vec_x_dims = phi::vectorize(x_dims); + auto vec_x_dims = common::vectorize(x_dims); auto repeat_times_data = repeat_times.GetData(); if (repeat_times_data.size() < vec_x_dims.size()) { int diff = vec_x_dims.size() - repeat_times_data.size(); diff --git a/paddle/phi/kernels/xpu/tile_kernel.cc b/paddle/phi/kernels/xpu/tile_kernel.cc index f6bc716a7d58a7..cce230c970bf97 100644 --- a/paddle/phi/kernels/xpu/tile_kernel.cc +++ b/paddle/phi/kernels/xpu/tile_kernel.cc @@ -74,7 +74,7 @@ void TileKernel(const Context& dev_ctx, "be positive integers, but the value received is %d.", repeat_times[i])); } - auto vec_in_dims = phi::vectorize(in_dims); + auto vec_in_dims = common::vectorize(in_dims); if (repeat_times.size() < vec_in_dims.size()) { int diff = vec_in_dims.size() - repeat_times.size(); repeat_times.insert(repeat_times.begin(), diff, 1); @@ -91,13 +91,13 @@ void TileKernel(const Context& dev_ctx, vec_in_dims.size(), repeat_times.size())); - DDim new_in_dims = phi::make_ddim(vec_in_dims); + DDim new_in_dims = common::make_ddim(vec_in_dims); DDim out_dims(new_in_dims); for (size_t i = 0; i < repeat_times.size(); ++i) { out_dims[i] *= repeat_times[i]; } - auto vec_out_dims = phi::vectorize(out_dims); + auto vec_out_dims = common::vectorize(out_dims); out->Resize(out_dims); dev_ctx.template Alloc(out); diff --git a/paddle/phi/kernels/xpu/top_k_kernel.cc b/paddle/phi/kernels/xpu/top_k_kernel.cc index a3a37db5e6e0fb..146a09ef410f55 100644 --- a/paddle/phi/kernels/xpu/top_k_kernel.cc +++ b/paddle/phi/kernels/xpu/top_k_kernel.cc @@ -65,7 +65,7 @@ void TopkKernel(const Context& dev_ctx, PADDLE_ENFORCE_XDNN_NOT_NULL(indices_int_data); const size_t row = - phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); + common::product(common::slice_ddim(in_dims, 0, in_dims.size() - 1)); const size_t col = in_dims[in_dims.size() - 1]; int r = xpu::sorted_topk(dev_ctx.x_context(), reinterpret_cast(in_data), @@ -131,8 +131,8 @@ void TopkKernel(const Context& dev_ctx, int32_t* trans_idx_int32_data = RAII_GUARD.alloc_l3_or_gm(out->numel()); PADDLE_ENFORCE_XDNN_NOT_NULL(trans_idx_int32_data); - const size_t row = - phi::product(phi::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); + const size_t row = common::product( + common::slice_ddim(trans_dims, 0, trans_dims.size() - 1)); const size_t col = trans_dims[trans_dims.size() - 1]; // Do top k on transposed input diff --git a/paddle/phi/kernels/xpu/transpose_grad_kernel.cc b/paddle/phi/kernels/xpu/transpose_grad_kernel.cc index 71b2187bddce10..ab6be8c3347cac 100644 --- a/paddle/phi/kernels/xpu/transpose_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/transpose_grad_kernel.cc @@ -48,7 +48,7 @@ void TransposeGradKernel(const Context& dev_ctx, reversed_axis[formated_axis[i]] = i; } - std::vector out_grad_dim_vec = phi::vectorize(out_grad.dims()); + std::vector out_grad_dim_vec = common::vectorize(out_grad.dims()); int r = xpu::transpose( dev_ctx.x_context(), reinterpret_cast(out_grad.data()), diff 
--git a/paddle/phi/kernels/xpu/transpose_kernel.cc b/paddle/phi/kernels/xpu/transpose_kernel.cc index dd985ddc7ebc58..f88e06b18e88db 100644 --- a/paddle/phi/kernels/xpu/transpose_kernel.cc +++ b/paddle/phi/kernels/xpu/transpose_kernel.cc @@ -43,7 +43,7 @@ void TransposeKernel(const Context& dev_ctx, return; } - std::vector x_dim_vec = phi::vectorize(x.dims()); + std::vector x_dim_vec = common::vectorize(x.dims()); int r = xpu::transpose(dev_ctx.x_context(), reinterpret_cast(x.data()), reinterpret_cast(out->data()), diff --git a/paddle/phi/kernels/xpu/tril_triu_grad_kernel.cc b/paddle/phi/kernels/xpu/tril_triu_grad_kernel.cc index aa3fbb8f9423de..4f672c6b609290 100644 --- a/paddle/phi/kernels/xpu/tril_triu_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/tril_triu_grad_kernel.cc @@ -27,7 +27,7 @@ void TrilTriuGradKernel(const Context& ctx, DenseTensor* x_grad) { using XPUType = typename XPUTypeTrait::Type; ctx.template Alloc(x_grad); - auto dy_shape = vectorize(out_grad.dims()); + auto dy_shape = common::vectorize(out_grad.dims()); int r = 0; if (lower) { r = xpu::tril(ctx.x_context(), diff --git a/paddle/phi/kernels/xpu/tril_triu_kernel.cc b/paddle/phi/kernels/xpu/tril_triu_kernel.cc index e72d5b73dfa5b9..26169136c9d3c8 100644 --- a/paddle/phi/kernels/xpu/tril_triu_kernel.cc +++ b/paddle/phi/kernels/xpu/tril_triu_kernel.cc @@ -27,7 +27,7 @@ void TrilTriuKernel(const Context& ctx, DenseTensor* out) { using XPUType = typename XPUTypeTrait::Type; ctx.template Alloc(out); - auto xshape = vectorize(x.dims()); + auto xshape = common::vectorize(x.dims()); int r = 0; if (lower) { r = xpu::tril(ctx.x_context(), diff --git a/paddle/phi/kernels/xpu/unbind_kernel.cc b/paddle/phi/kernels/xpu/unbind_kernel.cc index fb7ebc9c13452e..ba59be52884ded 100644 --- a/paddle/phi/kernels/xpu/unbind_kernel.cc +++ b/paddle/phi/kernels/xpu/unbind_kernel.cc @@ -32,7 +32,7 @@ void UnbindKernel(const Context& dev_ctx, dev_ctx.template Alloc(outs[j]); y_ptrs.emplace_back(outs[j]->data()); } - auto x_shape = vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); int r = xpu::unbind(dev_ctx.x_context(), x.data(), y_ptrs, x_shape, axis); PADDLE_ENFORCE_XDNN_SUCCESS(r, "unbind"); } diff --git a/paddle/phi/kernels/xpu/unfold_grad_kernel.cc b/paddle/phi/kernels/xpu/unfold_grad_kernel.cc index 298d6655331da0..1bf6e989d7029b 100644 --- a/paddle/phi/kernels/xpu/unfold_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/unfold_grad_kernel.cc @@ -31,7 +31,7 @@ void UnfoldGradKernel(const Context& ctx, DenseTensor* x_grad) { using XPUType = typename XPUTypeTrait::Type; ctx.template Alloc(x_grad); - const std::string data_format = phi::DataLayoutToString(x.layout()); + const std::string data_format = common::DataLayoutToString(x.layout()); bool is_nchw = data_format == "NCHW"; PADDLE_ENFORCE_EQ(is_nchw, true, diff --git a/paddle/phi/kernels/xpu/unfold_kernel.cc b/paddle/phi/kernels/xpu/unfold_kernel.cc index 64a12b2881296e..2ed1860128140b 100644 --- a/paddle/phi/kernels/xpu/unfold_kernel.cc +++ b/paddle/phi/kernels/xpu/unfold_kernel.cc @@ -30,7 +30,7 @@ void UnfoldKernel(const Context& ctx, DenseTensor* out) { using XPUType = typename XPUTypeTrait::Type; ctx.template Alloc(out); - const std::string data_format = phi::DataLayoutToString(x.layout()); + const std::string data_format = common::DataLayoutToString(x.layout()); bool is_nchw = data_format == "NCHW"; PADDLE_ENFORCE_EQ(is_nchw, true, diff --git a/paddle/phi/kernels/xpu/uniform_kernel.cc b/paddle/phi/kernels/xpu/uniform_kernel.cc index ead65b65a8466f..a53924e3619410 100644 --- 
a/paddle/phi/kernels/xpu/uniform_kernel.cc +++ b/paddle/phi/kernels/xpu/uniform_kernel.cc @@ -28,7 +28,7 @@ void UniformKernel(const Context &dev_ctx, const Scalar &max, int seed, DenseTensor *out) { - out->Resize(phi::make_ddim(shape.GetData())); + out->Resize(common::make_ddim(shape.GetData())); T *data = dev_ctx.template Alloc(out); if (out->numel() == 0) { return; diff --git a/paddle/phi/kernels/xpu/unique_kernel.cc b/paddle/phi/kernels/xpu/unique_kernel.cc index 6f2d8f470a2120..944276a4b6f51a 100644 --- a/paddle/phi/kernels/xpu/unique_kernel.cc +++ b/paddle/phi/kernels/xpu/unique_kernel.cc @@ -63,23 +63,23 @@ void XPUFlattenUniqueKernelImpl(const Context& dev_ctx, unique_len_xpu, sizeof(int64_t)); } - out->Resize(phi::make_ddim({unique_len_cpu})); + out->Resize(common::make_ddim({unique_len_cpu})); auto* out_data = dev_ctx.template Alloc(out); IndexT* indices_data = nullptr; if (return_index) { - indices->Resize(phi::make_ddim({unique_len_cpu})); + indices->Resize(common::make_ddim({unique_len_cpu})); indices_data = dev_ctx.template Alloc(indices); } IndexT* inverse_data = nullptr; if (return_inverse) { - index->Resize(phi::make_ddim({x_len})); + index->Resize(common::make_ddim({x_len})); inverse_data = dev_ctx.template Alloc(index); } IndexT* counts_data = nullptr; if (return_counts) { - counts->Resize(phi::make_ddim({unique_len_cpu})); + counts->Resize(common::make_ddim({unique_len_cpu})); counts_data = dev_ctx.template Alloc(counts); } if (x_len == 0) { @@ -124,7 +124,7 @@ void XPUDimUniqueKernelImpl(const Context& dev_ctx, permute[axis] = 0; permute[0] = axis; if (axis != 0) { - auto x_shape = vectorize(x.dims()); + auto x_shape = common::vectorize(x.dims()); r = xpu::transpose(dev_ctx.x_context(), reinterpret_cast(x_data), x_trans_data, @@ -142,10 +142,10 @@ void XPUDimUniqueKernelImpl(const Context& dev_ctx, DDim x_trans_dims = x.dims(); x_trans_dims[0] = x.dims()[axis]; x_trans_dims[axis] = x.dims()[0]; - DDim x_trans_flat_dims = phi::flatten_to_2d(x_trans_dims, 1); + DDim x_trans_flat_dims = common::flatten_to_2d(x_trans_dims, 1); int64_t axis_len = x_trans_flat_dims[0]; int64_t slice_size = x_trans_flat_dims[1]; - auto x_trans_flat_dims_vec = vectorize(x_trans_flat_dims); + auto x_trans_flat_dims_vec = common::vectorize(x_trans_flat_dims); auto* sorted_axis_idx = RAII_GUARD.alloc_l3_or_gm(axis_len); auto* sort_in_tmp = RAII_GUARD.alloc_l3_or_gm(axis_len); @@ -284,7 +284,7 @@ void XPUDimUniqueKernelImpl(const Context& dev_ctx, PADDLE_ENFORCE_XDNN_SUCCESS(r, "gather"); DDim out_trans_dims = x_trans_dims; out_trans_dims[0] = unique_len; - auto out_trans_dims_vec = vectorize(out_trans_dims); + auto out_trans_dims_vec = common::vectorize(out_trans_dims); if (axis != 0) { r = xpu::transpose(dev_ctx.x_context(), out_trans_data, diff --git a/paddle/phi/kernels/xpu/unstack_kernel.cc b/paddle/phi/kernels/xpu/unstack_kernel.cc index 1c9c7a797957a4..a498ed99c2460f 100644 --- a/paddle/phi/kernels/xpu/unstack_kernel.cc +++ b/paddle/phi/kernels/xpu/unstack_kernel.cc @@ -29,7 +29,7 @@ void UnStackKernel(const Context &dev_ctx, auto x_dims = x.dims(); if (axis < 0) axis += x_dims.size(); - auto x_shape = phi::vectorize(x_dims); + auto x_shape = common::vectorize(x_dims); std::vector dx_dims_list(outs.size(), 1); std::vector dx_lists; diff --git a/paddle/phi/kernels/xpu/warpctc_kernel.cc b/paddle/phi/kernels/xpu/warpctc_kernel.cc index aac1ee9093a4e6..7a5bbfe5cb2998 100644 --- a/paddle/phi/kernels/xpu/warpctc_kernel.cc +++ b/paddle/phi/kernels/xpu/warpctc_kernel.cc @@ -110,7 +110,7 @@ void 
WarpctcKernel(const Context& dev_ctx, DataTypeToString(labels_length_dtype))); warpctcgrad->Resize( - phi::make_ddim({max_sequence_length, num_sequences, sequence_width})); + common::make_ddim({max_sequence_length, num_sequences, sequence_width})); dev_ctx.template Alloc(warpctcgrad); T* warpctcgrad_data = warpctcgrad->data(); @@ -136,7 +136,7 @@ void WarpctcKernel(const Context& dev_ctx, 256 * 1024, sm_workspace + lm_workspace)); - loss->Resize(phi::make_ddim({num_sequences, 1})); + loss->Resize(common::make_ddim({num_sequences, 1})); dev_ctx.template Alloc(loss); T* loss_data = loss->data(); diff --git a/paddle/phi/kernels/xpu/where_grad_kernel.cc b/paddle/phi/kernels/xpu/where_grad_kernel.cc index 03cdc117ed0d9d..49a5a1b22685d7 100644 --- a/paddle/phi/kernels/xpu/where_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/where_grad_kernel.cc @@ -31,8 +31,8 @@ void WhereGradKernel(const Context& ctx, const auto* cond_data = condition.data(); auto* dout = out_grad.data(); - auto cond_shape = phi::vectorize(condition.dims()); - auto out_shape = phi::vectorize(out_grad.dims()); + auto cond_shape = common::vectorize(condition.dims()); + auto out_shape = common::vectorize(out_grad.dims()); // use [1] to replace [], because xpu not support [] if (cond_shape.size() == 0) { cond_shape = std::vector({1}); diff --git a/paddle/phi/kernels/xpu/where_kernel.cc b/paddle/phi/kernels/xpu/where_kernel.cc index 4c5a7fbf5cc094..1edfc693cff867 100644 --- a/paddle/phi/kernels/xpu/where_kernel.cc +++ b/paddle/phi/kernels/xpu/where_kernel.cc @@ -31,8 +31,8 @@ void WhereKernel(const Context& ctx, const XPUType* y_data = reinterpret_cast(y.data()); XPUType* out_data = reinterpret_cast(ctx.template Alloc(out)); - auto cond_dims = phi::vectorize(condition.dims()); - auto x_dims = phi::vectorize(x.dims()); + auto cond_dims = common::vectorize(condition.dims()); + auto x_dims = common::vectorize(x.dims()); // use [1] to replace [], because xpu not support [] if (cond_dims.size() == 0) { diff --git a/paddle/phi/kernels/xpu/xpu_api_wrapper.h b/paddle/phi/kernels/xpu/xpu_api_wrapper.h index b75eaa15893234..70ee326500e1ca 100644 --- a/paddle/phi/kernels/xpu/xpu_api_wrapper.h +++ b/paddle/phi/kernels/xpu/xpu_api_wrapper.h @@ -122,9 +122,9 @@ static void GetFCInfo(const phi::DDim& x_dims, bool trans_y, XpuFcInfo* info) { DDim new_x_dims = - (x_dims.size() > 1) ? x_dims : phi::make_ddim({1, x_dims[0]}); + (x_dims.size() > 1) ? x_dims : common::make_ddim({1, x_dims[0]}); DDim new_y_dims = - (y_dims.size() > 1) ? y_dims : phi::make_ddim({y_dims[0], 1}); + (y_dims.size() > 1) ? 
y_dims : common::make_ddim({y_dims[0], 1}); auto mat_dim_a = phi::funcs::CreateMatrixDescriptor(new_x_dims, 0, trans_x); auto mat_dim_b = phi::funcs::CreateMatrixDescriptor(new_y_dims, 0, trans_y); diff --git a/paddle/phi/tools/CMakeLists.txt b/paddle/phi/tools/CMakeLists.txt index 74a9bd66a78d05..e6ae73384180eb 100644 --- a/paddle/phi/tools/CMakeLists.txt +++ b/paddle/phi/tools/CMakeLists.txt @@ -6,7 +6,7 @@ if(WITH_GPU) endif() add_executable(print_phi_kernels print_phi_kernels.cc) -target_link_libraries(print_phi_kernels phi) +target_link_libraries(print_phi_kernels phi common) if(WIN32) target_link_libraries(print_phi_kernels shlwapi.lib) endif() diff --git a/paddle/pir/core/block.cc b/paddle/pir/core/block.cc index 9a42e927557990..73902960c95ab7 100644 --- a/paddle/pir/core/block.cc +++ b/paddle/pir/core/block.cc @@ -16,7 +16,7 @@ #include -#include "paddle/pir/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/operation.h" #include "paddle/pir/core/region.h" diff --git a/paddle/pir/core/block_argument.cc b/paddle/pir/core/block_argument.cc index a0da7fbc16b2ac..66a18964280d39 100644 --- a/paddle/pir/core/block_argument.cc +++ b/paddle/pir/core/block_argument.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/pir/core/block_argument.h" -#include "paddle/pir/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/value_impl.h" #define CHECK_NULL_IMPL(func_name) \ diff --git a/paddle/pir/core/block_operand.cc b/paddle/pir/core/block_operand.cc index 78dd9c0b5d14e6..2b435f74a29a94 100644 --- a/paddle/pir/core/block_operand.cc +++ b/paddle/pir/core/block_operand.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "paddle/pir/core/block_operand.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/block.h" #include "paddle/pir/core/block_operand_impl.h" -#include "paddle/pir/core/enforce.h" namespace pir { diff --git a/paddle/pir/core/builtin_attribute_storage.h b/paddle/pir/core/builtin_attribute_storage.h index 2ab13326d3ebc6..c35d17e2544e6f 100644 --- a/paddle/pir/core/builtin_attribute_storage.h +++ b/paddle/pir/core/builtin_attribute_storage.h @@ -18,9 +18,9 @@ #include #include +#include "paddle/common/enforce.h" #include "paddle/pir/core/attribute.h" #include "paddle/pir/core/attribute_base.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/core/type.h" #include "paddle/pir/core/utils.h" diff --git a/paddle/pir/core/builtin_op.cc b/paddle/pir/core/builtin_op.cc index c228e2565b9ed5..9f80b7a93a4193 100644 --- a/paddle/pir/core/builtin_op.cc +++ b/paddle/pir/core/builtin_op.cc @@ -13,7 +13,7 @@ // limitations under the License. 
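// A minimal sketch (not part of the patch) of the call-site pattern the phi
// kernel hunks above rely on: the DDim helpers keep their names and
// signatures and only move from the phi namespace to common, so each hunk is
// a mechanical rename. That paddle/common/ddim.h exports common::DDim,
// common::make_ddim, and common::vectorize with the old phi semantics is an
// assumption inferred from these call sites.

#include <cstdint>
#include <vector>

#include "paddle/common/ddim.h"

std::vector<int64_t> ExampleShape() {
  common::DDim dims = common::make_ddim({2, 3, 4});  // was: phi::make_ddim
  return common::vectorize(dims);                    // was: phi::vectorize
}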
#include "paddle/pir/core/builtin_op.h" -#include "paddle/phi/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/builtin_attribute.h" #include "paddle/pir/core/builtin_type.h" diff --git a/paddle/pir/core/builtin_type.cc b/paddle/pir/core/builtin_type.cc index fb168a9a051cc4..a12255dc8adf97 100644 --- a/paddle/pir/core/builtin_type.cc +++ b/paddle/pir/core/builtin_type.cc @@ -23,9 +23,7 @@ const DenseTensorType::Dim& DenseTensorType::dims() const { return storage()->dims_; } -DenseTensorType::DataLayout DenseTensorType::data_layout() const { - return storage()->layout_; -} +DataLayout DenseTensorType::data_layout() const { return storage()->layout_; } const DenseTensorType::LoD& DenseTensorType::lod() const { return storage()->lod_; diff --git a/paddle/pir/core/builtin_type.h b/paddle/pir/core/builtin_type.h index d151f80d3e79c7..b1f21d6e9d418b 100644 --- a/paddle/pir/core/builtin_type.h +++ b/paddle/pir/core/builtin_type.h @@ -59,7 +59,6 @@ class DenseTensorType : public Type::TypeBase ShapedTypeInterface::GetDyShape() const { if (dy_shape_.size() == 0) { - auto ddim_vec = vectorize(impl_->get_shape(*this)); + auto ddim_vec = common::vectorize(impl_->get_shape(*this)); dy_shape_ = ddim_vec; std::replace(dy_shape_.begin(), dy_shape_.end(), diff --git a/paddle/pir/core/builtin_type_interfaces.h b/paddle/pir/core/builtin_type_interfaces.h index 6497a0146bd69c..34144e5a7785b8 100644 --- a/paddle/pir/core/builtin_type_interfaces.h +++ b/paddle/pir/core/builtin_type_interfaces.h @@ -17,16 +17,16 @@ #include #include -#include "paddle/phi/core/tensor_base.h" +#include "paddle/common/ddim.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/cast_utils.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/core/type.h" namespace pir { class ShapedTypeInterface : public TypeInterfaceBase { public: - using DDim = phi::DDim; + using DDim = pir::DDim; using DataType = Type; struct Concept { /// Defined these methods with the interface. diff --git a/paddle/pir/core/builtin_type_storage.h b/paddle/pir/core/builtin_type_storage.h index 10063963df6332..d8361658f9e85b 100644 --- a/paddle/pir/core/builtin_type_storage.h +++ b/paddle/pir/core/builtin_type_storage.h @@ -14,8 +14,9 @@ #pragma once -#include "paddle/phi/common/layout.h" -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" +#include "paddle/common/dim.h" +#include "paddle/common/layout.h" #include "paddle/pir/core/type.h" #include "paddle/pir/core/type_base.h" #include "paddle/pir/core/utils.h" @@ -50,13 +51,13 @@ struct DenseTensorTypeStorage : public pir::TypeStorage { /// /// \brief Declare ParamKey according to parameter type. /// - using DataLayout = phi::DataLayout; - using Dim = phi::DDim; + using Dim = pir::DDim; + using DataLayout = pir::DataLayout; using LoD = std::vector>; - using ParamKey = std::tuple; + using ParamKey = std::tuple; DenseTensorTypeStorage(Type dtype, - const Dim& dims, + const pir::DDim& dims, DataLayout layout, const LoD& lod, size_t offset) @@ -88,7 +89,7 @@ struct DenseTensorTypeStorage : public pir::TypeStorage { pir::hash_combine(hash_value, std::hash()(std::get<0>(key))); // hash dims hash_value = - pir::hash_combine(hash_value, std::hash()(std::get<1>(key))); + pir::hash_combine(hash_value, std::hash()(std::get<1>(key))); // hash layout hash_value = pir::hash_combine( hash_value, @@ -120,7 +121,7 @@ struct DenseTensorTypeStorage : public pir::TypeStorage { /// layout, lod, offset. 
/// pir::Type dtype_; - Dim dims_; + pir::DDim dims_; DataLayout layout_; LoD lod_; size_t offset_; diff --git a/paddle/pir/core/dialect.h b/paddle/pir/core/dialect.h index 87332e184256aa..0cb6f9aae234e1 100644 --- a/paddle/pir/core/dialect.h +++ b/paddle/pir/core/dialect.h @@ -17,10 +17,10 @@ #include #include +#include "paddle/common/enforce.h" #include "paddle/pir/core/attribute.h" #include "paddle/pir/core/attribute_base.h" #include "paddle/pir/core/dialect_interface.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/core/ir_context.h" #include "paddle/pir/core/op_base.h" #include "paddle/pir/core/type_base.h" diff --git a/paddle/pir/core/enforce.h b/paddle/pir/core/enforce.h deleted file mode 100644 index e8624b8bbe4e13..00000000000000 --- a/paddle/pir/core/enforce.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include - -#include "paddle/utils/string/printf.h" - -#if defined(_WIN32) -#define UNUSED -#define __builtin_expect(EXP, C) (EXP) -#else -#define UNUSED __attribute__((unused)) -#endif - -#if !defined(_WIN32) -#define UNLIKELY(condition) __builtin_expect(static_cast(condition), 0) -#else -// there is no equivalent intrinsics in msvc. -#define UNLIKELY(condition) (condition) -#endif -template -inline bool is_error(const T& stat) { - return !stat; -} - -namespace pir { -class IrNotMetException : public std::exception { - public: - explicit IrNotMetException(const std::string& str) : err_str_(str) {} - - const char* what() const noexcept override { return err_str_.c_str(); } - - private: - std::string err_str_; -}; - -#define IR_THROW(...) \ - do { \ - try { \ - throw pir::IrNotMetException( \ - paddle::string::Sprintf("Error occured at: %s:%d :\n%s", \ - __FILE__, \ - __LINE__, \ - paddle::string::Sprintf(__VA_ARGS__))); \ - } catch (const std::exception& e) { \ - std::cout << e.what() << std::endl; \ - throw; \ - } \ - } while (0) - -#define IR_ENFORCE(COND, ...) 
\ - do { \ - bool __cond__(COND); \ - if (UNLIKELY(is_error(__cond__))) { \ - try { \ - throw pir::IrNotMetException( \ - paddle::string::Sprintf("Error occured at: %s:%d :\n%s", \ - __FILE__, \ - __LINE__, \ - paddle::string::Sprintf(__VA_ARGS__))); \ - } catch (const std::exception& e) { \ - std::cout << e.what() << std::endl; \ - throw; \ - } \ - } \ - } while (0) - -} // namespace pir diff --git a/paddle/pir/core/interface_support.h b/paddle/pir/core/interface_support.h index 083be35f7f1f92..f8fc83efa31720 100644 --- a/paddle/pir/core/interface_support.h +++ b/paddle/pir/core/interface_support.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/pir/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/interface_value.h" namespace pir { diff --git a/paddle/pir/core/iterator.h b/paddle/pir/core/iterator.h index 54563d2fce80c1..4e87fa290f8cff 100644 --- a/paddle/pir/core/iterator.h +++ b/paddle/pir/core/iterator.h @@ -15,7 +15,7 @@ #pragma once #include #include -#include "paddle/pir/core/macros.h" +#include "paddle/common/macros.h" namespace pir { class Operation; diff --git a/paddle/pir/core/macros.h b/paddle/pir/core/macros.h deleted file mode 100644 index 25d6dd5a812abc..00000000000000 --- a/paddle/pir/core/macros.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -namespace pir { -// TODO(Aurelius84): We also has DISABLE_COPY_AND_ASSIGN in phi/core/maros.h, -// howere it's not recommended to use it in ir namspace. So we define this again -// here. - -// Disable the copy and assignment operator for a class. -#ifndef DISABLE_COPY_AND_ASSIGN -#define DISABLE_COPY_AND_ASSIGN(classname) \ - private: \ - classname(const classname&) = delete; \ - classname(classname&&) = delete; \ - classname& operator=(const classname&) = delete; \ - classname& operator=(classname&&) = delete -#endif - -} // namespace pir diff --git a/paddle/pir/core/op_base.h b/paddle/pir/core/op_base.h index 9a0edfd6714988..c7f82954844d79 100644 --- a/paddle/pir/core/op_base.h +++ b/paddle/pir/core/op_base.h @@ -15,7 +15,7 @@ #pragma once #include -#include "paddle/pir/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/interface_support.h" #include "paddle/pir/core/op_result.h" #include "paddle/pir/core/operation.h" diff --git a/paddle/pir/core/op_operand.cc b/paddle/pir/core/op_operand.cc index c728180f48fbfb..74e5dced1fc630 100644 --- a/paddle/pir/core/op_operand.cc +++ b/paddle/pir/core/op_operand.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
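// A usage sketch for the enforce migration in the surrounding hunks: call
// sites keep using IR_THROW/IR_ENFORCE unchanged and only the providing
// header moves. That paddle/common/enforce.h supplies the same printf-style
// macros shown in the deleted paddle/pir/core/enforce.h above is the
// assumption these include swaps rely on.

#include "paddle/common/enforce.h"

void CheckRank(int rank) {
  // Both macros format their message via paddle::string::Sprintf, as in the
  // deleted definitions above.
  IR_ENFORCE(rank >= 0, "expected a non-negative rank, but got %d", rank);
  if (rank > 9) {
    IR_THROW("rank %d is not supported", rank);
  }
}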
#include "paddle/pir/core/op_operand.h" -#include "paddle/pir/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/op_operand_impl.h" #define CHECK_NULL_IMPL(class_name, func_name) \ diff --git a/paddle/pir/core/op_result.cc b/paddle/pir/core/op_result.cc index 8249872593652f..30c6ec97d8fbae 100644 --- a/paddle/pir/core/op_result.cc +++ b/paddle/pir/core/op_result.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/pir/core/op_result.h" -#include "paddle/pir/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/op_result_impl.h" #define CHECK_OPRESULT_NULL_IMPL(func_name) \ diff --git a/paddle/pir/core/op_trait.cc b/paddle/pir/core/op_trait.cc index d7103fa31ce455..506af3177e671f 100644 --- a/paddle/pir/core/op_trait.cc +++ b/paddle/pir/core/op_trait.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/pir/core/op_trait.h" -#include "paddle/pir/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/type_util.h" namespace { diff --git a/paddle/pir/core/operation.cc b/paddle/pir/core/operation.cc index 0697195fc2f94a..4ce0cda102be7e 100644 --- a/paddle/pir/core/operation.cc +++ b/paddle/pir/core/operation.cc @@ -14,10 +14,10 @@ #include +#include "paddle/common/enforce.h" #include "paddle/pir/core/block.h" #include "paddle/pir/core/block_operand_impl.h" #include "paddle/pir/core/dialect.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/core/op_info.h" #include "paddle/pir/core/op_result_impl.h" #include "paddle/pir/core/operation.h" diff --git a/paddle/pir/core/operation.h b/paddle/pir/core/operation.h index 11943609e41634..0c3f213adab506 100644 --- a/paddle/pir/core/operation.h +++ b/paddle/pir/core/operation.h @@ -16,10 +16,10 @@ #include #include +#include "paddle/common/enforce.h" +#include "paddle/common/macros.h" #include "paddle/pir/core/block.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/core/iterator.h" -#include "paddle/pir/core/macros.h" #include "paddle/pir/core/op_info.h" #include "paddle/pir/core/operation_utils.h" #include "paddle/pir/core/type.h" diff --git a/paddle/pir/core/region.cc b/paddle/pir/core/region.cc index dfb3b45aef3e9b..911cb740fa6452 100644 --- a/paddle/pir/core/region.cc +++ b/paddle/pir/core/region.cc @@ -13,8 +13,8 @@ // limitations under the License. #include "paddle/pir/core/region.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/block.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/core/operation.h" namespace pir { diff --git a/paddle/pir/core/storage_manager.cc b/paddle/pir/core/storage_manager.cc index 07cc4e07cce2c1..bcfdf34a231e83 100644 --- a/paddle/pir/core/storage_manager.cc +++ b/paddle/pir/core/storage_manager.cc @@ -17,7 +17,7 @@ #include #include -#include "paddle/pir/core/enforce.h" +#include "paddle/common/enforce.h" namespace pir { // This is a structure for creating, caching, and looking up Storage of diff --git a/paddle/pir/core/type_util.h b/paddle/pir/core/type_util.h index 5704ba2abea781..14f1c7022c88ce 100644 --- a/paddle/pir/core/type_util.h +++ b/paddle/pir/core/type_util.h @@ -31,8 +31,8 @@ Type GetElementTypeOrSelf(Type type); /// have the same size and each pair of the elements are equal or one of them is /// dynamic. 
/// -bool VerifyCompatibleShape(const phi::DDim& lhs_shape, - const phi::DDim& rhs_shape); +bool VerifyCompatibleShape(const pir::DDim& lhs_shape, + const pir::DDim& rhs_shape); /// /// \brief Returns true if the given two types have compatible shape. That diff --git a/paddle/pir/core/value.cc b/paddle/pir/core/value.cc index dec2aaecfb3441..8bdda56a5d75ee 100644 --- a/paddle/pir/core/value.cc +++ b/paddle/pir/core/value.cc @@ -16,7 +16,7 @@ #include -#include "paddle/pir/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/op_operand.h" #include "paddle/pir/core/op_result.h" #include "paddle/pir/core/operation.h" diff --git a/paddle/pir/dialect/shape/ir/shape_op.cc b/paddle/pir/dialect/shape/ir/shape_op.cc index bf4a85d0d648f0..4a494f3ca187ac 100644 --- a/paddle/pir/dialect/shape/ir/shape_op.cc +++ b/paddle/pir/dialect/shape/ir/shape_op.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/pir/dialect/shape/ir/shape_op.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/builtin_attribute.h" #include "paddle/pir/core/builtin_op.h" #include "paddle/pir/core/builtin_type.h" -#include "paddle/pir/core/enforce.h" namespace pir::shape { diff --git a/paddle/pir/pass/pass.h b/paddle/pir/pass/pass.h index cc5e4a1dcbd834..30c55d7d3c6c59 100644 --- a/paddle/pir/pass/pass.h +++ b/paddle/pir/pass/pass.h @@ -18,8 +18,8 @@ #include #include +#include "paddle/common/enforce.h" #include "paddle/pir/core/builtin_op.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/pass/analysis_manager.h" #include "paddle/pir/pattern_rewrite/pattern_rewrite_driver.h" diff --git a/paddle/pir/pass/pass_registry.h b/paddle/pir/pass/pass_registry.h index 88dbfa443ddc37..08d76133edaa55 100644 --- a/paddle/pir/pass/pass_registry.h +++ b/paddle/pir/pass/pass_registry.h @@ -18,8 +18,7 @@ #include #include -#include "paddle/pir/core/enforce.h" -#include "paddle/pir/core/macros.h" +#include "paddle/common/enforce.h" #include "paddle/pir/pass/pass.h" namespace pir { diff --git a/paddle/pir/pattern_rewrite/pattern_match.cc b/paddle/pir/pattern_rewrite/pattern_match.cc index 7155894a68ef47..2cc8e80e3d6dc2 100644 --- a/paddle/pir/pattern_rewrite/pattern_match.cc +++ b/paddle/pir/pattern_rewrite/pattern_match.cc @@ -17,7 +17,7 @@ #include #include -#include "paddle/pir/core/enforce.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/operation.h" namespace pir { diff --git a/paddle/pir/pattern_rewrite/pattern_match.h b/paddle/pir/pattern_rewrite/pattern_match.h index d247ff075615a6..a0c34d8f58f073 100644 --- a/paddle/pir/pattern_rewrite/pattern_match.h +++ b/paddle/pir/pattern_rewrite/pattern_match.h @@ -24,9 +24,9 @@ #include #include +#include "paddle/common/enforce.h" #include "paddle/pir/core/builder.h" #include "paddle/pir/core/dll_decl.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/core/ir_context.h" #include "paddle/pir/core/op_info.h" #include "paddle/pir/core/operation.h" diff --git a/paddle/testing/CMakeLists.txt b/paddle/testing/CMakeLists.txt index 4c5f3049f23254..ecba5716b7b494 100644 --- a/paddle/testing/CMakeLists.txt +++ b/paddle/testing/CMakeLists.txt @@ -5,6 +5,7 @@ if(WITH_TESTING) device_context gtest phi + common init memory phi_utils diff --git a/paddle/utils/CMakeLists.txt b/paddle/utils/CMakeLists.txt index 0bc36dd5578ea7..529ffe8ebb44c0 100644 --- a/paddle/utils/CMakeLists.txt +++ b/paddle/utils/CMakeLists.txt @@ -14,11 +14,11 @@ add_subdirectory(string) cc_test( array_ref_test SRCS array_ref_test.cc - DEPS gtest phi) 
+ DEPS gtest phi common) cc_test( small_vector_test SRCS small_vector_test.cc - DEPS gtest phi) + DEPS gtest phi common) cc_test( variant_test SRCS variant_test.cc @@ -32,5 +32,5 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) cc_library( pybind_util SRCS pybind.cc - DEPS phi) + DEPS phi common) endif() diff --git a/paddle/utils/string/CMakeLists.txt b/paddle/utils/string/CMakeLists.txt index ddfc8f96b2ecdd..7bdc43629b10a1 100644 --- a/paddle/utils/string/CMakeLists.txt +++ b/paddle/utils/string/CMakeLists.txt @@ -1,15 +1,15 @@ cc_library( pretty_log SRCS pretty_log.cc - DEPS phi) + DEPS phi common) cc_library( string_helper SRCS string_helper.cc - DEPS phi) + DEPS phi common) cc_test( stringprintf_test SRCS printf_test.cc - DEPS phi) + DEPS phi common) cc_test(to_string_test SRCS to_string_test.cc) cc_test(split_test SRCS split_test.cc) cc_test( diff --git a/patches/eigen/TensorReductionGpu.h b/patches/eigen/TensorReductionGpu.h index 696078e54881af..4807aaa2c1be75 100644 --- a/patches/eigen/TensorReductionGpu.h +++ b/patches/eigen/TensorReductionGpu.h @@ -14,7 +14,7 @@ namespace Eigen { namespace internal { #if defined(EIGEN_USE_GPU) && defined(EIGEN_GPUCC) -// Full reducers for GPU, don't vectorize for now +// Full reducers for GPU, don't common::vectorize for now // Reducer function that enables multiple gpu thread to safely accumulate at the same // output address. It basically reads the current value of the output variable, and diff --git a/python/setup.py.in b/python/setup.py.in index 25e1c2ca8df7cc..620893ab0d17b0 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -737,7 +737,7 @@ if '${CMAKE_BUILD_TYPE}' == 'Release': if "@APPLE@" == "1": commands = ["install_name_tool -id '@loader_path/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/base/${FLUID_CORE_NAME}" + '.so'] commands.append("install_name_tool -add_rpath '@loader_path/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/base/${FLUID_CORE_NAME}" + '.so') - commands.append("install_name_tool -add_rpath '@loader_path' ${PADDLE_BINARY_DIR}/python/paddle/libs/${COMMON_NAME}") + commands.append("install_name_tool -add_rpath '@loader_path/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/libs/${COMMON_NAME}") if('${WITH_SHARED_PHI}' == 'ON'): # change rpath of phi.ext for loading 3rd party libb commands.append("install_name_tool -add_rpath '@loader_path' ${PADDLE_BINARY_DIR}/python/paddle/libs/${PHI_NAME}") @@ -780,6 +780,7 @@ def find_files(pattern, root, recursive=False): headers = ( # paddle level api headers (high level api, for both training and inference) list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle')) + + list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/common')) + # paddle common headers list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/phi/api')) + # phi unify api header list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/phi/api/ext')) + # custom op api list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/phi/api/include')) + # phi api diff --git a/python/setup_cinn.py.in b/python/setup_cinn.py.in index 753a1d30cd7ad3..f7fb513ed218ea 100644 --- a/python/setup_cinn.py.in +++ b/python/setup_cinn.py.in @@ -141,6 +141,8 @@ if '${WITH_MKLDNN}' == 'ON': if '${CINN_ONLY}' == 'OFF': cinnlibs.append('${PHI_LIB}') + cinnlibs.append('${IR_LIB}') + cinnlibs.append('${COMMON_LIB}') if '${WITH_GPU}' == 'ON': cinnlibs.append('${CMAKE_BINARY_DIR}/dist/cinn/include/paddle/cinn/runtime/cuda/cinn_cuda_runtime_source.cuh') diff --git a/setup.py b/setup.py index e0e52c27d5b639..791e9ecc0cf22e 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 
@@ # check python python_version = platform.python_version() version_detail = sys.version_info -version = str(version_detail[0]) + '.' + str(version_detail[1]) +version = str(version_detail[0]) + '.' + str(version_detail[1]) env_version = str(os.getenv("PY_VERSION")) if version_detail < (3, 7): @@ -57,14 +57,12 @@ f"we will attempt to use the python version you set to execute." ) cmd = 'which python' + env_version - res = subprocess.run(cmd, shell = True, stdout=subprocess.PIPE) + res = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE) if res.returncode == 0: os.environ["PYTHON_EXECUTABLE"] = res else: - raise RuntimeError( - "We can't find the version you set in your machine" - ) - + raise RuntimeError("We can't find the version you set in your machine") + # check cmake CMAKE = shutil.which('cmake3') or shutil.which('cmake') @@ -1181,7 +1179,7 @@ def get_package_data_and_package_dir(): + '.so' ) commands.append( - "install_name_tool -add_rpath '@loader_path' " + "install_name_tool -add_rpath '@loader_path/../libs/' " + env_dict.get("PADDLE_BINARY_DIR") + '/python/paddle/libs/' + env_dict.get("COMMON_NAME") @@ -1266,6 +1264,9 @@ def get_headers(): + list( # phi api find_files('*.h', paddle_source_dir + '/paddle/phi/common') ) + + list( # common api + find_files('*.h', paddle_source_dir + '/paddle/common') + ) # phi level api headers (low level api, for training only) + list( # phi extension header find_files('*.h', paddle_source_dir + '/paddle/phi') diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 00186357bfac5e..dd4ff65e332019 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -189,6 +189,7 @@ if(${len} GREATER_EQUAL 1) if(WITH_SHARED_IR) target_link_libraries(${test_name} $) endif() + target_link_libraries(${test_name} $) add_dependencies(${test_name} ${paddle_lib} paddle_gtest_main_new) if(WITH_GPU) target_link_libraries(${test_name} ${CUDA_CUDART_LIBRARY} @@ -200,7 +201,7 @@ if(${len} GREATER_EQUAL 1) if(APPLE) target_link_libraries( ${test_name} - "-Wl,-rpath,$ -Wl,-rpath,$ -Wl,-rpath,$" + "-Wl,-rpath,$ -Wl,-rpath,$ -Wl,-rpath,$ -Wl,-rpath,$" ) endif() if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) @@ -239,9 +240,6 @@ endif() if(TARGET layer_test) add_dependencies(layer_test jit_download_program) - add_dependencies(layer_test_new jit_download_program) - set_tests_properties(layer_test_new PROPERTIES ENVIRONMENT - "FLAGS_jit_engine_type=New") endif() if(TEST buddy_allocator_test) diff --git a/test/cpp/auto_parallel/CMakeLists.txt b/test/cpp/auto_parallel/CMakeLists.txt index e041c746624443..f38cf32f350592 100644 --- a/test/cpp/auto_parallel/CMakeLists.txt +++ b/test/cpp/auto_parallel/CMakeLists.txt @@ -13,7 +13,8 @@ if(WITH_DISTRIBUTE) cc_test( dist_tensor_test SRCS dist_tensor_test.cc - DEPS phi) + DEPS phi common) + paddle_test( spmd_rule_test SRCS @@ -21,7 +22,8 @@ if(WITH_DISTRIBUTE) DEPS spmd_rule_test_util spmd_rules - phi) + phi + common) paddle_test( softmax_grad_spmd_rule_test SRCS @@ -32,4 +34,4 @@ if(WITH_DISTRIBUTE) phi) endif() -cc_test_old(dist_mapper_test SRCS dist_mapper_test.cc DEPS phi) +cc_test_old(dist_mapper_test SRCS dist_mapper_test.cc DEPS phi common) diff --git a/test/cpp/auto_parallel/dist_tensor_test.cc b/test/cpp/auto_parallel/dist_tensor_test.cc index a94cfd37d6cc24..c1d6851b86ca2d 100644 --- a/test/cpp/auto_parallel/dist_tensor_test.cc +++ b/test/cpp/auto_parallel/dist_tensor_test.cc @@ -34,7 +34,7 @@ TEST(dist_tensor, constructor) { DDim dims({3, 4}); DenseTensorMeta meta(dtype, dims); - auto dist_attr = 
TensorDistAttr(phi::vectorize(dims)); + auto dist_attr = TensorDistAttr(common::vectorize(dims)); std::vector mesh_shape = {1}; std::vector process_ids = {0}; diff --git a/test/cpp/auto_parallel/spmd_rule_test.cc b/test/cpp/auto_parallel/spmd_rule_test.cc index 014672f91add1e..b4254e2d0912e6 100644 --- a/test/cpp/auto_parallel/spmd_rule_test.cc +++ b/test/cpp/auto_parallel/spmd_rule_test.cc @@ -41,8 +41,8 @@ TEST(MatmulSPMDRule, Ctor) { size_t input_size = 2; size_t output_size = 1; - phi::distributed::DistMetaTensor x(phi::make_ddim(x_shape), x_dist_attr); - phi::distributed::DistMetaTensor y(phi::make_ddim(y_shape), y_dist_attr); + phi::distributed::DistMetaTensor x(common::make_ddim(x_shape), x_dist_attr); + phi::distributed::DistMetaTensor y(common::make_ddim(y_shape), y_dist_attr); auto matmul_spmd_rule = phi::distributed::SpmdRuleFactory::Instance().GetSpmdRule("matmul"); @@ -63,8 +63,8 @@ TEST(MatmulSPMDRule, Ctor) { // mk[-1,-1],kn[-1,0] --> mk[-1,-1],kn[-1,0] = nm[-1,0] partial[] x_dist_attr.set_dims_mapping({-1, -1}); y_dist_attr.set_dims_mapping({-1, 0}); - x = phi::distributed::DistMetaTensor(phi::make_ddim(x_shape), x_dist_attr); - y = phi::distributed::DistMetaTensor(phi::make_ddim(y_shape), y_dist_attr); + x = phi::distributed::DistMetaTensor(common::make_ddim(x_shape), x_dist_attr); + y = phi::distributed::DistMetaTensor(common::make_ddim(y_shape), y_dist_attr); ctx = phi::distributed::InferSpmdContext( {x, y}, {/*trans_x=*/false, /*trans_x=*/false}); infered_dist_attrs = matmul_spmd_rule.InferForward(ctx); @@ -76,8 +76,8 @@ TEST(MatmulSPMDRule, Ctor) { // mk[1, 0],kn[-1,-1] --> mk[1, 0],kn[0, -1] = nm[1, -1] partial[0]: done x_dist_attr.set_dims_mapping({1, 0}); y_dist_attr.set_dims_mapping({-1, -1}); - x = phi::distributed::DistMetaTensor(phi::make_ddim(x_shape), x_dist_attr); - y = phi::distributed::DistMetaTensor(phi::make_ddim(y_shape), y_dist_attr); + x = phi::distributed::DistMetaTensor(common::make_ddim(x_shape), x_dist_attr); + y = phi::distributed::DistMetaTensor(common::make_ddim(y_shape), y_dist_attr); ctx = phi::distributed::InferSpmdContext( {x, y}, {/*trans_x=*/false, /*trans_x=*/false}); infered_dist_attrs = matmul_spmd_rule.InferForward(ctx); @@ -91,8 +91,8 @@ TEST(MatmulSPMDRule, Ctor) { // mk[-1,-1],kn[1,0] --> mk[-1, 1],kn[1, 0] = nm[-1, 0] partial[1]: done x_dist_attr.set_dims_mapping({-1, -1}); y_dist_attr.set_dims_mapping({1, 0}); - x = phi::distributed::DistMetaTensor(phi::make_ddim(x_shape), x_dist_attr); - y = phi::distributed::DistMetaTensor(phi::make_ddim(y_shape), y_dist_attr); + x = phi::distributed::DistMetaTensor(common::make_ddim(x_shape), x_dist_attr); + y = phi::distributed::DistMetaTensor(common::make_ddim(y_shape), y_dist_attr); ctx = phi::distributed::InferSpmdContext( {x, y}, {/*trans_x=*/false, /*trans_x=*/false}); infered_dist_attrs = matmul_spmd_rule.InferForward(ctx); @@ -108,8 +108,8 @@ TEST(MatmulSPMDRule, Ctor) { x_shape = {512, 48, 64, 32}; x_dist_attr.set_dims_mapping({0, 1, -1, -1}); y_dist_attr.set_dims_mapping({-1, -1}); - x = phi::distributed::DistMetaTensor(phi::make_ddim(x_shape), x_dist_attr); - y = phi::distributed::DistMetaTensor(phi::make_ddim(y_shape), y_dist_attr); + x = phi::distributed::DistMetaTensor(common::make_ddim(x_shape), x_dist_attr); + y = phi::distributed::DistMetaTensor(common::make_ddim(y_shape), y_dist_attr); ctx = phi::distributed::InferSpmdContext( {x, y}, {/*trans_x=*/false, /*trans_x=*/false}); infered_dist_attrs = matmul_spmd_rule.InferForward(ctx); @@ -123,8 +123,8 @@ TEST(MatmulSPMDRule, Ctor) 
{ // -1, -1, -1] partial[0]: done x_dist_attr.set_dims_mapping({1, -1, -1, 0}); y_dist_attr.set_dims_mapping({-1, -1}); - x = phi::distributed::DistMetaTensor(phi::make_ddim(x_shape), x_dist_attr); - y = phi::distributed::DistMetaTensor(phi::make_ddim(y_shape), y_dist_attr); + x = phi::distributed::DistMetaTensor(common::make_ddim(x_shape), x_dist_attr); + y = phi::distributed::DistMetaTensor(common::make_ddim(y_shape), y_dist_attr); ctx = phi::distributed::InferSpmdContext( {x, y}, {/*trans_x=*/false, /*trans_x=*/false}); infered_dist_attrs = matmul_spmd_rule.InferForward(ctx); @@ -139,8 +139,8 @@ TEST(MatmulSPMDRule, Ctor) { // abcmn[1, -1, 0, -1] partial[]: done x_dist_attr.set_dims_mapping({1, -1, -1, 0}); y_dist_attr.set_dims_mapping({-1, -1}); - x = phi::distributed::DistMetaTensor(phi::make_ddim(x_shape), x_dist_attr); - y = phi::distributed::DistMetaTensor(phi::make_ddim(y_shape), y_dist_attr); + x = phi::distributed::DistMetaTensor(common::make_ddim(x_shape), x_dist_attr); + y = phi::distributed::DistMetaTensor(common::make_ddim(y_shape), y_dist_attr); ctx = phi::distributed::InferSpmdContext( {x, y}, {/*trans_x=*/true, /*trans_x=*/false}); infered_dist_attrs = matmul_spmd_rule.InferForward(ctx); @@ -156,8 +156,8 @@ TEST(MatmulSPMDRule, Ctor) { // abcmn[-1, -1, -1, 1] partial[0]: done x_dist_attr.set_dims_mapping({-1, -1, -1, -1}); y_dist_attr.set_dims_mapping({1, 0}); - x = phi::distributed::DistMetaTensor(phi::make_ddim(x_shape), x_dist_attr); - y = phi::distributed::DistMetaTensor(phi::make_ddim(y_shape), y_dist_attr); + x = phi::distributed::DistMetaTensor(common::make_ddim(x_shape), x_dist_attr); + y = phi::distributed::DistMetaTensor(common::make_ddim(y_shape), y_dist_attr); ctx = phi::distributed::InferSpmdContext( {x, y}, {/*trans_x=*/false, /*trans_x=*/true}); infered_dist_attrs = matmul_spmd_rule.InferForward(ctx); @@ -174,8 +174,8 @@ TEST(MatmulSPMDRule, Ctor) { // 0, -1],kn[-1, 0] = abcmn[-1, -1, 1, -1] partial[0]: done x_dist_attr.set_dims_mapping({-1, -1, 0, 1}); y_dist_attr.set_dims_mapping({1, 0}); - x = phi::distributed::DistMetaTensor(phi::make_ddim(x_shape), x_dist_attr); - y = phi::distributed::DistMetaTensor(phi::make_ddim(y_shape), y_dist_attr); + x = phi::distributed::DistMetaTensor(common::make_ddim(x_shape), x_dist_attr); + y = phi::distributed::DistMetaTensor(common::make_ddim(y_shape), y_dist_attr); ctx = phi::distributed::InferSpmdContext( {x, y}, {/*trans_x=*/true, /*trans_x=*/true}); infered_dist_attrs = matmul_spmd_rule.InferForward(ctx); @@ -195,8 +195,8 @@ TEST(MatmulSPMDRule, Ctor) { // abcmn[-1, -1, -1, 1] partial[0]: done x_dist_attr.set_dims_mapping({-1, -1, 1, 0}); y_dist_attr.set_dims_mapping({1, 0}); - x = phi::distributed::DistMetaTensor(phi::make_ddim(x_shape), x_dist_attr); - y = phi::distributed::DistMetaTensor(phi::make_ddim(y_shape), y_dist_attr); + x = phi::distributed::DistMetaTensor(common::make_ddim(x_shape), x_dist_attr); + y = phi::distributed::DistMetaTensor(common::make_ddim(y_shape), y_dist_attr); ctx = phi::distributed::InferSpmdContext( {x, y}, {/*trans_x=*/true, /*trans_x=*/true}); EXPECT_ANY_THROW(infered_dist_attrs = matmul_spmd_rule.InferForward(ctx)); @@ -207,8 +207,8 @@ TEST(MatmulSPMDRule, Ctor) { // abcmn[-1, -1, 1, -1] partial[0]: x_dist_attr.set_dims_mapping({-1, -1, 0, 1}); y_dist_attr.set_dims_mapping({1, 0}); - x = phi::distributed::DistMetaTensor(phi::make_ddim(x_shape), x_dist_attr); - y = phi::distributed::DistMetaTensor(phi::make_ddim(y_shape), y_dist_attr); + x = 
phi::distributed::DistMetaTensor(common::make_ddim(x_shape), x_dist_attr); + y = phi::distributed::DistMetaTensor(common::make_ddim(y_shape), y_dist_attr); ctx = phi::distributed::InferSpmdContext( {x, y}, {/*trans_x=*/true, /*trans_x=*/true}); infered_dist_attrs = matmul_spmd_rule.InferForward(ctx); @@ -269,10 +269,10 @@ TEST(LayerNormSPMDRule, Ctor) { x_dist_attr.set_dims_mapping({1, -1, -1}); scale_dist_attr.set_dims_mapping({-1}); bias_dist_attr.set_dims_mapping({-1}); - phi::distributed::DistMetaTensor x(phi::make_ddim(x_shape), x_dist_attr); - phi::distributed::DistMetaTensor scale(phi::make_ddim(scale_shape), + phi::distributed::DistMetaTensor x(common::make_ddim(x_shape), x_dist_attr); + phi::distributed::DistMetaTensor scale(common::make_ddim(scale_shape), scale_dist_attr); - phi::distributed::DistMetaTensor bias(phi::make_ddim(bias_shape), + phi::distributed::DistMetaTensor bias(common::make_ddim(bias_shape), bias_dist_attr); phi::distributed::InferSpmdContext ctx({x, scale, bias}, {epsilon, begin_norm_axis}); @@ -296,10 +296,10 @@ TEST(LayerNormSPMDRule, Ctor) { x_dist_attr.set_dims_mapping({1, 0, -1}); scale_dist_attr.set_dims_mapping({0}); bias_dist_attr.set_dims_mapping({0}); - x = phi::distributed::DistMetaTensor(phi::make_ddim(x_shape), x_dist_attr); - scale = phi::distributed::DistMetaTensor(phi::make_ddim(scale_shape), + x = phi::distributed::DistMetaTensor(common::make_ddim(x_shape), x_dist_attr); + scale = phi::distributed::DistMetaTensor(common::make_ddim(scale_shape), scale_dist_attr); - bias = phi::distributed::DistMetaTensor(phi::make_ddim(bias_shape), + bias = phi::distributed::DistMetaTensor(common::make_ddim(bias_shape), bias_dist_attr); ctx = phi::distributed::InferSpmdContext({x, scale, bias}, {epsilon, begin_norm_axis}); @@ -319,10 +319,10 @@ TEST(LayerNormSPMDRule, Ctor) { x_dist_attr.set_dims_mapping({0, -1, -1}); scale_dist_attr.set_dims_mapping({-1}); bias_dist_attr.set_dims_mapping({1}); - x = phi::distributed::DistMetaTensor(phi::make_ddim(x_shape), x_dist_attr); - scale = phi::distributed::DistMetaTensor(phi::make_ddim(scale_shape), + x = phi::distributed::DistMetaTensor(common::make_ddim(x_shape), x_dist_attr); + scale = phi::distributed::DistMetaTensor(common::make_ddim(scale_shape), scale_dist_attr); - bias = phi::distributed::DistMetaTensor(phi::make_ddim(bias_shape), + bias = phi::distributed::DistMetaTensor(common::make_ddim(bias_shape), bias_dist_attr); ctx = phi::distributed::InferSpmdContext({x, scale, bias}, {epsilon, begin_norm_axis}); @@ -366,9 +366,9 @@ TEST(MatmulSPMDRuleInferBackward, Ctor) { out_dist_attr.set_dynamic_dims(std::vector({false, false})); out_dist_attr.set_partial_status(std::vector({0})); - phi::distributed::DistMetaTensor x(phi::make_ddim(x_shape), x_dist_attr); - phi::distributed::DistMetaTensor y(phi::make_ddim(y_shape), y_dist_attr); - phi::distributed::DistMetaTensor out(phi::make_ddim(out_shape), + phi::distributed::DistMetaTensor x(common::make_ddim(x_shape), x_dist_attr); + phi::distributed::DistMetaTensor y(common::make_ddim(y_shape), y_dist_attr); + phi::distributed::DistMetaTensor out(common::make_ddim(out_shape), out_dist_attr); auto matmul_spmd_rule = @@ -427,11 +427,11 @@ TEST(ReplicatedSPMDRule, Ctor) { out2_dist_attr.set_dims_mapping(std::vector({-1, 1, -1})); out2_dist_attr.set_dynamic_dims(std::vector({false, false})); - phi::distributed::DistMetaTensor x(phi::make_ddim(x_shape), x_dist_attr); - phi::distributed::DistMetaTensor y(phi::make_ddim(y_shape), y_dist_attr); - phi::distributed::DistMetaTensor 
out1(phi::make_ddim(out1_shape), + phi::distributed::DistMetaTensor x(common::make_ddim(x_shape), x_dist_attr); + phi::distributed::DistMetaTensor y(common::make_ddim(y_shape), y_dist_attr); + phi::distributed::DistMetaTensor out1(common::make_ddim(out1_shape), out1_dist_attr); - phi::distributed::DistMetaTensor out2(phi::make_ddim(out2_shape), + phi::distributed::DistMetaTensor out2(common::make_ddim(out2_shape), out2_dist_attr); // 2 inputs 2 outputs @@ -539,11 +539,11 @@ TEST(DefaultDataParallelSPMDRule, Ctor) { out2_dist_attr.set_dims_mapping(std::vector({-1, 1, -1})); out2_dist_attr.set_dynamic_dims(std::vector({false, false})); - phi::distributed::DistMetaTensor x(phi::make_ddim(x_shape), x_dist_attr); - phi::distributed::DistMetaTensor y(phi::make_ddim(y_shape), y_dist_attr); - phi::distributed::DistMetaTensor out1(phi::make_ddim(out1_shape), + phi::distributed::DistMetaTensor x(common::make_ddim(x_shape), x_dist_attr); + phi::distributed::DistMetaTensor y(common::make_ddim(y_shape), y_dist_attr); + phi::distributed::DistMetaTensor out1(common::make_ddim(out1_shape), out1_dist_attr); - phi::distributed::DistMetaTensor out2(phi::make_ddim(out2_shape), + phi::distributed::DistMetaTensor out2(common::make_ddim(out2_shape), out2_dist_attr); // 2 inputs 2 outputs, batch axis sharding is propagatd while other axes are @@ -603,9 +603,9 @@ TEST(DefaultDataParallelSPMDRule, Ctor) { x_dist_attr.set_dims_mapping(std::vector({0, -1, -1, -1})); y_dist_attr.set_dims_mapping(std::vector({-1, -1})); out1_dist_attr.set_dims_mapping(std::vector({1, -1, -1, -1})); - x = phi::distributed::DistMetaTensor(phi::make_ddim(x_shape), x_dist_attr); - y = phi::distributed::DistMetaTensor(phi::make_ddim(y_shape), y_dist_attr); - out1 = phi::distributed::DistMetaTensor(phi::make_ddim(out1_shape), + x = phi::distributed::DistMetaTensor(common::make_ddim(x_shape), x_dist_attr); + y = phi::distributed::DistMetaTensor(common::make_ddim(y_shape), y_dist_attr); + out1 = phi::distributed::DistMetaTensor(common::make_ddim(out1_shape), out1_dist_attr); EXPECT_ANY_THROW(infered_dist_attrs_st = @@ -622,9 +622,9 @@ TEST(DefaultDataParallelSPMDRule, Ctor) { // call in vector arguments format out1_dist_attr.set_dims_mapping(std::vector({-1, 0, 1, -1})); out2_dist_attr.set_dims_mapping(std::vector({0, 1, -1})); - out1 = phi::distributed::DistMetaTensor(phi::make_ddim(out1_shape), + out1 = phi::distributed::DistMetaTensor(common::make_ddim(out1_shape), out1_dist_attr); - out2 = phi::distributed::DistMetaTensor(phi::make_ddim(out2_shape), + out2 = phi::distributed::DistMetaTensor(common::make_ddim(out2_shape), out2_dist_attr); infered_dist_attrs_st = phi::distributed::DefaultDataParallelInferSpmdReverse( @@ -667,8 +667,8 @@ TEST(ConcatRule, Ctor) { t_dist_attr.set_process_mesh(process_mesh); t_dist_attr.set_dims_mapping(dim_mappings[i]); t_dist_attr.set_dynamic_dims({false, false, false}); - auto input = phi::distributed::DistMetaTensor(phi::make_ddim(shapes[i]), - t_dist_attr); + auto input = phi::distributed::DistMetaTensor( + common::make_ddim(shapes[i]), t_dist_attr); inputs.push_back(input); } return inputs; @@ -695,7 +695,8 @@ TEST(ConcatRule, Ctor) { auto build_output = [&](const TensorDistAttr& t_dist_attr, const std::vector& shape) { - return phi::distributed::DistMetaTensor(phi::make_ddim(shape), t_dist_attr); + return phi::distributed::DistMetaTensor(common::make_ddim(shape), + t_dist_attr); }; auto& output_dist_attr = @@ -769,8 +770,8 @@ TEST(StackRule, Ctor) { t_dist_attr.set_process_mesh(process_mesh); 
t_dist_attr.set_dims_mapping(dim_mappings[i]); t_dist_attr.set_dynamic_dims({false, false, false}); - auto input = phi::distributed::DistMetaTensor(phi::make_ddim(input_shape), - t_dist_attr); + auto input = phi::distributed::DistMetaTensor( + common::make_ddim(input_shape), t_dist_attr); inputs.push_back(input); } return inputs; @@ -787,7 +788,7 @@ TEST(StackRule, Ctor) { input_shape.end(), std::back_inserter(output_shape), [](int64_t x) { return x; }); - return phi::distributed::DistMetaTensor(phi::make_ddim(output_shape), + return phi::distributed::DistMetaTensor(common::make_ddim(output_shape), t_dist_attr); }; @@ -873,8 +874,8 @@ TEST(WhereRule, Ctor) { t_dist_attr.set_process_mesh(process_mesh); t_dist_attr.set_dims_mapping(dim_mappings[i]); t_dist_attr.set_dynamic_dims({false, false, false}); - auto input = phi::distributed::DistMetaTensor(phi::make_ddim(shapes[i]), - t_dist_attr); + auto input = phi::distributed::DistMetaTensor( + common::make_ddim(shapes[i]), t_dist_attr); inputs.push_back(input); } return inputs; @@ -909,8 +910,8 @@ TEST(ReduceMaxRule, Ctor) { t_dist_attr.set_process_mesh(process_mesh); t_dist_attr.set_dims_mapping({-1, 0, -1}); t_dist_attr.set_dynamic_dims({false, false, false}); - phi::distributed::DistMetaTensor x = - phi::distributed::DistMetaTensor(phi::make_ddim({4, 6, 8}), t_dist_attr); + phi::distributed::DistMetaTensor x = phi::distributed::DistMetaTensor( + common::make_ddim({4, 6, 8}), t_dist_attr); IntArray axis = {1}; bool keep_dim = false; phi::distributed::SpmdInfo forward_info = @@ -919,7 +920,7 @@ TEST(ReduceMaxRule, Ctor) { check_partial_dims(forward_info.second[0], {0}); // test backward phi::distributed::DistMetaTensor out = phi::distributed::DistMetaTensor( - phi::make_ddim({4, 8}), + common::make_ddim({4, 8}), PADDLE_GET_CONST(TensorDistAttr, forward_info.second[0])); phi::distributed::DistMetaTensor out_grad = out; phi::distributed::SpmdInfo backward_info = @@ -965,7 +966,7 @@ TEST(Numel, Ctor) { t_dist_attr.set_dims_mapping(dims_mapping); t_dist_attr.set_dynamic_dims({false, false, false}); auto input = - phi::distributed::DistMetaTensor(phi::make_ddim(shape), t_dist_attr); + phi::distributed::DistMetaTensor(common::make_ddim(shape), t_dist_attr); auto infered_dist_attrs = phi::distributed::NumelInferSpmd(input); EXPECT_EQ(infered_dist_attrs.first.size(), static_cast(1)); EXPECT_EQ(infered_dist_attrs.second.size(), static_cast(1)); @@ -988,7 +989,7 @@ TEST(Triu, Ctor) { t_dist_attr.set_dims_mapping(dims_mapping); t_dist_attr.set_dynamic_dims({false, false, false}); auto input = - phi::distributed::DistMetaTensor(phi::make_ddim(shape), t_dist_attr); + phi::distributed::DistMetaTensor(common::make_ddim(shape), t_dist_attr); auto infered_dist_attrs = phi::distributed::TriuGradInferSpmd(input, 0); EXPECT_EQ(infered_dist_attrs.first.size(), static_cast(1)); EXPECT_EQ(infered_dist_attrs.second.size(), static_cast(1)); @@ -1013,7 +1014,7 @@ TEST(LayerNorm, Ctor) { t_dist_attr.set_dims_mapping(dim_mapping); t_dist_attr.set_dynamic_dims({false, false, false}); auto input = - phi::distributed::DistMetaTensor(phi::make_ddim(shape), t_dist_attr); + phi::distributed::DistMetaTensor(common::make_ddim(shape), t_dist_attr); return input; }; // test 1 @@ -1076,7 +1077,7 @@ TEST(FlashAtt, Ctor) { t_dist_attr.set_dims_mapping(dim_mapping); t_dist_attr.set_dynamic_dims(std::vector(shape.size(), false)); auto input = - phi::distributed::DistMetaTensor(phi::make_ddim(shape), t_dist_attr); + phi::distributed::DistMetaTensor(common::make_ddim(shape), 
t_dist_attr); return input; }; @@ -1164,7 +1165,7 @@ TEST(Transpose, Ctor) { t_dist_attr.set_dims_mapping(dims_mapping); t_dist_attr.set_dynamic_dims({false, false, false}); phi::distributed::DistMetaTensor x = - phi::distributed::DistMetaTensor(phi::make_ddim(shape), t_dist_attr); + phi::distributed::DistMetaTensor(common::make_ddim(shape), t_dist_attr); std::vector<int> perm = {1, 2, -3}; // test forward phi::distributed::SpmdInfo forward_spmd_info = @@ -1176,7 +1177,7 @@ TEST(Transpose, Ctor) { check_partial_dims(forward_spmd_info.second[0], {}); // test backward phi::distributed::DistMetaTensor out_grad = phi::distributed::DistMetaTensor( - phi::make_ddim({8, 10, 6}), + common::make_ddim({8, 10, 6}), PADDLE_GET_CONST(TensorDistAttr, forward_spmd_info.second[0])); phi::distributed::SpmdInfo backward_spmd_info = TransposeGradInferSpmd(out_grad, perm); @@ -1200,7 +1201,7 @@ TEST(Reshape, Ctor) { t_dist_attr.set_dims_mapping(dim_mapping); t_dist_attr.set_dynamic_dims(std::vector<bool>(shape.size(), false)); auto input = - phi::distributed::DistMetaTensor(phi::make_ddim(shape), t_dist_attr); + phi::distributed::DistMetaTensor(common::make_ddim(shape), t_dist_attr); return input; }; @@ -1246,28 +1247,32 @@ TEST(ElementwiseUnaryLike, Ctor) { // cast auto input = - phi::distributed::DistMetaTensor(phi::make_ddim(shape), t_dist_attr); + phi::distributed::DistMetaTensor(common::make_ddim(shape), t_dist_attr); auto infered_dist_attrs = phi::distributed::CastInferSpmd(input, phi::DataType::FLOAT32); check_element_unary_like(infered_dist_attrs); // full like - input = phi::distributed::DistMetaTensor(phi::make_ddim(shape), t_dist_attr); + input = + phi::distributed::DistMetaTensor(common::make_ddim(shape), t_dist_attr); infered_dist_attrs = phi::distributed::FullLikeInferSpmd(input, 1.0, phi::DataType::FLOAT32); check_element_unary_like(infered_dist_attrs); // pow - input = phi::distributed::DistMetaTensor(phi::make_ddim(shape), t_dist_attr); + input = + phi::distributed::DistMetaTensor(common::make_ddim(shape), t_dist_attr); infered_dist_attrs = phi::distributed::PowInferSpmd(input, 2); check_element_unary_like(infered_dist_attrs); // pow backward - input = phi::distributed::DistMetaTensor(phi::make_ddim(shape), t_dist_attr); + input = + phi::distributed::DistMetaTensor(common::make_ddim(shape), t_dist_attr); infered_dist_attrs = phi::distributed::PowGradInferSpmd(input, input, 2); // scale - input = phi::distributed::DistMetaTensor(phi::make_ddim(shape), t_dist_attr); + input = + phi::distributed::DistMetaTensor(common::make_ddim(shape), t_dist_attr); infered_dist_attrs = phi::distributed::ScaleInferSpmd(input, 1.0, 1.0, false); check_element_unary_like(infered_dist_attrs); } diff --git a/test/cpp/eager/CMakeLists.txt b/test/cpp/eager/CMakeLists.txt index b9729743a1c426..58c3547b9ef235 100644 --- a/test/cpp/eager/CMakeLists.txt +++ b/test/cpp/eager/CMakeLists.txt @@ -1,5 +1,6 @@ set(eager_deps phi + common hook_utils utils global_utils diff --git a/test/cpp/eager/data_structure_tests/accumulation_node_test.cc b/test/cpp/eager/data_structure_tests/accumulation_node_test.cc index 4bad555a439088..c1469d6e61a741 100644 --- a/test/cpp/eager/data_structure_tests/accumulation_node_test.cc +++ b/test/cpp/eager/data_structure_tests/accumulation_node_test.cc @@ -32,11 +32,11 @@ using namespace egr; // NOLINT TEST(AccumulationNode, SelectedRowsAddToTensor) { // Construct Eager Tensor phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); + 
phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 1})); std::vector<int64_t> rows = {0}; std::shared_ptr<phi::SelectedRows> sr0 = std::make_shared<phi::SelectedRows>(rows, 1); - sr0->mutable_value()->Resize(phi::make_ddim({1, 1})); + sr0->mutable_value()->Resize(common::make_ddim({1, 1})); sr0->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] = static_cast<float>(10.0f); paddle::Tensor et0 = paddle::Tensor(sr0); @@ -59,7 +59,7 @@ TEST(AccumulationNode, SelectedRowsAddToTensor) { // Initialize Grad Tensor std::shared_ptr<phi::SelectedRows> grad_dt = std::make_shared<phi::SelectedRows>(rows, 1); - grad_dt->mutable_value()->Resize(phi::make_ddim({1, 1})); + grad_dt->mutable_value()->Resize(common::make_ddim({1, 1})); grad_dt->mutable_value()->mutable_data<float>( paddle::platform::CPUPlace())[0] = static_cast<float>(0.0f); grad_meta->MutableGrad()->set_impl(grad_dt); @@ -97,17 +97,17 @@ TEST(AccumulationNode, SelectedRowsAddToTensor) { TEST(AccumulationNode, SelectedRowsMerge) { // Construct Eager Tensor phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 1})); std::vector<int64_t> rows = {0}; std::shared_ptr<phi::SelectedRows> sr0 = std::make_shared<phi::SelectedRows>(rows, 1); - sr0->mutable_value()->Resize(phi::make_ddim({1, 1})); + sr0->mutable_value()->Resize(common::make_ddim({1, 1})); sr0->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] = static_cast<float>(10.0f); paddle::Tensor et0 = paddle::Tensor(sr0); std::shared_ptr<phi::SelectedRows> sr1 = std::make_shared<phi::SelectedRows>(rows, 1); - sr1->mutable_value()->Resize(phi::make_ddim({1, 1})); + sr1->mutable_value()->Resize(common::make_ddim({1, 1})); sr1->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] = static_cast<float>(20.0f); paddle::Tensor et1 = paddle::Tensor(sr1); @@ -122,7 +122,7 @@ TEST(AccumulationNode, SelectedRowsMerge) { // Initialize Grad Tensor std::shared_ptr<phi::SelectedRows> grad_dt = std::make_shared<phi::SelectedRows>(rows, 1); - grad_dt->mutable_value()->Resize(phi::make_ddim({1, 1})); + grad_dt->mutable_value()->Resize(common::make_ddim({1, 1})); grad_dt->mutable_value()->mutable_data<float>( paddle::platform::CPUPlace())[0] = static_cast<float>(0.0f); grad_meta->MutableGrad()->set_impl(grad_dt); @@ -162,17 +162,17 @@ TEST(AccumulationNode, SelectedRowsMerge) { TEST(AccumulationNode, SelectedRowsAddTensor) { // Construct Eager Tensor phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 1})); std::vector<int64_t> rows = {0}; std::shared_ptr<phi::SelectedRows> sr0 = std::make_shared<phi::SelectedRows>(rows, 1); - sr0->mutable_value()->Resize(phi::make_ddim({1, 1})); + sr0->mutable_value()->Resize(common::make_ddim({1, 1})); sr0->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] = static_cast<float>(10.0f); paddle::Tensor et0 = paddle::Tensor(sr0); std::shared_ptr<phi::SelectedRows> sr1 = std::make_shared<phi::SelectedRows>(rows, 1); - sr1->mutable_value()->Resize(phi::make_ddim({1, 1})); + sr1->mutable_value()->Resize(common::make_ddim({1, 1})); sr1->mutable_value()->mutable_data<float>(paddle::platform::CPUPlace())[0] = static_cast<float>(20.0f); paddle::Tensor et1 = paddle::Tensor(sr1); @@ -229,7 +229,7 @@ TEST(AccumulationNode, SelectedRowsAddTensor) { TEST(AccumulationNode, Tensor) { // Construct Eager Tensor phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT16, phi::make_ddim({1, 1})); + phi::DenseTensorMeta(phi::DataType::FLOAT16, common::make_ddim({1, 1})); std::shared_ptr<phi::DenseTensor> dt0 = std::make_shared<phi::DenseTensor>( std::make_unique<paddle::experimental::DefaultAllocator>( paddle::platform::CPUPlace()) diff --git a/test/cpp/eager/data_structure_tests/autograd_meta_test.cc 
b/test/cpp/eager/data_structure_tests/autograd_meta_test.cc index 651e3b63f07ac6..41eda2ec8080a4 100644 --- a/test/cpp/eager/data_structure_tests/autograd_meta_test.cc +++ b/test/cpp/eager/data_structure_tests/autograd_meta_test.cc @@ -40,7 +40,7 @@ TEST(AutogradMeta, MemberFunction) { CHECK(tmp_auto->Grad().defined() == false); auto* grad_t = tmp_auto->MutableGrad(); phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 2})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 2})); std::shared_ptr dt = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) diff --git a/test/cpp/eager/data_structure_tests/eager_tensor_test.cc b/test/cpp/eager/data_structure_tests/eager_tensor_test.cc index b2e3d8b5e7bd23..2ffdf033cf1cd1 100644 --- a/test/cpp/eager/data_structure_tests/eager_tensor_test.cc +++ b/test/cpp/eager/data_structure_tests/eager_tensor_test.cc @@ -16,9 +16,9 @@ #include "glog/logging.h" #include "gtest/gtest.h" +#include "paddle/common/layout.h" #include "paddle/fluid/imperative/var_helper.h" #include "paddle/phi/api/lib/utils/allocator.h" -#include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" namespace eager_test { @@ -37,7 +37,7 @@ TEST(Tensor, Constructor) { CHECK_EQ(et2.name(), "et2"); phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 2})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 2})); std::shared_ptr dt = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) @@ -67,7 +67,7 @@ TEST(Tensor, Constructor) { TEST(Tensor, MemberFunction) { paddle::Tensor et3; phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 2})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 2})); std::shared_ptr dt = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) @@ -87,7 +87,7 @@ TEST(Tensor, MemberFunction) { CHECK_EQ(et3.is_cpu(), true); CHECK_EQ(et3.is_gpu(), false); CHECK_EQ(et3.numel(), 2); - auto expected_dim = phi::make_ddim({1, 2}); + auto expected_dim = common::make_ddim({1, 2}); CHECK_EQ(et3.dims(), expected_dim); CHECK_EQ(et3.type(), phi::DataType::FLOAT32); CHECK_EQ(et3.layout(), phi::DataLayout::NCHW); @@ -121,7 +121,7 @@ TEST(Tensor, MemberFunction) { TEST(EagerVariable, Constructor) { paddle::Tensor t3; phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 2})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 2})); std::shared_ptr dt = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) @@ -159,7 +159,7 @@ TEST(EagerVariable, Constructor) { paddle::Tensor t7(std::make_shared(rows, 2)); std::dynamic_pointer_cast(t7.impl()) ->mutable_value() - ->Resize(phi::make_ddim(dims)); + ->Resize(common::make_ddim(dims)); auto* dt7_tmp_ptr = std::dynamic_pointer_cast(t7.impl()) ->mutable_value() ->mutable_data(paddle::platform::CPUPlace()); @@ -202,9 +202,10 @@ TEST(EagerVariable, Constructor) { TEST(EagerVariable, DataLayout) { paddle::Tensor tensor; - phi::DenseTensorMeta meta = phi::DenseTensorMeta(phi::DataType::FLOAT32, - phi::make_ddim({1, 1, 1, 1}), - phi::DataLayout::UNDEFINED); + phi::DenseTensorMeta meta = + phi::DenseTensorMeta(phi::DataType::FLOAT32, + common::make_ddim({1, 1, 1, 1}), + phi::DataLayout::UNDEFINED); std::shared_ptr dt = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) diff --git 
a/test/cpp/eager/data_structure_tests/grad_node_info_test.cc b/test/cpp/eager/data_structure_tests/grad_node_info_test.cc index 0948e6f72aa0b0..dc7027eac030ec 100644 --- a/test/cpp/eager/data_structure_tests/grad_node_info_test.cc +++ b/test/cpp/eager/data_structure_tests/grad_node_info_test.cc @@ -37,7 +37,7 @@ void TestGradNodeBase(bool is_remove_gradient_hook) { paddle::small_vector, egr::kSlotSmallVectorSize> grads; phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 1})); std::shared_ptr dt = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) @@ -87,7 +87,7 @@ void TestGradNodeBase(bool is_remove_gradient_hook) { auto gradient_hook = [](const paddle::Tensor& et) -> paddle::Tensor { paddle::Tensor res; phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 1})); std::shared_ptr dt = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) @@ -125,7 +125,7 @@ TEST(GradNodeInfo, GradNodeBase) { TEST(GradNodeInfo, Edge) { phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 1})); std::shared_ptr dt = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) diff --git a/test/cpp/eager/data_structure_tests/grad_node_test.h b/test/cpp/eager/data_structure_tests/grad_node_test.h index c1125f0774a517..8ead02e88f6baa 100644 --- a/test/cpp/eager/data_structure_tests/grad_node_test.h +++ b/test/cpp/eager/data_structure_tests/grad_node_test.h @@ -38,7 +38,7 @@ class GradTestNode : public egr::GradNodeBase { val_ = std::dynamic_pointer_cast(grads[0][0].impl()) ->data()[0]; phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 1})); std::shared_ptr dt = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) diff --git a/test/cpp/eager/data_structure_tests/grad_tensor_holder_test.cc b/test/cpp/eager/data_structure_tests/grad_tensor_holder_test.cc index b9e5b23a04e0b0..8476eb132dc4cc 100644 --- a/test/cpp/eager/data_structure_tests/grad_tensor_holder_test.cc +++ b/test/cpp/eager/data_structure_tests/grad_tensor_holder_test.cc @@ -37,7 +37,7 @@ TEST(GradTensorHolder, Constructor) { // Construct Eager Tensor phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({2, 2})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({2, 2})); std::shared_ptr dt = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) @@ -55,7 +55,7 @@ TEST(GradTensorHolder, Constructor) { TEST(GradTensorHolder, Interfaces) { // Construct Eager Tensor phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 1})); std::shared_ptr dt0 = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) @@ -117,7 +117,8 @@ TEST(GradTensorHolder, SelectedRowsMergeAdd) { auto sr2 = std::make_shared(rows, table_size); // initialize a sparse table 1 - sr1->mutable_value()->Resize(phi::make_ddim({table_size, embedding_width})); + sr1->mutable_value()->Resize( + common::make_ddim({table_size, embedding_width})); auto* data_sr1 = sr1->mutable_value()->mutable_data(cpu); for 
(int64_t i = 0; i < table_size; ++i) { for (int64_t j = 0; j < embedding_width; ++j) { @@ -126,7 +127,8 @@ TEST(GradTensorHolder, SelectedRowsMergeAdd) { } // initialize a sparse table 2 - sr2->mutable_value()->Resize(phi::make_ddim({table_size, embedding_width})); + sr2->mutable_value()->Resize( + common::make_ddim({table_size, embedding_width})); auto* data_sr2 = sr2->mutable_value()->mutable_data(cpu); for (int64_t i = 0; i < table_size; ++i) { for (int64_t j = 0; j < embedding_width; ++j) { diff --git a/test/cpp/eager/data_structure_tests/tensor_wrapper_test.cc b/test/cpp/eager/data_structure_tests/tensor_wrapper_test.cc index a3a82b0c3b2018..38eb45fe192487 100644 --- a/test/cpp/eager/data_structure_tests/tensor_wrapper_test.cc +++ b/test/cpp/eager/data_structure_tests/tensor_wrapper_test.cc @@ -23,7 +23,7 @@ TEST(TensorWrapper, Basic) { VLOG(6) << "Test Full reserved"; paddle::Tensor et1; phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 2})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 2})); std::shared_ptr dt = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) @@ -52,7 +52,7 @@ TEST(TensorWrapper, Basic) { VLOG(6) << "Test reconstruct"; paddle::Tensor et2; phi::DenseTensorMeta meta2 = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 2})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 2})); std::shared_ptr dt2 = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) diff --git a/test/cpp/eager/performance_tests/CMakeLists.txt b/test/cpp/eager/performance_tests/CMakeLists.txt index 7b48812d6dd7fe..69388abb70861d 100644 --- a/test/cpp/eager/performance_tests/CMakeLists.txt +++ b/test/cpp/eager/performance_tests/CMakeLists.txt @@ -16,15 +16,15 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) eager_prim_api) paddle_test(test_egr_performance_benchmark_eager_cpu SRCS - benchmark_eager_cpu.cc DEPS performance_benchmark_utils) + benchmark_eager_cpu.cc DEPS performance_benchmark_utils common) paddle_test(test_egr_performance_benchmark_fluid_cpu SRCS - benchmark_fluid_cpu.cc DEPS performance_benchmark_utils) + benchmark_fluid_cpu.cc DEPS performance_benchmark_utils common) if(WITH_GPU) paddle_test(test_egr_performance_benchmark_eager_cuda SRCS - benchmark_eager_cuda.cc DEPS performance_benchmark_utils) + benchmark_eager_cuda.cc DEPS performance_benchmark_utils common) paddle_test(test_egr_performance_benchmark_fluid_cuda SRCS - benchmark_fluid_cuda.cc DEPS performance_benchmark_utils) + benchmark_fluid_cuda.cc DEPS performance_benchmark_utils common) endif() if(WITH_ONNXRUNTIME AND WIN32) diff --git a/test/cpp/eager/performance_tests/benchmark_eager_cpu.cc b/test/cpp/eager/performance_tests/benchmark_eager_cpu.cc index b6e991e358fde6..f0865efab3156c 100644 --- a/test/cpp/eager/performance_tests/benchmark_eager_cpu.cc +++ b/test/cpp/eager/performance_tests/benchmark_eager_cpu.cc @@ -41,7 +41,7 @@ TEST(Benchmark, EagerScaleCPU) { eager_test::InitEnv(paddle::platform::CPUPlace()); for (const std::string mode : {"Accuracy", "Performance"}) { - paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4}); + paddle::framework::DDim ddim = common::make_ddim({2, 4, 4, 4}); paddle::Tensor tensor = eager_test::CreateTensorWithValue(ddim, paddle::platform::CPUPlace(), @@ -81,7 +81,7 @@ TEST(Benchmark, EagerMatmulCPU) { eager_test::InitEnv(paddle::platform::CPUPlace()); for (const std::string mode : {"Accuracy", "Performance"}) { - paddle::framework::DDim ddimX = 
phi::make_ddim({2, 2}); + paddle::framework::DDim ddimX = common::make_ddim({2, 2}); paddle::Tensor X = eager_test::CreateTensorWithValue(ddimX, paddle::platform::CPUPlace(), @@ -91,7 +91,7 @@ TEST(Benchmark, EagerMatmulCPU) { true); RetainGradForTensor(X); - paddle::framework::DDim ddimY = phi::make_ddim({2, 2}); + paddle::framework::DDim ddimY = common::make_ddim({2, 2}); paddle::Tensor Y = eager_test::CreateTensorWithValue(ddimY, paddle::platform::CPUPlace(), @@ -133,7 +133,7 @@ TEST(Benchmark, EagerIntermediateMatmulCPU) { paddle::imperative::SetCurrentTracer(tracer); for (const std::string mode : {"Accuracy", "Performance"}) { - paddle::framework::DDim ddimX = phi::make_ddim({2, 2}); + paddle::framework::DDim ddimX = common::make_ddim({2, 2}); paddle::Tensor X = eager_test::CreateTensorWithValue(ddimX, paddle::platform::CPUPlace(), @@ -143,7 +143,7 @@ TEST(Benchmark, EagerIntermediateMatmulCPU) { true); RetainGradForTensor(X); - paddle::framework::DDim ddimY = phi::make_ddim({2, 2}); + paddle::framework::DDim ddimY = common::make_ddim({2, 2}); paddle::Tensor Y = eager_test::CreateTensorWithValue(ddimY, paddle::platform::CPUPlace(), @@ -185,7 +185,7 @@ TEST(Benchmark, EagerIntermediateMLPCPU) { paddle::imperative::SetCurrentTracer(tracer); for (const std::string mode : {"Accuracy", "Performance"}) { - paddle::framework::DDim ddimX = phi::make_ddim({MLP_M, MLP_N}); + paddle::framework::DDim ddimX = common::make_ddim({MLP_M, MLP_N}); paddle::Tensor X = eager_test::CreateTensorWithValue(ddimX, paddle::platform::CPUPlace(), @@ -198,7 +198,7 @@ TEST(Benchmark, EagerIntermediateMLPCPU) { std::vector Ws; std::vector Bs; for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { - paddle::framework::DDim ddimW = phi::make_ddim({MLP_N, MLP_K}); + paddle::framework::DDim ddimW = common::make_ddim({MLP_N, MLP_K}); paddle::Tensor W = eager_test::CreateTensorWithValue(ddimW, paddle::platform::CPUPlace(), @@ -208,7 +208,7 @@ TEST(Benchmark, EagerIntermediateMLPCPU) { true); RetainGradForTensor(W); - paddle::framework::DDim ddimB = phi::make_ddim({MLP_K}); + paddle::framework::DDim ddimB = common::make_ddim({MLP_K}); paddle::Tensor B = eager_test::CreateTensorWithValue(ddimB, paddle::platform::CPUPlace(), diff --git a/test/cpp/eager/performance_tests/benchmark_eager_cuda.cc b/test/cpp/eager/performance_tests/benchmark_eager_cuda.cc index 79e0d382f75289..6b3f395ea8f5b1 100644 --- a/test/cpp/eager/performance_tests/benchmark_eager_cuda.cc +++ b/test/cpp/eager/performance_tests/benchmark_eager_cuda.cc @@ -41,7 +41,7 @@ TEST(Benchmark, EagerScaleCUDA) { eager_test::InitEnv(paddle::platform::CUDAPlace()); for (const std::string mode : {"Accuracy", "WarmUp", "Performance"}) { - paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4}); + paddle::framework::DDim ddim = common::make_ddim({2, 4, 4, 4}); paddle::Tensor tensor = eager_test::CreateTensorWithValue(ddim, paddle::platform::CUDAPlace(), @@ -83,7 +83,7 @@ TEST(Benchmark, EagerMatmulCUDA) { eager_test::InitEnv(place); for (const std::string mode : {"Accuracy", "WarmUp", "Performance"}) { - paddle::framework::DDim ddimX = phi::make_ddim({2, 2}); + paddle::framework::DDim ddimX = common::make_ddim({2, 2}); paddle::Tensor X = eager_test::CreateTensorWithValue(ddimX, paddle::platform::CUDAPlace(), @@ -93,7 +93,7 @@ TEST(Benchmark, EagerMatmulCUDA) { true); RetainGradForTensor(X); - paddle::framework::DDim ddimY = phi::make_ddim({2, 2}); + paddle::framework::DDim ddimY = common::make_ddim({2, 2}); paddle::Tensor Y = eager_test::CreateTensorWithValue(ddimY, 
paddle::platform::CUDAPlace(), @@ -139,7 +139,7 @@ TEST(Benchmark, EagerIntermediateMatmulCUDA) { paddle::imperative::SetCurrentTracer(tracer); for (const std::string mode : {"Accuracy", "WarmUp", "Performance"}) { - paddle::framework::DDim ddimX = phi::make_ddim({2, 2}); + paddle::framework::DDim ddimX = common::make_ddim({2, 2}); paddle::Tensor X = eager_test::CreateTensorWithValue(ddimX, paddle::platform::CUDAPlace(), @@ -149,7 +149,7 @@ TEST(Benchmark, EagerIntermediateMatmulCUDA) { true); RetainGradForTensor(X); - paddle::framework::DDim ddimY = phi::make_ddim({2, 2}); + paddle::framework::DDim ddimY = common::make_ddim({2, 2}); paddle::Tensor Y = eager_test::CreateTensorWithValue(ddimY, paddle::platform::CUDAPlace(), @@ -195,7 +195,7 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) { paddle::imperative::SetCurrentTracer(tracer); for (const std::string mode : {"Accuracy", "WarmUp", "Performance"}) { - paddle::framework::DDim ddimX = phi::make_ddim({MLP_M, MLP_N}); + paddle::framework::DDim ddimX = common::make_ddim({MLP_M, MLP_N}); paddle::Tensor X = eager_test::CreateTensorWithValue(ddimX, paddle::platform::CUDAPlace(), @@ -208,7 +208,7 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) { std::vector Ws; std::vector Bs; for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { - paddle::framework::DDim ddimW = phi::make_ddim({MLP_N, MLP_K}); + paddle::framework::DDim ddimW = common::make_ddim({MLP_N, MLP_K}); paddle::Tensor W = eager_test::CreateTensorWithValue(ddimW, paddle::platform::CUDAPlace(), @@ -218,7 +218,7 @@ TEST(Benchmark, EagerIntermediateMLPCUDA) { true); RetainGradForTensor(W); - paddle::framework::DDim ddimB = phi::make_ddim({MLP_K}); + paddle::framework::DDim ddimB = common::make_ddim({MLP_K}); paddle::Tensor B = eager_test::CreateTensorWithValue(ddimB, paddle::platform::CUDAPlace(), diff --git a/test/cpp/eager/performance_tests/benchmark_fluid_cpu.cc b/test/cpp/eager/performance_tests/benchmark_fluid_cpu.cc index 069065d03c21a1..f1ac8bc77e7f3a 100644 --- a/test/cpp/eager/performance_tests/benchmark_fluid_cpu.cc +++ b/test/cpp/eager/performance_tests/benchmark_fluid_cpu.cc @@ -51,7 +51,7 @@ TEST(Benchmark, FluidScaleCPU) { std::vector dims = {2, 4, 4, 4}; auto* x_tensor = X->MutableVar()->GetMutable(); - x_tensor->Resize(phi::make_ddim(dims)); + x_tensor->Resize(common::make_ddim(dims)); auto* mutable_x = x_tensor->mutable_data(place); paddle::memory::Copy(place, mutable_x, @@ -100,7 +100,7 @@ TEST(Benchmark, FluidMatmulCPU) { std::vector dims = {2, 2}; auto* x_tensor = X->MutableVar()->GetMutable(); - x_tensor->Resize(phi::make_ddim(dims)); + x_tensor->Resize(common::make_ddim(dims)); auto* mutable_x = x_tensor->mutable_data(place); paddle::memory::Copy(place, mutable_x, @@ -109,7 +109,7 @@ TEST(Benchmark, FluidMatmulCPU) { sizeof(float) * x_src_data.size()); auto* y_tensor = Y->MutableVar()->GetMutable(); - y_tensor->Resize(phi::make_ddim(dims)); + y_tensor->Resize(common::make_ddim(dims)); auto* mutable_y = y_tensor->mutable_data(place); paddle::memory::Copy(place, mutable_y, @@ -161,7 +161,7 @@ TEST(Benchmark, FluidMLPCPU) { X->SetOverridedStopGradient(false); auto* x_tensor = X->MutableVar()->GetMutable(); - x_tensor->Resize(phi::make_ddim(x_dims)); + x_tensor->Resize(common::make_ddim(x_dims)); auto* mutable_x = x_tensor->mutable_data(place); paddle::memory::Copy(place, mutable_x, @@ -180,7 +180,7 @@ TEST(Benchmark, FluidMLPCPU) { B->SetOverridedStopGradient(false); auto* w_tensor = W->MutableVar()->GetMutable(); - w_tensor->Resize(phi::make_ddim(w_dims)); + 
w_tensor->Resize(common::make_ddim(w_dims)); auto* mutable_w = w_tensor->mutable_data(place); paddle::memory::Copy(place, mutable_w, @@ -189,7 +189,7 @@ TEST(Benchmark, FluidMLPCPU) { sizeof(float) * w_src_data.size()); auto* b_tensor = B->MutableVar()->GetMutable(); - b_tensor->Resize(phi::make_ddim(b_dims)); + b_tensor->Resize(common::make_ddim(b_dims)); auto* mutable_b = b_tensor->mutable_data(place); paddle::memory::Copy(place, mutable_b, diff --git a/test/cpp/eager/performance_tests/benchmark_fluid_cuda.cc b/test/cpp/eager/performance_tests/benchmark_fluid_cuda.cc index 178fbdce86c3d6..909165bf99688a 100644 --- a/test/cpp/eager/performance_tests/benchmark_fluid_cuda.cc +++ b/test/cpp/eager/performance_tests/benchmark_fluid_cuda.cc @@ -52,7 +52,7 @@ TEST(Benchmark, FluidScaleCUDA) { std::vector dims = {2, 4, 4, 4}; auto* x_tensor = X->MutableVar()->GetMutable(); - x_tensor->Resize(phi::make_ddim(dims)); + x_tensor->Resize(common::make_ddim(dims)); auto* mutable_x = x_tensor->mutable_data(place); paddle::platform::DeviceContextPool& pool = @@ -115,7 +115,7 @@ TEST(Benchmark, FluidMatmulCUDA) { auto stream = dev_ctx->stream(); auto* x_tensor = X->MutableVar()->GetMutable(); - x_tensor->Resize(phi::make_ddim(dims)); + x_tensor->Resize(common::make_ddim(dims)); auto* mutable_x = x_tensor->mutable_data(place); paddle::memory::Copy(place, mutable_x, @@ -125,7 +125,7 @@ TEST(Benchmark, FluidMatmulCUDA) { stream); auto* y_tensor = Y->MutableVar()->GetMutable(); - y_tensor->Resize(phi::make_ddim(dims)); + y_tensor->Resize(common::make_ddim(dims)); auto* mutable_y = y_tensor->mutable_data(place); paddle::memory::Copy(place, mutable_y, @@ -185,7 +185,7 @@ TEST(Benchmark, FluidMLPCUDA) { X->SetOverridedStopGradient(false); auto* x_tensor = X->MutableVar()->GetMutable(); - x_tensor->Resize(phi::make_ddim(x_dims)); + x_tensor->Resize(common::make_ddim(x_dims)); auto* mutable_x = x_tensor->mutable_data(place); paddle::memory::Copy(place, mutable_x, @@ -205,7 +205,7 @@ TEST(Benchmark, FluidMLPCUDA) { B->SetOverridedStopGradient(false); auto* w_tensor = W->MutableVar()->GetMutable(); - w_tensor->Resize(phi::make_ddim(w_dims)); + w_tensor->Resize(common::make_ddim(w_dims)); auto* mutable_w = w_tensor->mutable_data(place); paddle::memory::Copy(place, mutable_w, @@ -215,7 +215,7 @@ TEST(Benchmark, FluidMLPCUDA) { stream); auto* b_tensor = B->MutableVar()->GetMutable(); - b_tensor->Resize(phi::make_ddim(b_dims)); + b_tensor->Resize(common::make_ddim(b_dims)); auto* mutable_b = b_tensor->mutable_data(place); paddle::memory::Copy(place, mutable_b, diff --git a/test/cpp/eager/task_tests/CMakeLists.txt b/test/cpp/eager/task_tests/CMakeLists.txt index 4df64e81d0ffc4..9bcd4b19f856a2 100755 --- a/test/cpp/eager/task_tests/CMakeLists.txt +++ b/test/cpp/eager/task_tests/CMakeLists.txt @@ -1,7 +1,7 @@ cc_test( test_egr_task_nan_inf_utils SRCS nan_inf_utils_test.cc - DEPS eager_nan_inf_utils phi) + DEPS eager_nan_inf_utils phi common) if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) cc_test( diff --git a/test/cpp/eager/task_tests/backward_test.cc b/test/cpp/eager/task_tests/backward_test.cc index c520c92c7f3e29..a6730d2dead69d 100644 --- a/test/cpp/eager/task_tests/backward_test.cc +++ b/test/cpp/eager/task_tests/backward_test.cc @@ -38,7 +38,7 @@ TEST(Backward, SingleNodeEmptyGrad) { eager_test::InitEnv(paddle::platform::CPUPlace()); // Prepare Inputs - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); // Create Target Tensor paddle::Tensor 
target_tensor = @@ -90,7 +90,7 @@ TEST(Backward, SingleNodeCustomGrad) { // Prepare Inputs std::vector target_tensors; - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); // Create Target Tensor paddle::Tensor tensor = @@ -162,7 +162,7 @@ TEST(Backward, LinearNodes) { // Prepare Inputs std::vector target_tensors; - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); // Create Target Tensor paddle::Tensor tensor = @@ -237,7 +237,7 @@ TEST(Backward, WithAccumulation) { eager_test::InitEnv(paddle::platform::CPUPlace()); // Prepare Inputs - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); // Create Target Tensor std::vector target_tensors; diff --git a/test/cpp/eager/task_tests/cross_batch_accumulation_test.cc b/test/cpp/eager/task_tests/cross_batch_accumulation_test.cc index b4d62fa27c08fc..007f8f80dacc73 100644 --- a/test/cpp/eager/task_tests/cross_batch_accumulation_test.cc +++ b/test/cpp/eager/task_tests/cross_batch_accumulation_test.cc @@ -35,7 +35,7 @@ TEST(CrossBatchAccumulation, SingleScaleNode) { eager_test::InitEnv(paddle::platform::CPUPlace()); std::vector target_tensors; - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); paddle::Tensor tensor = eager_test::CreateTensorWithValue(ddim, diff --git a/test/cpp/eager/task_tests/eager_utils_test.cc b/test/cpp/eager/task_tests/eager_utils_test.cc index 77902fa5eed506..5326e359780c2a 100644 --- a/test/cpp/eager/task_tests/eager_utils_test.cc +++ b/test/cpp/eager/task_tests/eager_utils_test.cc @@ -31,7 +31,7 @@ namespace egr { TEST(EagerUtils, AutoGradMeta) { // Construct Eager Tensor phi::DenseTensorMeta meta = - phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); + phi::DenseTensorMeta(phi::DataType::FLOAT32, common::make_ddim({1, 1})); std::shared_ptr dt0 = std::make_shared( std::make_unique( paddle::platform::CPUPlace()) @@ -169,7 +169,7 @@ TEST(EagerUtils, PassStopGradient) { } TEST(EagerUtils, TrySyncToVar) { - paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4}); + paddle::framework::DDim ddim = common::make_ddim({2, 4, 4, 4}); auto tensor = CreateTestCPUTensor(5.0f, ddim); std::vector> var_bases = { egr::EagerUtils::TrySyncToVar(tensor)}; @@ -187,7 +187,7 @@ TEST(EagerUtils, TrySyncToVar) { } TEST(EagerUtils, TrySyncToVars) { - paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4}); + paddle::framework::DDim ddim = common::make_ddim({2, 4, 4, 4}); std::vector tensors = {CreateTestCPUTensor(1.0f, ddim), CreateTestCPUTensor(2.0f, ddim)}; diff --git a/test/cpp/eager/task_tests/forward_autograd_test.cc b/test/cpp/eager/task_tests/forward_autograd_test.cc index d7d1b87c99dfb6..c68e51ab2b08bd 100644 --- a/test/cpp/eager/task_tests/forward_autograd_test.cc +++ b/test/cpp/eager/task_tests/forward_autograd_test.cc @@ -35,7 +35,7 @@ TEST(Forward, SingleNode) { // Prepare Inputs std::vector target_tensors; - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); // Create Target Tensor paddle::Tensor t = @@ -85,7 +85,7 @@ TEST(Forward, LinearNodes) { // Prepare Inputs std::vector target_tensors; - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = 
common::make_ddim({4, 16, 16, 32}); // Create Target Tensor paddle::Tensor t = @@ -171,7 +171,7 @@ TEST(Forward, BranchedNodes) { // Prepare Inputs std::vector target_tensors; - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); // Create Target Tensor paddle::Tensor t = diff --git a/test/cpp/eager/task_tests/fwd_bwd_joint_test.cc b/test/cpp/eager/task_tests/fwd_bwd_joint_test.cc index 1aff3a2104fa15..133bd7e7c954ac 100644 --- a/test/cpp/eager/task_tests/fwd_bwd_joint_test.cc +++ b/test/cpp/eager/task_tests/fwd_bwd_joint_test.cc @@ -43,7 +43,8 @@ paddle::Tensor hook_function(const paddle::Tensor& t) { auto ret_meta = phi::DenseTensorMeta( t_dense->dtype(), t_dense->dims(), t_dense->layout()); auto place = t_dense->place(); - size_t bytes_size = phi::product(t_dense->dims()) * SizeOf(t_dense->dtype()); + size_t bytes_size = + common::product(t_dense->dims()) * SizeOf(t_dense->dtype()); auto ret_dense = std::make_shared( paddle::memory::Alloc(place, bytes_size), std::move(ret_meta)); @@ -64,7 +65,7 @@ TEST(FwdBwdJoint, SingleNode) { eager_test::InitEnv(paddle::platform::CPUPlace()); // 1. Prepare Input - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); paddle::Tensor tensor = eager_test::CreateTensorWithValue(ddim, paddle::platform::CPUPlace(), @@ -108,7 +109,7 @@ TEST(FwdBwdJoint, LinearNodes) { eager_test::InitEnv(paddle::platform::CPUPlace()); // 1. Prepare Input - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); paddle::Tensor tensor = eager_test::CreateTensorWithValue(ddim, paddle::platform::CPUPlace(), @@ -162,7 +163,7 @@ TEST(FwdBwdJoint, BranchedNodes) { eager_test::InitEnv(paddle::platform::CPUPlace()); // 1. Prepare Input - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); paddle::Tensor tensor = eager_test::CreateTensorWithValue(ddim, paddle::platform::CPUPlace(), @@ -235,7 +236,7 @@ TEST(FwdBwdJoint, GradientHook) { eager_test::InitEnv(paddle::platform::CPUPlace()); // 1. Prepare Input - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); paddle::Tensor tensor = eager_test::CreateTensorWithValue(ddim, paddle::platform::CPUPlace(), @@ -308,7 +309,7 @@ TEST(FwdBwdJoint, CrossBatchAccumulation) { eager_test::InitEnv(paddle::platform::CPUPlace()); // 1. Prepare Input - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); paddle::Tensor tensor = eager_test::CreateTensorWithValue(ddim, paddle::platform::CPUPlace(), @@ -363,7 +364,7 @@ TEST(FwdBwdJoint, SingleNodeCUDA) { eager_test::InitEnv(paddle::platform::CUDAPlace()); // 1. Prepare Input - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); paddle::Tensor tensor = eager_test::CreateTensorWithValue(ddim, paddle::platform::CUDAPlace(), @@ -404,7 +405,7 @@ TEST(FwdBwdJoint, BranchedNodesCUDA) { eager_test::InitEnv(paddle::platform::CUDAPlace()); // 1. 
Prepare Input - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); paddle::Tensor tensor = eager_test::CreateTensorWithValue(ddim, paddle::platform::CUDAPlace(), diff --git a/test/cpp/eager/task_tests/generated_test.cc b/test/cpp/eager/task_tests/generated_test.cc index 36032ef21f1b1f..3c1753a3dd5263 100644 --- a/test/cpp/eager/task_tests/generated_test.cc +++ b/test/cpp/eager/task_tests/generated_test.cc @@ -41,7 +41,7 @@ TEST(Generated, Sigmoid) { eager_test::InitEnv(paddle::platform::CPUPlace()); VLOG(6) << "Init Env"; // 1. Prepare Input - paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4}); + paddle::framework::DDim ddim = common::make_ddim({2, 4, 4, 4}); VLOG(6) << "Make Dim"; paddle::Tensor tensor = eager_test::CreateTensorWithValue(ddim, @@ -73,7 +73,7 @@ TEST(Generated, Matmul_v2) { paddle::imperative::SetCurrentTracer(tracer); // 1. Prepare Input - paddle::framework::DDim ddimX = phi::make_ddim({4, 16}); + paddle::framework::DDim ddimX = common::make_ddim({4, 16}); paddle::Tensor X = eager_test::CreateTensorWithValue(ddimX, paddle::platform::CPUPlace(), @@ -83,7 +83,7 @@ TEST(Generated, Matmul_v2) { true); egr_utils_api::RetainGradForTensor(X); - paddle::framework::DDim ddimY = phi::make_ddim({16, 20}); + paddle::framework::DDim ddimY = common::make_ddim({16, 20}); paddle::Tensor Y = eager_test::CreateTensorWithValue(ddimY, paddle::platform::CPUPlace(), @@ -113,7 +113,7 @@ TEST(Generated, ElementwiseAdd) { paddle::imperative::SetCurrentTracer(tracer); // 1. Prepare Input - paddle::framework::DDim ddimX = phi::make_ddim({4, 16}); + paddle::framework::DDim ddimX = common::make_ddim({4, 16}); paddle::Tensor X = eager_test::CreateTensorWithValue(ddimX, paddle::platform::CPUPlace(), @@ -123,7 +123,7 @@ TEST(Generated, ElementwiseAdd) { true); egr_utils_api::RetainGradForTensor(X); - paddle::framework::DDim ddimY = phi::make_ddim({4, 16}); + paddle::framework::DDim ddimY = common::make_ddim({4, 16}); paddle::Tensor Y = eager_test::CreateTensorWithValue(ddimY, paddle::platform::CPUPlace(), diff --git a/test/cpp/eager/task_tests/grad_test.cc b/test/cpp/eager/task_tests/grad_test.cc index ed4fb839dc6cd0..878ce0404954d4 100644 --- a/test/cpp/eager/task_tests/grad_test.cc +++ b/test/cpp/eager/task_tests/grad_test.cc @@ -37,7 +37,7 @@ TEST(Grad, SingleNodeEmptyGrad) { eager_test::InitEnv(paddle::platform::CPUPlace()); // Prepare Inputs - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); // Create Target Tensor (output) paddle::Tensor output_tensor = @@ -104,7 +104,7 @@ TEST(Grad, SingleNodeCustomGrad) { // Prepare Inputs std::vector target_tensors; - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); // Create Target Tensor paddle::Tensor tensor = @@ -183,7 +183,7 @@ TEST(Grad, LinearNodes) { // Prepare Target Tensor std::vector target_tensors; - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); // Create Target Tensor paddle::Tensor tensor = @@ -264,7 +264,7 @@ TEST(Grad, WithAccumulation) { eager_test::InitEnv(paddle::platform::CPUPlace()); // Prepare Inputs - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); // Create Target Tensor std::vector target_tensors; diff --git 
a/test/cpp/eager/task_tests/hook_test.cc b/test/cpp/eager/task_tests/hook_test.cc index 898590201eef63..b0812ea48d562d 100644 --- a/test/cpp/eager/task_tests/hook_test.cc +++ b/test/cpp/eager/task_tests/hook_test.cc @@ -38,7 +38,8 @@ paddle::Tensor hook_function(const paddle::Tensor& t) { auto ret_meta = phi::DenseTensorMeta( t_dense->dtype(), t_dense->dims(), t_dense->layout()); auto place = t_dense->place(); - size_t bytes_size = phi::product(t_dense->dims()) * SizeOf(t_dense->dtype()); + size_t bytes_size = + common::product(t_dense->dims()) * SizeOf(t_dense->dtype()); auto ret_dense = std::make_shared( paddle::memory::Alloc(place, bytes_size), std::move(ret_meta)); @@ -60,7 +61,7 @@ TEST(RetainGrad, HookBeforeRetainGrad) { // Prepare Inputs std::vector target_tensors; - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); // Create Target Tensor paddle::Tensor tensor = @@ -136,7 +137,7 @@ TEST(RetainGrad, HookAfterRetainGrad) { // Prepare Inputs std::vector target_tensors; - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); // Create Target Tensor paddle::Tensor tensor = diff --git a/test/cpp/eager/task_tests/hook_test_intermidiate.cc b/test/cpp/eager/task_tests/hook_test_intermidiate.cc index 37070d9b7b8f27..050672e2f07c52 100644 --- a/test/cpp/eager/task_tests/hook_test_intermidiate.cc +++ b/test/cpp/eager/task_tests/hook_test_intermidiate.cc @@ -41,7 +41,8 @@ paddle::Tensor hook_function(const paddle::Tensor& t) { auto ret_meta = phi::DenseTensorMeta( t_dense->dtype(), t_dense->dims(), t_dense->layout()); auto place = t_dense->place(); - size_t bytes_size = phi::product(t_dense->dims()) * SizeOf(t_dense->dtype()); + size_t bytes_size = + common::product(t_dense->dims()) * SizeOf(t_dense->dtype()); auto ret_dense = std::make_shared( paddle::memory::Alloc(place, bytes_size), std::move(ret_meta)); @@ -64,7 +65,7 @@ void test_sigmoid(bool is_remove_gradient_hook) { eager_test::InitEnv(paddle::platform::CPUPlace()); VLOG(6) << "Make Dim"; - paddle::framework::DDim ddim = phi::make_ddim({2, 4, 4, 4}); + paddle::framework::DDim ddim = common::make_ddim({2, 4, 4, 4}); VLOG(6) << "Make paddle::Tensor"; paddle::Tensor tensor = @@ -131,7 +132,7 @@ void test_elementwiseAdd(bool is_remove_gradient_hook) { paddle::imperative::SetCurrentTracer(tracer); // 1. Prepare Input - paddle::framework::DDim ddimX = phi::make_ddim({4, 16}); + paddle::framework::DDim ddimX = common::make_ddim({4, 16}); paddle::Tensor X = eager_test::CreateTensorWithValue(ddimX, paddle::platform::CPUPlace(), @@ -141,7 +142,7 @@ void test_elementwiseAdd(bool is_remove_gradient_hook) { true); egr_utils_api::RetainGradForTensor(X); - paddle::framework::DDim ddimY = phi::make_ddim({4, 16}); + paddle::framework::DDim ddimY = common::make_ddim({4, 16}); paddle::Tensor Y = eager_test::CreateTensorWithValue(ddimY, paddle::platform::CPUPlace(), @@ -195,7 +196,7 @@ void test_matmul(bool is_remove_gradient_hook) { paddle::imperative::SetCurrentTracer(tracer); // 1. 
Prepare Input - paddle::framework::DDim ddimX = phi::make_ddim({4, 16}); + paddle::framework::DDim ddimX = common::make_ddim({4, 16}); paddle::Tensor X = eager_test::CreateTensorWithValue(ddimX, paddle::platform::CPUPlace(), @@ -205,7 +206,7 @@ void test_matmul(bool is_remove_gradient_hook) { true); egr_utils_api::RetainGradForTensor(X); - paddle::framework::DDim ddimY = phi::make_ddim({16, 20}); + paddle::framework::DDim ddimY = common::make_ddim({16, 20}); paddle::Tensor Y = eager_test::CreateTensorWithValue(ddimY, paddle::platform::CPUPlace(), @@ -258,7 +259,7 @@ void test_backward_final_hooks() { eager_test::InitEnv(paddle::platform::CPUPlace()); VLOG(6) << "Make paddle::Tensor"; - paddle::framework::DDim ddimX = phi::make_ddim({4, 16}); + paddle::framework::DDim ddimX = common::make_ddim({4, 16}); paddle::Tensor X = eager_test::CreateTensorWithValue(ddimX, paddle::platform::CPUPlace(), @@ -266,7 +267,7 @@ void test_backward_final_hooks() { phi::DataLayout::NCHW, 3.0, true); - paddle::framework::DDim ddimY = phi::make_ddim({16, 20}); + paddle::framework::DDim ddimY = common::make_ddim({16, 20}); egr_utils_api::RetainGradForTensor(X); paddle::Tensor Y = diff --git a/test/cpp/eager/task_tests/tensor_utils_test.cc b/test/cpp/eager/task_tests/tensor_utils_test.cc index a39280101a5bc7..98d8c59ae2e47e 100644 --- a/test/cpp/eager/task_tests/tensor_utils_test.cc +++ b/test/cpp/eager/task_tests/tensor_utils_test.cc @@ -32,7 +32,7 @@ TEST(TensorUtils, Test) { // Prepare Inputs std::vector target_tensors; - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); // Create Target Tensor paddle::Tensor t = diff --git a/test/cpp/eager/test_utils.h b/test/cpp/eager/test_utils.h index 9d6ec2fbf797bb..393eec6f402bfb 100644 --- a/test/cpp/eager/test_utils.h +++ b/test/cpp/eager/test_utils.h @@ -35,8 +35,11 @@ inline paddle::Tensor CreateTensorWithValue( const phi::DataLayout& layout, float value, bool is_leaf = true) { - paddle::Tensor out = paddle::experimental::full( - phi::vectorize(ddim), paddle::experimental::Scalar(value), dtype, place); + paddle::Tensor out = + paddle::experimental::full(common::vectorize(ddim), + paddle::experimental::Scalar(value), + dtype, + place); auto meta = egr::EagerUtils::autograd_meta(&out); if (is_leaf) { diff --git a/test/cpp/fluid/CMakeLists.txt b/test/cpp/fluid/CMakeLists.txt index bba22ebf76b935..cf5a0d21302e1b 100644 --- a/test/cpp/fluid/CMakeLists.txt +++ b/test/cpp/fluid/CMakeLists.txt @@ -47,6 +47,7 @@ cc_test( generated_op elementwise_add_op phi + common generated_static_op) cc_test( gather_test @@ -59,7 +60,7 @@ cc_test( cc_test( scatter_test SRCS scatter_test.cc - DEPS tensor phi) + DEPS tensor phi common) cc_test( beam_search_decode_op_test SRCS beam_search_decode_op_test.cc @@ -79,7 +80,7 @@ if(WITH_GPU) nv_test( dropout_op_test SRCS dropout_op_test.cc - DEPS dropout_op tensor phi) + DEPS dropout_op tensor phi common) nv_test( test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc @@ -88,12 +89,18 @@ if(WITH_GPU) nv_test( feed_forward_test SRCS feed_forward_test.cu - DEPS fleet_executor elementwise_add_op matmul_op tensor phi ${CINN_DEPS}) + DEPS fleet_executor + elementwise_add_op + matmul_op + tensor + phi + common + ${CINN_DEPS}) elseif(WITH_ROCM) hip_test( dropout_op_test SRCS dropout_op_test.cc - DEPS dropout_op tensor phi) + DEPS dropout_op tensor phi common) hip_test( test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc @@ -117,6 
+124,7 @@ if(WITH_CINN) op_debug_string_test.cc DEPS executor + common fleet_executor recurrent_op_helper recurrent_op @@ -124,14 +132,20 @@ if(WITH_CINN) ${COMMON_OP_DEPS} python) else() - paddle_test(op_debug_string_test SRCS op_debug_string_test.cc) + paddle_test(op_debug_string_test SRCS op_debug_string_test.cc DEPS common) endif() if(WITH_GPU) cc_test( copy_cross_scope_test SRCS copy_cross_scope_test.cc - DEPS op_registry copy_cross_scope_op scope device_context enforce executor) + DEPS op_registry + copy_cross_scope_op + scope + device_context + enforce + executor + common) endif() if(WITH_ONNXRUNTIME AND WIN32) diff --git a/test/cpp/fluid/assign_op_test.cc b/test/cpp/fluid/assign_op_test.cc index cc6c915c09a40c..8f53cce426456b 100644 --- a/test/cpp/fluid/assign_op_test.cc +++ b/test/cpp/fluid/assign_op_test.cc @@ -15,10 +15,10 @@ limitations under the License. */ #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/place.h" -#include "paddle/phi/core/ddim.h" TEST(AssignOp, AssignLoDTensor) { paddle::platform::CPUPlace cpu_place; @@ -28,7 +28,7 @@ TEST(AssignOp, AssignLoDTensor) { paddle::operators::AssignFunctor assign_functor(&output, ctx); phi::DenseTensor input; - paddle::framework::DDim in_dims = phi::make_ddim({3, 4}); + paddle::framework::DDim in_dims = common::make_ddim({3, 4}); int* in_data = input.mutable_data(in_dims, cpu_place); for (int i = 0; i < 12; ++i) { in_data[i] = i; @@ -54,7 +54,7 @@ TEST(AssignOp, AssignLoDTensorArray) { paddle::framework::LoDTensorArray input; for (int i = 0; i < 5; ++i) { - paddle::framework::DDim in_dims = phi::make_ddim({i + 1, i + 2}); + paddle::framework::DDim in_dims = common::make_ddim({i + 1, i + 2}); phi::DenseTensor lod_tensor; float* in_data = lod_tensor.mutable_data(in_dims, cpu_place); for (int j = 0; j < (i + 1) * (i + 2); ++j) { @@ -68,7 +68,7 @@ TEST(AssignOp, AssignLoDTensorArray) { auto& out_array = output.Get(); for (int i = 0; i < 5; ++i) { paddle::framework::DDim out_dims = out_array[i].dims(); - EXPECT_EQ(phi::make_ddim({i + 1, i + 2}), out_dims); + EXPECT_EQ(common::make_ddim({i + 1, i + 2}), out_dims); const float* out_data = out_array[i].data(); for (int j = 0; j < (i + 1) * (i + 2); ++j) { EXPECT_EQ(static_cast(j), out_data[j]); @@ -89,7 +89,7 @@ TEST(AssignOp, AssignSelectedRows) { phi::SelectedRows input(rows, height); phi::DenseTensor* input_tensor = input.mutable_value(); - paddle::framework::DDim in_dims = phi::make_ddim({3, 4}); + paddle::framework::DDim in_dims = common::make_ddim({3, 4}); int* in_data = input_tensor->mutable_data(in_dims, cpu_place); for (int i = 0; i < 12; ++i) { in_data[i] = i; diff --git a/test/cpp/fluid/benchmark/CMakeLists.txt b/test/cpp/fluid/benchmark/CMakeLists.txt index 9111dfe2ff35f8..bc634f55cab792 100644 --- a/test/cpp/fluid/benchmark/CMakeLists.txt +++ b/test/cpp/fluid/benchmark/CMakeLists.txt @@ -11,7 +11,8 @@ cc_test( scope ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} - phi) + phi + common) if(WITH_ONNXRUNTIME AND WIN32) # Copy onnxruntime for some c++ test in Windows, since the test will diff --git a/test/cpp/fluid/benchmark/op_tester.cc b/test/cpp/fluid/benchmark/op_tester.cc index 6f68ab23a45669..a06e8c02c8d238 100644 --- a/test/cpp/fluid/benchmark/op_tester.cc +++ b/test/cpp/fluid/benchmark/op_tester.cc @@ -280,14 +280,14 @@ void OpTester::SetupTensor(phi::DenseTensor *tensor, std::mt19937 rng(seed++); std::uniform_real_distribution uniform_dist(0, 1); - T *ptr = 
tensor->mutable_data(phi::make_ddim(shape), place_); + T *ptr = tensor->mutable_data(common::make_ddim(shape), place_); phi::DenseTensor cpu_tensor; T *cpu_ptr = nullptr; if (!platform::is_cpu_place(place_)) { - cpu_ptr = - cpu_tensor.mutable_data(phi::make_ddim(shape), platform::CPUPlace()); + cpu_ptr = cpu_tensor.mutable_data(common::make_ddim(shape), + platform::CPUPlace()); } else { cpu_ptr = ptr; } diff --git a/test/cpp/fluid/benchmark/op_tester.h b/test/cpp/fluid/benchmark/op_tester.h index de8f62cfe07cd6..5dc2461fbd96a4 100644 --- a/test/cpp/fluid/benchmark/op_tester.h +++ b/test/cpp/fluid/benchmark/op_tester.h @@ -19,9 +19,9 @@ limitations under the License. */ #include #include +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/phi/core/ddim.h" #include "test/cpp/fluid/benchmark/op_tester_config.h" namespace paddle { diff --git a/test/cpp/fluid/cinn/CMakeLists.txt b/test/cpp/fluid/cinn/CMakeLists.txt index da4a085cef41d5..0fae3ea78737c5 100644 --- a/test/cpp/fluid/cinn/CMakeLists.txt +++ b/test/cpp/fluid/cinn/CMakeLists.txt @@ -6,6 +6,7 @@ if(WITH_TESTING) DEPS fleet_executor phi + common lod_tensor scope proto_desc diff --git a/test/cpp/fluid/cinn/cinn_launch_context_test.cc b/test/cpp/fluid/cinn/cinn_launch_context_test.cc index 5e7fbea5d876ff..032aad828365c9 100644 --- a/test/cpp/fluid/cinn/cinn_launch_context_test.cc +++ b/test/cpp/fluid/cinn/cinn_launch_context_test.cc @@ -27,6 +27,7 @@ limitations under the License. */ #include "paddle/cinn/hlir/framework/scope.h" #include "paddle/cinn/hlir/framework/tensor.h" #include "paddle/cinn/runtime/cinn_runtime.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/new_executor/interpretercore.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/paddle2cinn/build_cinn_pass.h" @@ -35,7 +36,6 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/operators/cinn/cinn_op_helper.h" -#include "paddle/phi/core/ddim.h" #include "paddle/pir/core/program.h" #include "paddle/pir/core/value.h" @@ -200,7 +200,7 @@ TEST_F(CinnLaunchContextTest, TestConstructResult) { auto* buffer = launch_context->GetCinnBufferOfVar(var_name); auto&& scope = compiled_obj->scope; ASSERT_EQ(framework::DDim(buffer->dims, buffer->dimensions), - phi::make_ddim(scope->GetTensor(arg_name)->shape().data())); + common::make_ddim(scope->GetTensor(arg_name)->shape().data())); }; check_argument_fn("var1", "cinn_var1"); check_argument_fn("var2", "cinn_var2"); @@ -216,11 +216,11 @@ TEST_F(CinnLaunchContextTest, TestCheckTensorEquivalent) { auto* tensor2 = scope.Var("var2")->GetMutable(); // dimension not equivalent - tensor1->mutable_data(phi::make_ddim({3, 5}), place); + tensor1->mutable_data(common::make_ddim({3, 5}), place); ASSERT_THROW(launch_context->CheckTensorEquivalent("var1", *tensor1), paddle::platform::EnforceNotMet); // data type not equivalent - tensor2->mutable_data(phi::make_ddim({6, 7, 8}), place); + tensor2->mutable_data(common::make_ddim({6, 7, 8}), place); ASSERT_THROW(launch_context->CheckTensorEquivalent("var2", *tensor2), paddle::platform::EnforceNotMet); } @@ -243,7 +243,7 @@ TEST_F(CinnLaunchContextTest, TestBuildCompiledProgram) { ASSERT_NE(var, nullptr); auto* buffer = launch_context->GetCinnBufferOfVar(var_name); ASSERT_EQ(framework::DDim(buffer->dims, buffer->dimensions), - phi::make_ddim(var->GetShape())); + common::make_ddim(var->GetShape())); } ASSERT_TRUE(block.FindVar("var1")->Persistable()); ASSERT_FALSE(block.FindVar("var5")->Persistable()); @@ -274,7 +274,7 @@ TEST_F(CinnLaunchContextTest, TestCallbackAssignment) { // assign external variables auto* tensor1 = scope.Var("var1")->GetMutable(); - float* data1 = tensor1->mutable_data(phi::make_ddim({3, 4}), place); + float* data1 = tensor1->mutable_data(common::make_ddim({3, 4}), place); data1[0] = 9.99f; data1[10] = 19.99f; // check argument is set correctly and alloc/free callbacks work well diff --git a/test/cpp/fluid/cinn/cinn_launch_op_test.cc b/test/cpp/fluid/cinn/cinn_launch_op_test.cc index 5765a2c50269ee..487a0e7d7820a4 100644 --- a/test/cpp/fluid/cinn/cinn_launch_op_test.cc +++ b/test/cpp/fluid/cinn/cinn_launch_op_test.cc @@ -21,12 +21,12 @@ limitations under the License. */ #include #include "gtest/gtest.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/init.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/flags.h" #include "paddle/phi/core/kernel_registry.h" #include "test/cpp/fluid/cinn/test_helper.h" diff --git a/test/cpp/fluid/cinn/test_helper.h b/test/cpp/fluid/cinn/test_helper.h index 040a1858101365..5ffb1120bb7886 100644 --- a/test/cpp/fluid/cinn/test_helper.h +++ b/test/cpp/fluid/cinn/test_helper.h @@ -20,12 +20,12 @@ limitations under the License. 
*/ #include #include "gtest/gtest.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/paddle2cinn/build_cinn_pass.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/phi/core/ddim.h" namespace paddle::operators { diff --git a/test/cpp/fluid/controlflow/conditional_block_op_test.cc b/test/cpp/fluid/controlflow/conditional_block_op_test.cc index 62552dc1fc8ad7..a4575f258d72e3 100644 --- a/test/cpp/fluid/controlflow/conditional_block_op_test.cc +++ b/test/cpp/fluid/controlflow/conditional_block_op_test.cc @@ -32,14 +32,14 @@ TEST(ConditionalBlockGrad, NoNeedRunLoDTensorArray) { Variable* cond_var = scope.Var("condition"); phi::DenseTensor* cond_tensor = cond_var->GetMutable(); - paddle::framework::DDim cond_dims = phi::make_ddim({1}); + paddle::framework::DDim cond_dims = common::make_ddim({1}); bool* cond_data = cond_tensor->mutable_data(cond_dims, place); cond_data[0] = false; Variable* input_var = scope.Var("input_lod_tensor_array"); LoDTensorArray* input_tensors = input_var->GetMutable(); for (int i = 0; i < 5; ++i) { - paddle::framework::DDim in_dims = phi::make_ddim({i + 1, i + 2}); + paddle::framework::DDim in_dims = common::make_ddim({i + 1, i + 2}); phi::DenseTensor lod_tensor; float* in_data = lod_tensor.mutable_data(in_dims, place); for (int j = 0; j < (i + 1) * (i + 2); ++j) { @@ -66,7 +66,7 @@ TEST(ConditionalBlockGrad, NoNeedRunLoDTensorArray) { const LoDTensorArray& out_tensors = input_grad_var->Get(); for (int i = 0; i < 5; ++i) { paddle::framework::DDim out_dims = out_tensors[i].dims(); - EXPECT_EQ(phi::make_ddim({i + 1, i + 2}), out_dims); + EXPECT_EQ(common::make_ddim({i + 1, i + 2}), out_dims); const float* out_data = out_tensors[i].data(); for (int j = 0; j < (i + 1) * (i + 2); ++j) { EXPECT_EQ(0, out_data[j]); diff --git a/test/cpp/fluid/dlnne/dlnne_engine_op_test.cc b/test/cpp/fluid/dlnne/dlnne_engine_op_test.cc index 01e12bf1132aa0..af373874aa0b2a 100644 --- a/test/cpp/fluid/dlnne/dlnne_engine_op_test.cc +++ b/test/cpp/fluid/dlnne/dlnne_engine_op_test.cc @@ -37,7 +37,7 @@ void CreateCUDATensor(framework::Scope* scope, const std::vector& shape) { auto* var = scope->Var(name); auto* tensor = var->GetMutable(); - auto dims = phi::make_ddim(shape); + auto dims = common::make_ddim(shape); tensor->Resize(dims); platform::CUDAPlace place; phi::GPUContext ctx(place); diff --git a/test/cpp/fluid/elementwise/CMakeLists.txt b/test/cpp/fluid/elementwise/CMakeLists.txt index 304063cd81c4c2..f8a9f8b061cb30 100644 --- a/test/cpp/fluid/elementwise/CMakeLists.txt +++ b/test/cpp/fluid/elementwise/CMakeLists.txt @@ -1,12 +1,30 @@ cc_test( test_elementwise_add_op_inplace SRCS test_elementwise_add_op_inplace.cc - DEPS executor op_registry elementwise_add_op scope device_context enforce) + DEPS executor + op_registry + elementwise_add_op + scope + device_context + enforce + common) cc_test( test_elementwise_div_grad_grad SRCS test_elementwise_div_grad_grad.cc - DEPS executor op_registry elementwise_div_op scope device_context enforce) + DEPS executor + op_registry + elementwise_div_op + scope + device_context + enforce + common) cc_test( test_elementwise_add_grad_grad SRCS test_elementwise_add_grad_grad.cc - DEPS executor op_registry elementwise_add_op scope device_context enforce) + DEPS executor + op_registry + elementwise_add_op + scope + device_context + enforce + common) diff --git 
a/test/cpp/fluid/elementwise/test_elementwise_add_grad_grad.cc b/test/cpp/fluid/elementwise/test_elementwise_add_grad_grad.cc index 4fefd8864c5e9f..630bf64a7d6900 100644 --- a/test/cpp/fluid/elementwise/test_elementwise_add_grad_grad.cc +++ b/test/cpp/fluid/elementwise/test_elementwise_add_grad_grad.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "gtest/gtest.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "test/cpp/fluid/elementwise/test_elementwise_op_grad_grad.h" @@ -44,7 +44,7 @@ class TestElementwiseAddGradGradWithoutDDX using TestElementwiseOpGradGrad::expected_outs_; using TestElementwiseOpGradGrad::dims_; void ComputeExpectedOuts() override { - size_t numel = static_cast(phi::product(dims_)); + size_t numel = static_cast(common::product(dims_)); std::vector dy(numel); std::vector ddout(numel); for (size_t i = 0; i < numel; ++i) { diff --git a/test/cpp/fluid/elementwise/test_elementwise_add_op_inplace.cc b/test/cpp/fluid/elementwise/test_elementwise_add_op_inplace.cc index db026084a56c75..0f665448171017 100644 --- a/test/cpp/fluid/elementwise/test_elementwise_add_op_inplace.cc +++ b/test/cpp/fluid/elementwise/test_elementwise_add_op_inplace.cc @@ -62,7 +62,7 @@ bool TestMain(const platform::Place &place, y->Resize(dims); z->Resize(dims); - size_t numel = static_cast(phi::product(dims)); + size_t numel = static_cast(common::product(dims)); auto x_ptr = x->mutable_data(place); auto y_ptr = y->mutable_data(place); diff --git a/test/cpp/fluid/elementwise/test_elementwise_div_grad_grad.cc b/test/cpp/fluid/elementwise/test_elementwise_div_grad_grad.cc index e74da1c884f67b..ddf1229cd0367b 100644 --- a/test/cpp/fluid/elementwise/test_elementwise_div_grad_grad.cc +++ b/test/cpp/fluid/elementwise/test_elementwise_div_grad_grad.cc @@ -56,7 +56,7 @@ class TestElementwiseDivGradGradWithoutDout using TestElementwiseOpGradGrad::expected_outs_; using TestElementwiseOpGradGrad::dims_; void ComputeExpectedOuts() override { - size_t numel = static_cast(phi::product(dims_)); + size_t numel = static_cast(common::product(dims_)); std::vector dy(numel); std::vector ddout(numel); for (size_t i = 0; i < numel; ++i) { diff --git a/test/cpp/fluid/elementwise/test_elementwise_op_grad_grad.h b/test/cpp/fluid/elementwise/test_elementwise_op_grad_grad.h index 4edbf5ddd05c83..ab67c559532d96 100644 --- a/test/cpp/fluid/elementwise/test_elementwise_op_grad_grad.h +++ b/test/cpp/fluid/elementwise/test_elementwise_op_grad_grad.h @@ -69,7 +69,7 @@ class TestElementwiseOpGradGrad { } void Setup() { - size_t numel = static_cast(phi::product(dims_)); + size_t numel = static_cast(common::product(dims_)); // init vars in scope and feed inputs for (auto in_name : inputs_) { InitVarInScope(in_name); @@ -127,7 +127,7 @@ class TestElementwiseOpGradGrad { cpu_out = out_tensor; } auto *out_ptr = cpu_out.data(); - size_t numel = static_cast(phi::product(dims_)); + size_t numel = static_cast(common::product(dims_)); #ifdef PADDLE_WITH_HIP auto is_equal = std::equal( out_ptr, diff --git a/test/cpp/fluid/feed_forward_test.cu b/test/cpp/fluid/feed_forward_test.cu index 7febf20e771187..b82f22cd03b5f3 100644 --- a/test/cpp/fluid/feed_forward_test.cu +++ b/test/cpp/fluid/feed_forward_test.cu @@ -62,8 +62,8 @@ void GetLinearOp(const std::vector &x, auto x_ptr = tensor_x->mutable_data(ctx.GetPlace()); auto y_ptr = tensor_y->mutable_data(ctx.GetPlace()); auto 
z_ptr = tensor_out->mutable_data(ctx.GetPlace()); - auto size_x = static_cast(phi::product(x_dim)); - auto size_y = static_cast(phi::product(y_dim)); + auto size_x = static_cast(common::product(x_dim)); + auto size_y = static_cast(common::product(y_dim)); auto size_z = x_dim[0] * x_dim[1] * y_dim[0]; cudaMemcpy(x_ptr, x.data(), size_x * sizeof(T), cudaMemcpyHostToDevice); cudaMemcpy(y_ptr, y.data(), size_y * sizeof(T), cudaMemcpyHostToDevice); @@ -158,8 +158,8 @@ void GetLinearOpGrad(const std::vector &x_vec, auto dinput_ptr = tensor_dx->mutable_data(ctx.GetPlace()); auto dweight_ptr = tensor_dy->mutable_data(ctx.GetPlace()); - auto size_x = static_cast(phi::product(x_dim)); - auto size_y = static_cast(phi::product(y_dim)); + auto size_x = static_cast(common::product(x_dim)); + auto size_y = static_cast(common::product(y_dim)); auto size_z = x_dim[0] * x_dim[1] * y_dim[0]; cudaMemcpy(x_ptr, x_vec.data(), size_x * sizeof(T), cudaMemcpyHostToDevice); cudaMemcpy(y_ptr, y_vec.data(), size_y * sizeof(T), cudaMemcpyHostToDevice); diff --git a/test/cpp/fluid/framework/CMakeLists.txt b/test/cpp/fluid/framework/CMakeLists.txt index 5085fa1dbab413..9b65af64656c17 100644 --- a/test/cpp/fluid/framework/CMakeLists.txt +++ b/test/cpp/fluid/framework/CMakeLists.txt @@ -39,7 +39,7 @@ cc_test( cc_test( lod_tensor_test SRCS lod_tensor_test.cc - DEPS phi lod_tensor memory) + DEPS phi common lod_tensor memory) if(WITH_GPU) nv_test( @@ -61,7 +61,7 @@ cc_test( cc_test( threadpool_test SRCS threadpool_test.cc - DEPS phi) + DEPS phi common) cc_test( var_type_traits_test @@ -87,12 +87,12 @@ if(WITH_GPU) nv_test( data_device_transform_test SRCS data_device_transform_test.cu - DEPS operator op_registry device_context phi scope) + DEPS operator op_registry device_context phi common scope) elseif(WITH_ROCM) hip_test( data_device_transform_test SRCS data_device_transform_test.cu - DEPS operator op_registry device_context phi scope) + DEPS operator op_registry device_context phi common scope) endif() if(WITH_GPU) @@ -206,6 +206,7 @@ if(WITH_PSCORE) heter_server gloo_wrapper phi + common ${RPC_DEPS} graph_gpu_wrapper) else() @@ -223,6 +224,7 @@ if(WITH_PSCORE) heter_server gloo_wrapper phi + common ${RPC_DEPS}) endif() else() @@ -274,7 +276,7 @@ cc_test_old( cc_test( infershape_utils_test SRCS infershape_utils_test.cc - DEPS infershape_utils phi) + DEPS infershape_utils phi common) if(WITH_TESTING AND TEST selected_rows_utils_test) set_tests_properties(selected_rows_utils_test PROPERTIES TIMEOUT 120) @@ -320,7 +322,8 @@ if(WITH_CINN) python) set_tests_properties(cinn_cache_key_test PROPERTIES LABELS "RUN_TYPE=CINN") - paddle_test(build_cinn_pass_test SRCS paddle2cinn/build_cinn_pass_test.cc) + paddle_test(build_cinn_pass_test SRCS paddle2cinn/build_cinn_pass_test.cc + DEPS common) set_tests_properties(build_cinn_pass_test PROPERTIES LABELS "RUN_TYPE=CINN") # target_link_libraries(build_cinn_pass_test ${PYTHON_LIBRARIES}) diff --git a/test/cpp/fluid/framework/copy_same_tensor_test.cc b/test/cpp/fluid/framework/copy_same_tensor_test.cc index 9b892c0c1b092e..edb293168256f2 100644 --- a/test/cpp/fluid/framework/copy_same_tensor_test.cc +++ b/test/cpp/fluid/framework/copy_same_tensor_test.cc @@ -17,11 +17,11 @@ #include #include "gtest/gtest.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/utils/flags.h" 
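The include hunks above all make the same move: paddle/phi/core/ddim.h is replaced by paddle/common/ddim.h, and call sites switch from the phi:: namespace to common::. A minimal sketch of the post-move API, assuming (as these hunks imply) that DDim, make_ddim, and product are all reachable through namespace common once this PR lands:

#include <cstdint>

#include "paddle/common/ddim.h"

int main() {
  // Build a shape and query its element count through the common
  // namespace, exactly as the updated tests now do.
  common::DDim dims = common::make_ddim({3, 4, 5});
  int64_t numel = common::product(dims);  // 3 * 4 * 5 = 60
  return numel == 60 ? 0 : 1;
}
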
PD_DECLARE_bool(use_system_allocator); @@ -80,13 +80,13 @@ static bool CopySameTensorTestMain(const DDim &dims, const void *ground_truth_ptr = src_cpu_tensor.data(); const void *result_ptr = dst_cpu_tensor.data(); - size_t byte_num = phi::product(dims) * sizeof(T); + size_t byte_num = common::product(dims) * sizeof(T); return std::memcmp(ground_truth_ptr, result_ptr, byte_num) == 0; } TEST(test_tensor_copy, test_copy_same_tensor) { using DataType = float; - auto dims = phi::make_ddim({3, 4, 5}); + auto dims = common::make_ddim({3, 4, 5}); auto places = CreatePlaceList(); for (auto &src_p : places) { diff --git a/test/cpp/fluid/framework/data_layout_transform_test.cc b/test/cpp/fluid/framework/data_layout_transform_test.cc index 8927ac7b949d7f..85094c8b6b5889 100644 --- a/test/cpp/fluid/framework/data_layout_transform_test.cc +++ b/test/cpp/fluid/framework/data_layout_transform_test.cc @@ -21,7 +21,7 @@ TEST(DataTransform, DataLayoutFunction) { auto place = paddle::platform::CPUPlace(); phi::DenseTensor in = phi::DenseTensor(); phi::DenseTensor out = phi::DenseTensor(); - in.mutable_data(phi::make_ddim({2, 3, 1, 2}), place); + in.mutable_data(common::make_ddim({2, 3, 1, 2}), place); in.set_layout(phi::DataLayout::kNHWC); auto kernel_nhwc = @@ -32,19 +32,19 @@ TEST(DataTransform, DataLayoutFunction) { paddle::framework::TransDataLayout(kernel_nhwc, kernel_ncwh, in, &out, place); EXPECT_TRUE(out.layout() == phi::DataLayout::kNCHW); - EXPECT_TRUE(out.dims() == phi::make_ddim({2, 2, 3, 1})); + EXPECT_TRUE(out.dims() == common::make_ddim({2, 2, 3, 1})); paddle::framework::TransDataLayout(kernel_ncwh, kernel_nhwc, in, &out, place); EXPECT_TRUE(in.layout() == phi::DataLayout::kNHWC); - EXPECT_TRUE(in.dims() == phi::make_ddim({2, 3, 1, 2})); + EXPECT_TRUE(in.dims() == common::make_ddim({2, 3, 1, 2})); } #ifdef PADDLE_WITH_DNNL TEST(DataTransformBf16, GetDataFromTensorDNNL) { auto place = paddle::platform::CPUPlace(); phi::DenseTensor in = phi::DenseTensor(); - in.mutable_data(phi::make_ddim({2, 3, 1, 2}), + in.mutable_data(common::make_ddim({2, 3, 1, 2}), place); void* in_data = @@ -56,7 +56,7 @@ TEST(DataTransformBf16, GetDataFromTensorDNNL) { TEST(DataTransformInt32, GetDataFromTensorDNNL) { auto place = paddle::platform::CPUPlace(); phi::DenseTensor in = phi::DenseTensor(); - in.mutable_data(phi::make_ddim({2, 3, 1, 2}), place); + in.mutable_data(common::make_ddim({2, 3, 1, 2}), place); void* in_data = phi::funcs::GetDataFromTensor(in, dnnl::memory::data_type::s32); diff --git a/test/cpp/fluid/framework/data_type_transform_test.cc b/test/cpp/fluid/framework/data_type_transform_test.cc index b0ed3328348480..528aaa88d63b5a 100644 --- a/test/cpp/fluid/framework/data_type_transform_test.cc +++ b/test/cpp/fluid/framework/data_type_transform_test.cc @@ -45,7 +45,7 @@ TEST(DataTypeTransform, CPUTransform) { phi::DenseTensor in; phi::DenseTensor out; - float* ptr = in.mutable_data(phi::make_ddim({2, 3}), place); + float* ptr = in.mutable_data(common::make_ddim({2, 3}), place); int data_number = 2 * 3; for (int i = 0; i < data_number; ++i) { @@ -71,7 +71,7 @@ TEST(DataTypeTransform, CPUTransform) { phi::DenseTensor out; paddle::platform::float16* ptr = in.mutable_data( - phi::make_ddim({2, 3}), place); + common::make_ddim({2, 3}), place); int data_number = 2 * 3; for (int i = 0; i < data_number; ++i) { @@ -111,7 +111,7 @@ TEST(DataTypeTransform, CPUTransform) { // transform float to float16 float* in_data_float = - in.mutable_data(phi::make_ddim({2, 3}), place); + in.mutable_data(common::make_ddim({2, 
3}), place); for (int i = 0; i < data_number; ++i) { in_data_float[i] = static_cast(i); } @@ -125,7 +125,7 @@ TEST(DataTypeTransform, CPUTransform) { // transform double to float16 double* in_data_double = - in.mutable_data(phi::make_ddim({2, 3}), place); + in.mutable_data(common::make_ddim({2, 3}), place); for (int i = 0; i < data_number; ++i) { in_data_double[i] = i; } @@ -138,7 +138,7 @@ TEST(DataTypeTransform, CPUTransform) { } // transform int to float16 - int* in_data_int = in.mutable_data(phi::make_ddim({2, 3}), place); + int* in_data_int = in.mutable_data(common::make_ddim({2, 3}), place); for (int i = 0; i < data_number; ++i) { in_data_int[i] = i; } @@ -152,7 +152,7 @@ TEST(DataTypeTransform, CPUTransform) { // transform int64 to float16 int64_t* in_data_int64 = - in.mutable_data(phi::make_ddim({2, 3}), place); + in.mutable_data(common::make_ddim({2, 3}), place); for (int i = 0; i < data_number; ++i) { in_data_int64[i] = i; } @@ -165,7 +165,8 @@ TEST(DataTypeTransform, CPUTransform) { } // transform bool to float16 - bool* in_data_bool = in.mutable_data(phi::make_ddim({2, 3}), place); + bool* in_data_bool = + in.mutable_data(common::make_ddim({2, 3}), place); for (int i = 0; i < data_number; ++i) { in_data_bool[i] = i; } @@ -184,7 +185,7 @@ TEST(DataTypeTransform, CPUTransform) { phi::DenseTensor out; paddle::platform::bfloat16* ptr = - in.mutable_data(phi::make_ddim({2, 3}), + in.mutable_data(common::make_ddim({2, 3}), place); int data_number = 2 * 3; @@ -225,7 +226,7 @@ TEST(DataTypeTransform, CPUTransform) { // transform float to bfloat16 float* in_data_float = - in.mutable_data(phi::make_ddim({2, 3}), place); + in.mutable_data(common::make_ddim({2, 3}), place); for (int i = 0; i < data_number; ++i) { in_data_float[i] = static_cast(i); } @@ -239,7 +240,7 @@ TEST(DataTypeTransform, CPUTransform) { // transform double to bfloat16 double* in_data_double = - in.mutable_data(phi::make_ddim({2, 3}), place); + in.mutable_data(common::make_ddim({2, 3}), place); for (int i = 0; i < data_number; ++i) { in_data_double[i] = i; } @@ -252,7 +253,7 @@ TEST(DataTypeTransform, CPUTransform) { } // transform int to bfloat16 - int* in_data_int = in.mutable_data(phi::make_ddim({2, 3}), place); + int* in_data_int = in.mutable_data(common::make_ddim({2, 3}), place); for (int i = 0; i < data_number; ++i) { in_data_int[i] = i; } @@ -266,7 +267,7 @@ TEST(DataTypeTransform, CPUTransform) { // transform int64 to bfloat16 int64_t* in_data_int64 = - in.mutable_data(phi::make_ddim({2, 3}), place); + in.mutable_data(common::make_ddim({2, 3}), place); for (int i = 0; i < data_number; ++i) { in_data_int64[i] = i; } @@ -279,7 +280,8 @@ TEST(DataTypeTransform, CPUTransform) { } // transform bool to bfloat16 - bool* in_data_bool = in.mutable_data(phi::make_ddim({2, 3}), place); + bool* in_data_bool = + in.mutable_data(common::make_ddim({2, 3}), place); for (int i = 0; i < data_number; ++i) { in_data_bool[i] = i; } @@ -297,7 +299,7 @@ TEST(DataTypeTransform, CPUTransform) { phi::DenseTensor in; phi::DenseTensor out; - int32_t* ptr = in.mutable_data(phi::make_ddim({2, 3}), place); + int32_t* ptr = in.mutable_data(common::make_ddim({2, 3}), place); int data_number = 2 * 3; for (int i = 0; i < data_number; ++i) { @@ -339,7 +341,7 @@ TEST(DataTypeTransform, CPUTransform) { // transform float to int32 float* in_data_float = - in.mutable_data(phi::make_ddim({2, 3}), place); + in.mutable_data(common::make_ddim({2, 3}), place); for (int i = 0; i < data_number; ++i) { in_data_float[i] = static_cast(i); } @@ -352,7 +354,7 
@@ TEST(DataTypeTransform, CPUTransform) { // transform double to int32 double* in_data_double = - in.mutable_data(phi::make_ddim({2, 3}), place); + in.mutable_data(common::make_ddim({2, 3}), place); for (int i = 0; i < data_number; ++i) { in_data_double[i] = i; } @@ -365,7 +367,7 @@ TEST(DataTypeTransform, CPUTransform) { // transform bfloat16 to int32 paddle::platform::bfloat16* in_data_bf16 = - in.mutable_data(phi::make_ddim({2, 3}), + in.mutable_data(common::make_ddim({2, 3}), place); for (int i = 0; i < data_number; ++i) { in_data_bf16[i] = i; @@ -379,7 +381,7 @@ TEST(DataTypeTransform, CPUTransform) { // transform int64 to int32 int64_t* in_data_int64 = - in.mutable_data(phi::make_ddim({2, 3}), place); + in.mutable_data(common::make_ddim({2, 3}), place); for (int i = 0; i < data_number; ++i) { in_data_int64[i] = i; } @@ -391,7 +393,8 @@ TEST(DataTypeTransform, CPUTransform) { } // transform bool to int32 - bool* in_data_bool = in.mutable_data(phi::make_ddim({2, 3}), place); + bool* in_data_bool = + in.mutable_data(common::make_ddim({2, 3}), place); for (int i = 0; i < data_number; ++i) { in_data_bool[i] = i; } diff --git a/test/cpp/fluid/framework/data_type_transform_test.cu b/test/cpp/fluid/framework/data_type_transform_test.cu index f9394bea7fc372..e854408a071721 100644 --- a/test/cpp/fluid/framework/data_type_transform_test.cu +++ b/test/cpp/fluid/framework/data_type_transform_test.cu @@ -50,7 +50,8 @@ TEST(DataTypeTransform, GPUTransform) { phi::DenseTensor out_gpu; phi::DenseTensor out; - float* in_ptr = in.mutable_data(phi::make_ddim({2, 3}), cpu_place); + float* in_ptr = + in.mutable_data(common::make_ddim({2, 3}), cpu_place); float arr[6] = {0, 1, 2, 3, 4, 5}; int data_number = sizeof(arr) / sizeof(arr[0]); memcpy(in_ptr, arr, sizeof(arr)); @@ -86,7 +87,7 @@ TEST(DataTypeTransform, GPUTransform) { phi::DenseTensor out; paddle::platform::float16* ptr = in.mutable_data( - phi::make_ddim({2, 3}), cpu_place); + common::make_ddim({2, 3}), cpu_place); paddle::platform::float16 arr[6] = {paddle::platform::float16(0), paddle::platform::float16(1), paddle::platform::float16(2), @@ -152,7 +153,7 @@ TEST(DataTypeTransform, GPUTransform) { // transform float to float16 float* in_data_float = - in.mutable_data(phi::make_ddim({2, 3}), cpu_place); + in.mutable_data(common::make_ddim({2, 3}), cpu_place); for (int i = 0; i < data_number; ++i) { in_data_float[i] = i; } @@ -172,7 +173,7 @@ TEST(DataTypeTransform, GPUTransform) { // transform double to float16 double* in_data_double = - in.mutable_data(phi::make_ddim({2, 3}), cpu_place); + in.mutable_data(common::make_ddim({2, 3}), cpu_place); for (int i = 0; i < data_number; ++i) { in_data_double[i] = i; } @@ -191,7 +192,8 @@ TEST(DataTypeTransform, GPUTransform) { } // transform int to float16 - int* in_data_int = in.mutable_data(phi::make_ddim({2, 3}), cpu_place); + int* in_data_int = + in.mutable_data(common::make_ddim({2, 3}), cpu_place); for (int i = 0; i < data_number; ++i) { in_data_int[i] = i; } @@ -211,7 +213,7 @@ TEST(DataTypeTransform, GPUTransform) { // transform int64 to float16 int64_t* in_data_int64 = - in.mutable_data(phi::make_ddim({2, 3}), cpu_place); + in.mutable_data(common::make_ddim({2, 3}), cpu_place); for (int i = 0; i < data_number; ++i) { in_data_int64[i] = i; } @@ -231,7 +233,7 @@ TEST(DataTypeTransform, GPUTransform) { // transform bool to float16 bool* in_data_bool = - in.mutable_data(phi::make_ddim({2, 3}), cpu_place); + in.mutable_data(common::make_ddim({2, 3}), cpu_place); for (int i = 0; i < data_number; ++i) { 
in_data_bool[i] = i; } diff --git a/test/cpp/fluid/framework/eigen_test.cc b/test/cpp/fluid/framework/eigen_test.cc index 4771922986b62e..2307cca56f152e 100644 --- a/test/cpp/fluid/framework/eigen_test.cc +++ b/test/cpp/fluid/framework/eigen_test.cc @@ -17,13 +17,13 @@ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/platform/place.h" -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" namespace paddle { namespace framework { TEST(EigenDim, From) { - EigenDim<3>::Type ed = EigenDim<3>::From(phi::make_ddim({1, 2, 3})); + EigenDim<3>::Type ed = EigenDim<3>::From(common::make_ddim({1, 2, 3})); ASSERT_EQ(1, ed[0]); ASSERT_EQ(2, ed[1]); ASSERT_EQ(3, ed[2]); @@ -32,7 +32,7 @@ TEST(EigenDim, From) { TEST(Eigen, DenseTensor) { phi::DenseTensor t; float* p = - t.mutable_data(phi::make_ddim({1, 2, 3}), platform::CPUPlace()); + t.mutable_data(common::make_ddim({1, 2, 3}), platform::CPUPlace()); for (int i = 0; i < 1 * 2 * 3; i++) { p[i] = static_cast(i); } @@ -54,7 +54,7 @@ TEST(Eigen, DenseTensor) { TEST(Eigen, ScalarFrom) { phi::DenseTensor t; - int* p = t.mutable_data(phi::make_ddim({1}), platform::CPUPlace()); + int* p = t.mutable_data(common::make_ddim({1}), platform::CPUPlace()); *p = static_cast(100); EigenScalar::Type es = EigenScalar::From(t); @@ -65,7 +65,8 @@ TEST(Eigen, ScalarFrom) { TEST(Eigen, VectorFrom) { phi::DenseTensor t; - float* p = t.mutable_data(phi::make_ddim({6}), platform::CPUPlace()); + float* p = + t.mutable_data(common::make_ddim({6}), platform::CPUPlace()); for (int i = 0; i < 6; i++) { p[i] = static_cast(i); } @@ -82,7 +83,7 @@ TEST(Eigen, VectorFrom) { TEST(Eigen, VectorFlatten) { phi::DenseTensor t; float* p = - t.mutable_data(phi::make_ddim({1, 2, 3}), platform::CPUPlace()); + t.mutable_data(common::make_ddim({1, 2, 3}), platform::CPUPlace()); for (int i = 0; i < 1 * 2 * 3; i++) { p[i] = static_cast(i); } @@ -99,7 +100,7 @@ TEST(Eigen, VectorFlatten) { TEST(Eigen, Matrix) { phi::DenseTensor t; float* p = - t.mutable_data(phi::make_ddim({2, 3}), platform::CPUPlace()); + t.mutable_data(common::make_ddim({2, 3}), platform::CPUPlace()); for (int i = 0; i < 2 * 3; i++) { p[i] = static_cast(i); } diff --git a/test/cpp/fluid/framework/operator_test.cc b/test/cpp/fluid/framework/operator_test.cc index baca5b3f06743a..d40a45ae5172a3 100644 --- a/test/cpp/fluid/framework/operator_test.cc +++ b/test/cpp/fluid/framework/operator_test.cc @@ -549,7 +549,7 @@ void SetGetLoDLevelTestMain(std::string op_type) { auto op = paddle::framework::OpRegistry::CreateOp(op_desc); auto* x_var = scope.Var("x.0"); auto* x = x_var->GetMutable(); - x->mutable_data(phi::make_ddim({64}), place); + x->mutable_data(common::make_ddim({64}), place); auto* out_var = scope.Var("out.0"); out_var->GetMutable(); diff --git a/test/cpp/fluid/framework/paddle2cinn/cinn_cache_key_test.cc b/test/cpp/fluid/framework/paddle2cinn/cinn_cache_key_test.cc index cd2da68a7f6ddf..f9406840dd2d86 100644 --- a/test/cpp/fluid/framework/paddle2cinn/cinn_cache_key_test.cc +++ b/test/cpp/fluid/framework/paddle2cinn/cinn_cache_key_test.cc @@ -18,10 +18,10 @@ #include #include "gtest/gtest.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/phi/core/ddim.h" namespace paddle { namespace framework { @@ -47,7 +47,7 @@ TEST(CinnCacheKeyTest, TestAsUnorderedKeyByStructure) { std::map feed_tensors = { {"X", tensor_pointer}}; - DDim ddim = phi::make_ddim({1, 2, 3}); 
+ DDim ddim = common::make_ddim({1, 2, 3}); std::map feed_shapes = {{"X", ddim}}; std::map feed_dtypes = {{"X", fp32}}; @@ -125,7 +125,7 @@ TEST(CinnCacheKeyTest, TestAsUnorderedKeyByAddress) { std::map feed_tensors = { {"X", tensor_pointer}}; - DDim ddim = phi::make_ddim({1, 2, 3}); + DDim ddim = common::make_ddim({1, 2, 3}); std::map feed_shapes = {{"X", ddim}}; std::map feed_dtypes = {{"X", fp32}}; std::map new_dtypes = {{"X", DataType::FLOAT64}}; diff --git a/test/cpp/fluid/framework/paddle2cinn/cinn_compiler_test.cc b/test/cpp/fluid/framework/paddle2cinn/cinn_compiler_test.cc index 519b78115748be..63c05d19a738e3 100644 --- a/test/cpp/fluid/framework/paddle2cinn/cinn_compiler_test.cc +++ b/test/cpp/fluid/framework/paddle2cinn/cinn_compiler_test.cc @@ -26,6 +26,7 @@ #include "glog/logging.h" #include "gtest/gtest.h" #include "paddle/cinn/common/target.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -35,7 +36,6 @@ #include "paddle/fluid/operators/cinn/cinn_launch_op.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/flags.h" #include "paddle/utils/flags.h" @@ -258,7 +258,7 @@ TEST(CinnCompilerTest, Compile) { std::unordered_map create_inputs; for (const auto& pair : inputs_info) { auto& tensor = create_inputs[pair.first]; - tensor.Resize(phi::make_ddim(pair.second)); + tensor.Resize(common::make_ddim(pair.second)); tensor.mutable_data(platform::CPUPlace()); } std::map input_tensors; diff --git a/test/cpp/fluid/framework/reader_test.cc b/test/cpp/fluid/framework/reader_test.cc index bca4f7de8ad0a0..cb53bdcf080923 100644 --- a/test/cpp/fluid/framework/reader_test.cc +++ b/test/cpp/fluid/framework/reader_test.cc @@ -40,7 +40,7 @@ class StubRootReader : public paddle::framework::ReaderBase { TEST(READER, decorate_chain) { paddle::framework::proto::VarType::Type dtype = paddle::framework::proto::VarType::FP32; - paddle::framework::DDim dim = phi::make_ddim({5, 7}); + paddle::framework::DDim dim = common::make_ddim({5, 7}); std::vector init_dims(4, dim); std::vector init_types(4, dtype); std::vector init_need_check(4, true); diff --git a/test/cpp/fluid/framework/selected_rows_utils_test.cc b/test/cpp/fluid/framework/selected_rows_utils_test.cc index 15735b87d0f9d5..6af07e03432fe8 100644 --- a/test/cpp/fluid/framework/selected_rows_utils_test.cc +++ b/test/cpp/fluid/framework/selected_rows_utils_test.cc @@ -30,7 +30,8 @@ class SelectedRowsTester : public ::testing::Test { phi::DenseTensor* value = selected_rows_->mutable_value(); auto* data = value->mutable_data( - phi::make_ddim({static_cast(rows.size()), row_numel}), place_); + common::make_ddim({static_cast(rows.size()), row_numel}), + place_); for (int64_t i = 0; i < value->numel(); ++i) { data[i] = static_cast(i); } @@ -44,11 +45,11 @@ class SelectedRowsTester : public ::testing::Test { TEST_F(SelectedRowsTester, height) { ASSERT_EQ(selected_rows_->height(), 10); } TEST_F(SelectedRowsTester, dims) { - ASSERT_EQ(selected_rows_->value().dims(), phi::make_ddim({3, 100})); + ASSERT_EQ(selected_rows_->value().dims(), common::make_ddim({3, 100})); } TEST_F(SelectedRowsTester, complete_dims) { - ASSERT_EQ(selected_rows_->GetCompleteDims(), phi::make_ddim({10, 100})); + ASSERT_EQ(selected_rows_->GetCompleteDims(), common::make_ddim({10, 100})); } TEST_F(SelectedRowsTester, SerializeAndDeseralize) { @@ -78,7 +79,8 @@ 
TEST(SelectedRows, SparseTable) { int64_t table_size = 100; int64_t embedding_width = 8; // initialize a sparse table - table.mutable_value()->Resize(phi::make_ddim({table_size, embedding_width})); + table.mutable_value()->Resize( + common::make_ddim({table_size, embedding_width})); auto* data = table.mutable_value()->mutable_data(cpu); for (int64_t i = 0; i < table_size; ++i) { for (int64_t j = 0; j < embedding_width; ++j) { @@ -99,7 +101,7 @@ TEST(SelectedRows, SparseTable) { ASSERT_EQ(table.rows().size(), 3UL); phi::DenseTensor ids; - ids.Resize(phi::make_ddim({4})); + ids.Resize(common::make_ddim({4})); auto* ids_data = ids.mutable_data(cpu); ids_data[0] = static_cast(6); ids_data[1] = static_cast(6); @@ -107,8 +109,8 @@ TEST(SelectedRows, SparseTable) { ids_data[3] = static_cast(10); phi::DenseTensor get_value; - auto* value_data = - get_value.mutable_data(phi::make_ddim({4, embedding_width}), cpu); + auto* value_data = get_value.mutable_data( + common::make_ddim({4, embedding_width}), cpu); table.Get(ids, &get_value); for (int j = 0; j < embedding_width; ++j) { @@ -176,7 +178,8 @@ TEST(SelectedRows, MultiThreadAutoIndex) { int64_t table_size = 100000; int64_t embedding_width = 8; // initialize a sparse table - table.mutable_value()->Resize(phi::make_ddim({table_size, embedding_width})); + table.mutable_value()->Resize( + common::make_ddim({table_size, embedding_width})); auto* data = table.mutable_value()->mutable_data(cpu); for (int64_t i = 0; i < table_size; ++i) { for (int64_t j = 0; j < embedding_width; ++j) { diff --git a/test/cpp/fluid/framework/tensor_test.cc b/test/cpp/fluid/framework/tensor_test.cc index 5ef6f53d38d509..be6da7c50453f3 100644 --- a/test/cpp/fluid/framework/tensor_test.cc +++ b/test/cpp/fluid/framework/tensor_test.cc @@ -54,26 +54,26 @@ TEST(DenseTensor, MutableData) { float* p1 = nullptr; float* p2 = nullptr; // initialization - p1 = src_tensor.mutable_data(phi::make_ddim({1, 2, 3}), + p1 = src_tensor.mutable_data(common::make_ddim({1, 2, 3}), platform::CPUPlace()); auto p1_holder = src_tensor.Holder(); EXPECT_NE(p1, nullptr); // set src_tensor a new dim with large size // momery is supposed to be re-allocated - p2 = src_tensor.mutable_data(phi::make_ddim({3, 4}), + p2 = src_tensor.mutable_data(common::make_ddim({3, 4}), platform::CPUPlace()); EXPECT_NE(p2, nullptr); auto p2_holder1 = src_tensor.Holder(); EXPECT_NE(p1_holder.get(), p2_holder1.get()); // set src_tensor a new dim with same size // momery block is supposed to be unchanged - p1 = src_tensor.mutable_data(phi::make_ddim({2, 2, 3}), + p1 = src_tensor.mutable_data(common::make_ddim({2, 2, 3}), platform::CPUPlace()); auto p2_holder2 = src_tensor.Holder(); EXPECT_EQ(p2_holder1.get(), p2_holder2.get()); // set src_tensor a new dim with smaller size // momery block is supposed to be unchanged - p2 = src_tensor.mutable_data(phi::make_ddim({2, 2}), + p2 = src_tensor.mutable_data(common::make_ddim({2, 2}), platform::CPUPlace()); auto p2_holder3 = src_tensor.Holder(); EXPECT_EQ(p1, p2); @@ -83,7 +83,7 @@ TEST(DenseTensor, MutableData) { float* p4 = nullptr; // set src_tensor a different type but smaller size. // memory block is supposed to be unchanged. - auto* tmp = src_tensor.mutable_data(phi::make_ddim({2, 2}), + auto* tmp = src_tensor.mutable_data(common::make_ddim({2, 2}), platform::CPUPlace()); p3 = reinterpret_cast(tmp); auto p3_holder1 = src_tensor.Holder(); @@ -92,7 +92,7 @@ TEST(DenseTensor, MutableData) { // set src_tensor a different type but bigger size. 
// memory block is supposed to be changed. - auto* tmp2 = src_tensor.mutable_data(phi::make_ddim({2, 2, 3}), + auto* tmp2 = src_tensor.mutable_data(common::make_ddim({2, 2, 3}), platform::CPUPlace()); auto p3_holder2 = src_tensor.Holder(); p4 = reinterpret_cast(tmp2); @@ -103,12 +103,12 @@ TEST(DenseTensor, MutableData) { // changed. { phi::DenseTensor src_tensor; - int8_t* p1 = src_tensor.mutable_data(phi::make_ddim({1}), + int8_t* p1 = src_tensor.mutable_data(common::make_ddim({1}), platform::CPUPlace()); EXPECT_NE(p1, nullptr); *p1 = 1; - uint8_t* p2 = src_tensor.mutable_data(phi::make_ddim({1}), + uint8_t* p2 = src_tensor.mutable_data(common::make_ddim({1}), platform::CPUPlace()); EXPECT_NE(p2, nullptr); EXPECT_EQ(static_cast(p2[0]), 1); @@ -120,25 +120,25 @@ TEST(DenseTensor, MutableData) { float* p1 = nullptr; float* p2 = nullptr; // initialization - p1 = src_tensor.mutable_data(phi::make_ddim({1, 2, 3}), + p1 = src_tensor.mutable_data(common::make_ddim({1, 2, 3}), platform::CUDAPlace(0)); auto p1_holder = src_tensor.Holder(); EXPECT_NE(p1, nullptr); // set src_tensor a new dim with large size // momery is supposed to be re-allocated - p2 = src_tensor.mutable_data(phi::make_ddim({3, 1024}), + p2 = src_tensor.mutable_data(common::make_ddim({3, 1024}), platform::CUDAPlace(0)); auto p2_holder = src_tensor.Holder(); EXPECT_NE(p2, nullptr); EXPECT_NE(p1_holder.get(), p2_holder.get()); // set src_tensor a new dim with same size // momery block is supposed to be unchanged - p1 = src_tensor.mutable_data(phi::make_ddim({2, 2, 3}), + p1 = src_tensor.mutable_data(common::make_ddim({2, 2, 3}), platform::CUDAPlace(0)); EXPECT_EQ(p1, p2); // set src_tensor a new dim with smaller size // momery block is supposed to be unchanged - p2 = src_tensor.mutable_data(phi::make_ddim({2, 2}), + p2 = src_tensor.mutable_data(common::make_ddim({2, 2}), platform::CUDAPlace(0)); EXPECT_EQ(p1, p2); } @@ -162,7 +162,7 @@ TEST(DenseTensor, ShareDataWith) { } ASSERT_TRUE(caught); - src_tensor.mutable_data(phi::make_ddim({2, 3, 4}), + src_tensor.mutable_data(common::make_ddim({2, 3, 4}), platform::CPUPlace()); dst_tensor.ShareDataWith(src_tensor); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); @@ -172,7 +172,7 @@ TEST(DenseTensor, ShareDataWith) { { phi::DenseTensor src_tensor; phi::DenseTensor dst_tensor; - src_tensor.mutable_data(phi::make_ddim({2, 3, 4}), + src_tensor.mutable_data(common::make_ddim({2, 3, 4}), platform::CUDAPlace(0)); dst_tensor.ShareDataWith(src_tensor); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); @@ -183,7 +183,7 @@ TEST(DenseTensor, ShareDataWith) { TEST(DenseTensor, Slice) { { phi::DenseTensor src_tensor; - src_tensor.mutable_data(phi::make_ddim({5, 3, 4}), + src_tensor.mutable_data(common::make_ddim({5, 3, 4}), platform::CPUPlace()); phi::DenseTensor slice_tensor = src_tensor.Slice(1, 3); phi::DDim slice_dims = slice_tensor.dims(); @@ -209,7 +209,7 @@ TEST(DenseTensor, Slice) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) { phi::DenseTensor src_tensor; - src_tensor.mutable_data(phi::make_ddim({6, 9}), + src_tensor.mutable_data(common::make_ddim({6, 9}), platform::CUDAPlace(0)); phi::DenseTensor slice_tensor = src_tensor.Slice(2, 6); phi::DDim slice_dims = slice_tensor.dims(); @@ -270,7 +270,8 @@ TEST(DenseTensor, FP16) { TEST(DenseTensor, Split) { { phi::DenseTensor src_tensor; - src_tensor.mutable_data(phi::make_ddim({6, 2}), platform::CPUPlace()); + src_tensor.mutable_data(common::make_ddim({6, 2}), + platform::CPUPlace()); std::vector split_tensor_list = 
src_tensor.Split(2, 0); ASSERT_EQ(split_tensor_list.size(), 3UL); EXPECT_EQ(split_tensor_list[0].dims()[0], 2); @@ -298,7 +299,7 @@ TEST(DenseTensor, Split) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) { phi::DenseTensor src_tensor; - src_tensor.mutable_data(phi::make_ddim({6, 4}), + src_tensor.mutable_data(common::make_ddim({6, 4}), platform::CUDAPlace(0)); std::vector split_tensor_list = src_tensor.Split(2, 0); ASSERT_EQ(split_tensor_list.size(), 3UL); @@ -332,7 +333,8 @@ TEST(DenseTensor, Split) { TEST(DenseTensor, Chunk) { { phi::DenseTensor src_tensor; - src_tensor.mutable_data(phi::make_ddim({6, 2}), platform::CPUPlace()); + src_tensor.mutable_data(common::make_ddim({6, 2}), + platform::CPUPlace()); std::vector split_tensor_list = src_tensor.Chunk(3, 0); ASSERT_EQ(split_tensor_list.size(), 3UL); EXPECT_EQ(split_tensor_list[0].dims()[0], 2); @@ -360,7 +362,7 @@ TEST(DenseTensor, Chunk) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) { phi::DenseTensor src_tensor; - src_tensor.mutable_data(phi::make_ddim({6, 4}), + src_tensor.mutable_data(common::make_ddim({6, 4}), platform::CUDAPlace(0)); std::vector split_tensor_list = src_tensor.Chunk(3, 0); ASSERT_EQ(split_tensor_list.size(), 3UL); diff --git a/test/cpp/fluid/framework/tensor_util_test.cc b/test/cpp/fluid/framework/tensor_util_test.cc index 65c2bf3b64b4a9..6b9c25750ac070 100644 --- a/test/cpp/fluid/framework/tensor_util_test.cc +++ b/test/cpp/fluid/framework/tensor_util_test.cc @@ -26,7 +26,7 @@ TEST(TensorCopy, Tensor) { phi::DenseTensor dst_tensor; phi::CPUContext cpu_ctx((platform::CPUPlace())); - int* src_ptr = src_tensor.mutable_data(phi::make_ddim({3, 3}), + int* src_ptr = src_tensor.mutable_data(common::make_ddim({3, 3}), platform::CPUPlace()); std::array arr = {1, 2, 3, 4, 5, 6, 7, 8, 9}; @@ -65,7 +65,7 @@ TEST(TensorCopy, Tensor) { phi::DenseTensor gpu_tensor; phi::DenseTensor dst_tensor; - int* src_ptr = src_tensor.mutable_data(phi::make_ddim({3, 3}), + int* src_ptr = src_tensor.mutable_data(common::make_ddim({3, 3}), platform::CPUPlace()); int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; @@ -129,7 +129,7 @@ TEST(TensorFromVector, Tensor) { phi::DenseTensor cpu_tensor; // Copy to CPU phi::DenseTensor - cpu_tensor.Resize(phi::make_ddim({3, 3})); + cpu_tensor.Resize(common::make_ddim({3, 3})); auto cpu_place = new paddle::platform::CPUPlace(); paddle::framework::TensorFromVector(src_vec, &cpu_tensor); @@ -142,7 +142,7 @@ TEST(TensorFromVector, Tensor) { } src_vec.erase(src_vec.begin(), src_vec.begin() + 5); - cpu_tensor.Resize(phi::make_ddim({2, 2})); + cpu_tensor.Resize(common::make_ddim({2, 2})); paddle::framework::TensorFromVector(src_vec, &cpu_tensor); cpu_ptr = cpu_tensor.data(); src_ptr = src_vec.data(); @@ -162,13 +162,13 @@ TEST(TensorFromVector, Tensor) { phi::DenseTensor dst_tensor; // Copy to CPU phi::DenseTensor - cpu_tensor.Resize(phi::make_ddim({3, 3})); + cpu_tensor.Resize(common::make_ddim({3, 3})); auto cpu_place = new paddle::platform::CPUPlace(); phi::CPUContext cpu_ctx(*cpu_place); paddle::framework::TensorFromVector(src_vec, cpu_ctx, &cpu_tensor); // Copy to GPUTensor - gpu_tensor.Resize(phi::make_ddim({3, 3})); + gpu_tensor.Resize(common::make_ddim({3, 3})); auto gpu_place = new paddle::platform::CUDAPlace(); phi::GPUContext gpu_ctx(*gpu_place); gpu_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() @@ -193,9 +193,9 @@ TEST(TensorFromVector, Tensor) { src_vec.erase(src_vec.begin(), src_vec.begin() + 5); - cpu_tensor.Resize(phi::make_ddim({2, 2})); + 
cpu_tensor.Resize(common::make_ddim({2, 2})); paddle::framework::TensorFromVector(src_vec, cpu_ctx, &cpu_tensor); - gpu_tensor.Resize(phi::make_ddim({2, 2})); + gpu_tensor.Resize(common::make_ddim({2, 2})); paddle::framework::TensorFromVector(src_vec, gpu_ctx, &gpu_tensor); paddle::framework::TensorCopy(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor); @@ -307,7 +307,7 @@ TEST(TensorFromDLPack, Tensor) { std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; phi::DenseTensor cpu_tensor; - cpu_tensor.Resize(phi::make_ddim({3, 3})); + cpu_tensor.Resize(common::make_ddim({3, 3})); paddle::platform::CPUPlace cpu_place; phi::CPUContext cpu_ctx(cpu_place); paddle::framework::TensorFromVector(src_vec, cpu_ctx, &cpu_tensor); @@ -333,13 +333,13 @@ TEST(TensorFromDLPack, Tensor) { phi::DenseTensor gpu_tensor_from_dlpack; // Copy to CPU phi::DenseTensor - cpu_tensor.Resize(phi::make_ddim({3, 3})); + cpu_tensor.Resize(common::make_ddim({3, 3})); paddle::platform::CPUPlace cpu_place; phi::CPUContext cpu_ctx(cpu_place); paddle::framework::TensorFromVector(src_vec, cpu_ctx, &cpu_tensor); // Copy to GPUTensor - gpu_tensor.Resize(phi::make_ddim({3, 3})); + gpu_tensor.Resize(common::make_ddim({3, 3})); paddle::platform::CUDAPlace gpu_place; auto& gpu_ctx = *paddle::platform::DeviceContextPool::Instance().GetByPlace(gpu_place); diff --git a/test/cpp/fluid/fused/CMakeLists.txt b/test/cpp/fluid/fused/CMakeLists.txt index 59bf35e05c021a..b27dbaae673678 100644 --- a/test/cpp/fluid/fused/CMakeLists.txt +++ b/test/cpp/fluid/fused/CMakeLists.txt @@ -16,6 +16,7 @@ if(WITH_GPU OR WITH_ROCM) generated_op device_context phi + common memory) nv_test( test_fused_dropout_act_bias @@ -26,6 +27,7 @@ if(WITH_GPU OR WITH_ROCM) generated_op device_context phi + common memory) nv_test( test_fused_layernorm_residual_dropout_bias @@ -37,6 +39,7 @@ if(WITH_GPU OR WITH_ROCM) generated_op device_context phi + common memory ${CINN_DEPS}) endif() @@ -51,6 +54,7 @@ if(WITH_GPU OR WITH_ROCM) op_registry device_context phi + common memory) cc_test( test_cudnn_bn_add_relu @@ -61,6 +65,7 @@ if(WITH_GPU OR WITH_ROCM) op_registry device_context phi + common memory) endif() endif() diff --git a/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc b/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc index ee220f993bfaa2..ae4697833d7584 100644 --- a/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc +++ b/test/cpp/fluid/fused/cudnn_bn_add_relu_test.cc @@ -44,7 +44,7 @@ template void InitRandomTensor(const std::vector &dims, phi::DenseTensor *cpu_out) { T *cpu_out_ptr = - cpu_out->mutable_data(phi::make_ddim(dims), platform::CPUPlace()); + cpu_out->mutable_data(common::make_ddim(dims), platform::CPUPlace()); std::default_random_engine random(0); std::uniform_real_distribution dis(-1.0, 1.0); for (int i = 0; i < cpu_out->numel(); ++i) { @@ -57,7 +57,7 @@ void InitConstantTensor(const std::vector &dims, T value, phi::DenseTensor *cpu_out) { T *cpu_out_ptr = - cpu_out->mutable_data(phi::make_ddim(dims), platform::CPUPlace()); + cpu_out->mutable_data(common::make_ddim(dims), platform::CPUPlace()); for (int i = 0; i < cpu_out->numel(); ++i) { cpu_out_ptr[i] = value; } @@ -652,7 +652,7 @@ class CudnnBNAddReluTester { saved_mean->Resize({1, 1, 1, channels_}); saved_var->Resize({1, 1, 1, channels_}); - auto param_shape = phi::vectorize(bn_scale->dims()); + auto param_shape = common::vectorize(bn_scale->dims()); op::CudnnBNStatsFinalize bn_op(ctx, param_shape); bn_op.Forward(ctx, *sum, @@ -759,17 +759,17 @@ class CudnnBNAddReluTester { &equiv_bias_z); } - 
y.Resize(phi::make_ddim({batch_size_, height_, width_, channels_})); + y.Resize(common::make_ddim({batch_size_, height_, width_, channels_})); int c = channels_; int64_t nhw = ele_count_; int32_t c_int32_elems = ((c + 63) & ~63) / 32; int32_t nhw_int32_elems = (nhw + 31) & ~31; - bitmask.Resize(phi::make_ddim({nhw_int32_elems, c_int32_elems, 1})); + bitmask.Resize(common::make_ddim({nhw_int32_elems, c_int32_elems, 1})); - auto data_shape = phi::vectorize(x.dims()); - auto param_shape = phi::vectorize(bn_scale_x.dims()); - auto bitmask_shape = phi::vectorize(bitmask.dims()); + auto data_shape = common::vectorize(x.dims()); + auto param_shape = common::vectorize(bn_scale_x.dims()); + auto bitmask_shape = common::vectorize(bitmask.dims()); // 2. Scale Bias + Relu op::CudnnScaleBiasAddRelu sbar_op(ctx, @@ -841,14 +841,14 @@ class CudnnBNAddReluTester { saved_mean.Resize({1, 1, 1, channels_}); saved_var.Resize({1, 1, 1, channels_}); - dx.Resize(phi::make_ddim({batch_size_, height_, width_, channels_})); - dz.Resize(phi::make_ddim({batch_size_, height_, width_, channels_})); - dscale.Resize(phi::make_ddim({1, 1, 1, channels_})); - dbias.Resize(phi::make_ddim({1, 1, 1, channels_})); + dx.Resize(common::make_ddim({batch_size_, height_, width_, channels_})); + dz.Resize(common::make_ddim({batch_size_, height_, width_, channels_})); + dscale.Resize(common::make_ddim({1, 1, 1, channels_})); + dbias.Resize(common::make_ddim({1, 1, 1, channels_})); - auto data_shape = phi::vectorize(x.dims()); - auto param_shape = phi::vectorize(bn_scale.dims()); - auto bitmask_shape = phi::vectorize(bitmask.dims()); + auto data_shape = common::vectorize(x.dims()); + auto param_shape = common::vectorize(bn_scale.dims()); + auto bitmask_shape = common::vectorize(bitmask.dims()); std::string act_type = "relu"; op::CudnnScaleBiasAddRelu sbar_op( diff --git a/test/cpp/fluid/fused/cudnn_norm_conv_test.cc b/test/cpp/fluid/fused/cudnn_norm_conv_test.cc index 16ea8f5ade0842..97d76aa65b0073 100644 --- a/test/cpp/fluid/fused/cudnn_norm_conv_test.cc +++ b/test/cpp/fluid/fused/cudnn_norm_conv_test.cc @@ -38,7 +38,7 @@ template void InitRandomTensor(const std::vector &dims, phi::DenseTensor *cpu_out) { T *cpu_out_ptr = - cpu_out->mutable_data(phi::make_ddim(dims), platform::CPUPlace()); + cpu_out->mutable_data(common::make_ddim(dims), platform::CPUPlace()); std::default_random_engine random(0); std::uniform_real_distribution dis(0.0, 1.0); @@ -335,14 +335,14 @@ class CudnnNormConvolutionTester { paddle::framework::TensorCopySync(cpu_input_, place, &input); paddle::framework::TensorCopySync(cpu_filter_nhwc_, place, &filter_nhwc); - output.Resize(phi::make_ddim( + output.Resize(common::make_ddim( {batch_size_, out_height_, out_width_, output_channels_})); - sum.Resize(phi::make_ddim({1, 1, 1, output_channels_})); - sum_of_square.Resize(phi::make_ddim({1, 1, 1, output_channels_})); + sum.Resize(common::make_ddim({1, 1, 1, output_channels_})); + sum_of_square.Resize(common::make_ddim({1, 1, 1, output_channels_})); - auto input_shape = phi::vectorize(input.dims()); - auto filter_shape = phi::vectorize(filter_nhwc.dims()); - auto output_shape = phi::vectorize(output.dims()); + auto input_shape = common::vectorize(input.dims()); + auto filter_shape = common::vectorize(filter_nhwc.dims()); + auto output_shape = common::vectorize(output.dims()); op::CudnnNormConvolution conv_op(ctx, input_shape, filter_shape, @@ -376,9 +376,9 @@ class CudnnNormConvolutionTester { input_grad.Resize(input.dims()); filter_grad.Resize(filter_nhwc.dims()); - 
auto input_shape = phi::vectorize(input.dims()); - auto filter_shape = phi::vectorize(filter_nhwc.dims()); - auto output_shape = phi::vectorize(output_grad.dims()); + auto input_shape = common::vectorize(input.dims()); + auto filter_shape = common::vectorize(filter_nhwc.dims()); + auto output_shape = common::vectorize(output_grad.dims()); op::CudnnNormConvolutionGrad conv_grad_op(ctx, input_shape, filter_shape, diff --git a/test/cpp/fluid/fused/fusion_group_op_test.cc b/test/cpp/fluid/fused/fusion_group_op_test.cc index 19d7d48ae0fa99..80aff6543e55bf 100644 --- a/test/cpp/fluid/fused/fusion_group_op_test.cc +++ b/test/cpp/fluid/fused/fusion_group_op_test.cc @@ -33,7 +33,7 @@ phi::DenseTensor* CreateTensor(framework::Scope* scope, auto* var = scope->Var(name); auto* tensor = var->GetMutable(); if (!shape.empty()) { - tensor->mutable_data(phi::make_ddim(shape), place); + tensor->mutable_data(common::make_ddim(shape), place); } return tensor; } @@ -45,7 +45,8 @@ void SetupRandomCPUTensor(phi::DenseTensor* tensor, std::mt19937 rng(seed++); std::uniform_real_distribution uniform_dist(0, 1); - T* ptr = tensor->mutable_data(phi::make_ddim(shape), platform::CPUPlace()); + T* ptr = + tensor->mutable_data(common::make_ddim(shape), platform::CPUPlace()); for (int64_t i = 0; i < tensor->numel(); ++i) { ptr[i] = static_cast(uniform_dist(rng)) - static_cast(0.5); } diff --git a/test/cpp/fluid/gather_test.cc b/test/cpp/fluid/gather_test.cc index 9a09d747a55658..358334cc9d3271 100644 --- a/test/cpp/fluid/gather_test.cc +++ b/test/cpp/fluid/gather_test.cc @@ -26,16 +26,16 @@ TEST(Gather, GatherData) { int* p_src = nullptr; int* p_index = nullptr; - p_src = src->mutable_data(phi::make_ddim({3, 4}), + p_src = src->mutable_data(common::make_ddim({3, 4}), paddle::platform::CPUPlace()); - p_index = index->mutable_data(phi::make_ddim({2}), + p_index = index->mutable_data(common::make_ddim({2}), paddle::platform::CPUPlace()); for (int i = 0; i < 12; ++i) p_src[i] = i; p_index[0] = 1; p_index[1] = 0; - int* p_output = output->mutable_data(phi::make_ddim({2, 4}), + int* p_output = output->mutable_data(common::make_ddim({2, 4}), paddle::platform::CPUPlace()); auto* cpu_place = new paddle::platform::CPUPlace(); diff --git a/test/cpp/fluid/lite/CMakeLists.txt b/test/cpp/fluid/lite/CMakeLists.txt index 6533073258ff5b..325b59582a0994 100644 --- a/test/cpp/fluid/lite/CMakeLists.txt +++ b/test/cpp/fluid/lite/CMakeLists.txt @@ -1,4 +1,4 @@ -paddle_test(test_lite_engine_op SRCS lite_engine_op_test.cc) +paddle_test(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS common) if(WITH_ONNXRUNTIME AND WIN32) # Copy onnxruntime for some c++ test in Windows, since the test will diff --git a/test/cpp/fluid/math/CMakeLists.txt b/test/cpp/fluid/math/CMakeLists.txt index 1edc2f25e68341..46d70de2687997 100644 --- a/test/cpp/fluid/math/CMakeLists.txt +++ b/test/cpp/fluid/math/CMakeLists.txt @@ -1,15 +1,15 @@ cc_test( selected_rows_functor_test SRCS selected_rows_functor_test.cc - DEPS allocator phi) + DEPS allocator phi common) cc_test( im2col_test SRCS im2col_test.cc - DEPS phi) + DEPS phi common) cc_test( vol2col_test SRCS vol2col_test.cc - DEPS phi) + DEPS phi common) cc_test( beam_search_test SRCS beam_search_test.cc @@ -18,13 +18,13 @@ if(WITH_GPU) nv_test( selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc - DEPS phi) + DEPS phi common) endif() if(WITH_ROCM) hip_test( selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc - DEPS phi) + DEPS phi common) endif() cc_test( concat_test diff --git 
a/test/cpp/fluid/math/beam_search_test.cc b/test/cpp/fluid/math/beam_search_test.cc index d8e56e6102dd7a..428828aa2cb17e 100644 --- a/test/cpp/fluid/math/beam_search_test.cc +++ b/test/cpp/fluid/math/beam_search_test.cc @@ -32,7 +32,7 @@ void PrepareCPUTensors(phi::DenseTensor* ids, ids->set_lod(lod); scores->set_lod(lod); - auto dims = phi::make_ddim({4, 3}); + auto dims = common::make_ddim({4, 3}); ids->Resize(dims); scores->Resize(dims); @@ -52,13 +52,13 @@ void PrepareCPUTensors(phi::DenseTensor* ids, } // pre_ids - pre_ids->Resize(phi::make_ddim({4, 1})); + pre_ids->Resize(common::make_ddim({4, 1})); for (int i = 0; i < 4; i++) { pre_ids->mutable_data(place)[i] = i + 1; } // pre_scores - pre_scores->Resize(phi::make_ddim({4, 1})); + pre_scores->Resize(common::make_ddim({4, 1})); for (int i = 0; i < 4; i++) { pre_scores->mutable_data(place)[i] = 0.1 * (i + 1); // NOLINT } diff --git a/test/cpp/fluid/math/concat_test.cc b/test/cpp/fluid/math/concat_test.cc index b350167cfb46b8..080a659ecdbbc6 100644 --- a/test/cpp/fluid/math/concat_test.cc +++ b/test/cpp/fluid/math/concat_test.cc @@ -37,9 +37,9 @@ void ConcatCase1(DeviceContext* context) { phi::DenseTensor input_b; phi::DenseTensor out; - auto dim_a = phi::make_ddim({2, 3, 4}); - auto dim_b = phi::make_ddim({3, 3, 4}); - auto dim_out = phi::make_ddim({5, 3, 4}); + auto dim_a = common::make_ddim({2, 3, 4}); + auto dim_b = common::make_ddim({3, 3, 4}); + auto dim_out = common::make_ddim({5, 3, 4}); input_a.mutable_data(dim_a, Place()); input_b.mutable_data(dim_b, Place()); @@ -142,9 +142,9 @@ void ConcatCase2(DeviceContext* context) { phi::DenseTensor input_b; phi::DenseTensor out; - auto dim_a = phi::make_ddim({2, 3, 4}); - auto dim_b = phi::make_ddim({2, 4, 4}); - auto dim_out = phi::make_ddim({2, 7, 4}); + auto dim_a = common::make_ddim({2, 3, 4}); + auto dim_b = common::make_ddim({2, 4, 4}); + auto dim_out = common::make_ddim({2, 7, 4}); input_a.mutable_data(dim_a, Place()); input_b.mutable_data(dim_b, Place()); @@ -251,9 +251,9 @@ void ConcatCase3(DeviceContext* context) { phi::DenseTensor input_b; phi::DenseTensor out; - auto dim_a = phi::make_ddim({2, 3, 4}); - auto dim_b = phi::make_ddim({2, 3, 5}); - auto dim_out = phi::make_ddim({2, 3, 9}); + auto dim_a = common::make_ddim({2, 3, 4}); + auto dim_b = common::make_ddim({2, 3, 5}); + auto dim_out = common::make_ddim({2, 3, 9}); input_a.mutable_data(dim_a, Place()); input_b.mutable_data(dim_b, Place()); @@ -362,9 +362,9 @@ void ConcatCase4(DeviceContext* context) { phi::DenseTensor input_b; phi::DenseTensor out; - auto dim_a = phi::make_ddim({2, 3, 4}); - auto dim_b = phi::make_ddim({2, 3, 4}); - auto dim_out = phi::make_ddim({2, 6, 4}); + auto dim_a = common::make_ddim({2, 3, 4}); + auto dim_b = common::make_ddim({2, 3, 4}); + auto dim_out = common::make_ddim({2, 6, 4}); input_a.mutable_data(dim_a, Place()); input_b.mutable_data(dim_b, Place()); diff --git a/test/cpp/fluid/math/selected_rows_functor_test.cc b/test/cpp/fluid/math/selected_rows_functor_test.cc index a2c88c723fefa6..a32140f4a9c35b 100644 --- a/test/cpp/fluid/math/selected_rows_functor_test.cc +++ b/test/cpp/fluid/math/selected_rows_functor_test.cc @@ -33,7 +33,7 @@ TEST(selected_rows_functor, cpu_add) { new phi::SelectedRows(rows1, height)}; auto* in1_value = selected_rows1->mutable_value(); in1_value->mutable_data( - phi::make_ddim({static_cast(rows1.size()), row_numel}), + common::make_ddim({static_cast(rows1.size()), row_numel}), cpu_place); functor(ctx, in1_value, 1.0); @@ -42,7 +42,7 @@ 
TEST(selected_rows_functor, cpu_add) { new phi::SelectedRows(rows2, height)}; auto* in2_value = selected_rows2->mutable_value(); in2_value->mutable_data( - phi::make_ddim({static_cast(rows2.size()), row_numel}), + common::make_ddim({static_cast(rows2.size()), row_numel}), cpu_place); functor(ctx, in2_value, 2.0); @@ -50,7 +50,7 @@ TEST(selected_rows_functor, cpu_add) { auto* out_value = output->mutable_value(); // simplely concat two SelectedRows - out_value->mutable_data(phi::make_ddim({7, 10}), cpu_place); + out_value->mutable_data(common::make_ddim({7, 10}), cpu_place); phi::funcs::SelectedRowsAdd add_functor; add_functor(ctx, *selected_rows1, *selected_rows2, output.get()); @@ -84,11 +84,13 @@ TEST(selected_rows_functor, cpu_add) { EXPECT_EQ(out_data[6 * row_numel + 9], 2.0); std::unique_ptr tensor1{new phi::DenseTensor()}; - tensor1->mutable_data(phi::make_ddim({height, row_numel}), cpu_place); + tensor1->mutable_data(common::make_ddim({height, row_numel}), + cpu_place); functor(ctx, tensor1.get(), 3.0); std::unique_ptr tensor2{new phi::DenseTensor()}; - tensor2->mutable_data(phi::make_ddim({height, row_numel}), cpu_place); + tensor2->mutable_data(common::make_ddim({height, row_numel}), + cpu_place); phi::funcs::SelectedRowsAddTensor add_tensor_functor; add_tensor_functor(ctx, *output, *tensor1, tensor2.get()); @@ -125,7 +127,7 @@ TEST(selected_rows_functor, cpu_add_to) { new phi::SelectedRows(rows1, height)}; auto* in1_value = selected_rows1->mutable_value(); in1_value->mutable_data( - phi::make_ddim({static_cast(rows1.size()), row_numel}), + common::make_ddim({static_cast(rows1.size()), row_numel}), cpu_place); functor(ctx, in1_value, 1.0); @@ -134,7 +136,7 @@ TEST(selected_rows_functor, cpu_add_to) { new phi::SelectedRows(rows2, height)}; auto* in2_value = selected_rows2->mutable_value(); in2_value->mutable_data( - phi::make_ddim({static_cast(rows2.size()), row_numel}), + common::make_ddim({static_cast(rows2.size()), row_numel}), cpu_place); functor(ctx, in2_value, 2.0); @@ -143,7 +145,7 @@ TEST(selected_rows_functor, cpu_add_to) { auto* out_value = output->mutable_value(); // simplely concat two SelectedRows - out_value->mutable_data(phi::make_ddim({7, 10}), cpu_place); + out_value->mutable_data(common::make_ddim({7, 10}), cpu_place); phi::funcs::SelectedRowsAddTo add_to_functor; add_to_functor(ctx, *selected_rows1, 0, output.get()); @@ -178,7 +180,8 @@ TEST(selected_rows_functor, cpu_add_to) { EXPECT_EQ(out_data[6 * row_numel + 9], 2.0); std::unique_ptr tensor1{new phi::DenseTensor()}; - tensor1->mutable_data(phi::make_ddim({height, row_numel}), cpu_place); + tensor1->mutable_data(common::make_ddim({height, row_numel}), + cpu_place); functor(ctx, tensor1.get(), 3.0); phi::funcs::SelectedRowsAddToTensor @@ -217,7 +220,7 @@ TEST(selected_rows_functor, cpu_merge_average_float) { new phi::SelectedRows(rows, height)}; auto* in_value = selected_rows->mutable_value(); in_value->mutable_data( - phi::make_ddim({static_cast(rows.size()), row_numel}), + common::make_ddim({static_cast(rows.size()), row_numel}), cpu_place); functor(ctx, in_value, 1.0); @@ -255,7 +258,7 @@ TEST(selected_rows_functor, cpu_merge_add_float) { new phi::SelectedRows(rows, height)}; auto* in_value = selected_rows->mutable_value(); in_value->mutable_data( - phi::make_ddim({static_cast(rows.size()), row_numel}), + common::make_ddim({static_cast(rows.size()), row_numel}), cpu_place); functor(ctx, in_value, 1.0); @@ -294,7 +297,7 @@ TEST(selected_rows_functor, cpu_merge_add_int) { new phi::SelectedRows(rows, height)}; 
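The SelectedRows hunks above and below repeat one sizing idiom: the value tensor is resized to {rows.size(), row_numel} via common::make_ddim, with rows.size() cast to int64_t. A compact sketch of just that shape computation (the helper name is illustrative, not part of the patch):

#include <cstdint>
#include <vector>

#include "paddle/common/ddim.h"

// One row of `row_numel` values per selected row index.
common::DDim SelectedRowsValueShape(const std::vector<int64_t>& rows,
                                    int64_t row_numel) {
  return common::make_ddim({static_cast<int64_t>(rows.size()), row_numel});
}
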
auto* in_value = selected_rows->mutable_value(); in_value->mutable_data( - phi::make_ddim({static_cast(rows.size()), row_numel}), + common::make_ddim({static_cast(rows.size()), row_numel}), cpu_place); functor(ctx, in_value, 1); @@ -334,7 +337,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi) { new phi::SelectedRows(rows1, height)}; auto* in1_value = selected_rows1->mutable_value(); in1_value->mutable_data( - phi::make_ddim({static_cast(rows1.size()), row_numel}), + common::make_ddim({static_cast(rows1.size()), row_numel}), cpu_place); set_const(ctx, in1_value, 1.0); @@ -343,7 +346,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi) { new phi::SelectedRows(rows2, height)}; auto* in2_value = selected_rows2->mutable_value(); in2_value->mutable_data( - phi::make_ddim({static_cast(rows2.size()), row_numel}), + common::make_ddim({static_cast(rows2.size()), row_numel}), cpu_place); set_const(ctx, in2_value, 1.0); @@ -357,7 +360,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi) { merge_add_functor(ctx, inputs, output.get()); EXPECT_EQ(output->height(), height); - EXPECT_EQ(output->value().dims(), phi::make_ddim({3, row_numel})); + EXPECT_EQ(output->value().dims(), common::make_ddim({3, row_numel})); std::vector ret_rows{2, 3, 5}; EXPECT_EQ(output->rows(), ret_rows); @@ -386,7 +389,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) { new phi::SelectedRows(rows1, height)}; auto* in1_value = selected_rows1->mutable_value(); in1_value->mutable_data( - phi::make_ddim({static_cast(rows1.size()), row_numel}), + common::make_ddim({static_cast(rows1.size()), row_numel}), cpu_place); set_const(ctx, in1_value, 1.0); @@ -395,7 +398,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) { new phi::SelectedRows(rows2, height)}; auto* in2_value = selected_rows2->mutable_value(); in2_value->mutable_data( - phi::make_ddim({static_cast(rows2.size()), row_numel}), + common::make_ddim({static_cast(rows2.size()), row_numel}), cpu_place); set_const(ctx, in2_value, 2.0); @@ -409,7 +412,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) { merge_add_functor(ctx, inputs, output.get()); EXPECT_EQ(output->height(), height); - EXPECT_EQ(output->value().dims(), phi::make_ddim({10, row_numel})); + EXPECT_EQ(output->value().dims(), common::make_ddim({10, row_numel})); std::vector ret_rows{1, 3, 5, 7, 9, 0, 2, 4, 6, 8}; EXPECT_EQ(output->rows(), ret_rows); @@ -442,7 +445,7 @@ TEST(selected_rows_functor, cpu_sum_to) { new phi::SelectedRows(rows1, height)}; auto* in1_value = selected_rows1->mutable_value(); in1_value->mutable_data( - phi::make_ddim({static_cast(rows1.size()), row_numel}), + common::make_ddim({static_cast(rows1.size()), row_numel}), cpu_place); functor(ctx, in1_value, 1.0); @@ -451,7 +454,7 @@ TEST(selected_rows_functor, cpu_sum_to) { new phi::SelectedRows(rows2, height)}; auto* in2_value = selected_rows2->mutable_value(); in2_value->mutable_data( - phi::make_ddim({static_cast(rows2.size()), row_numel}), + common::make_ddim({static_cast(rows2.size()), row_numel}), cpu_place); functor(ctx, in2_value, 2.0); @@ -459,7 +462,7 @@ TEST(selected_rows_functor, cpu_sum_to) { output->set_height(height); auto* out_value = output->mutable_value(); // simplely concat two SelectedRows - out_value->mutable_data(phi::make_ddim({7, 10}), cpu_place); + out_value->mutable_data(common::make_ddim({7, 10}), cpu_place); phi::funcs::SelectedRowsSumTo sum_to_functor; sum_to_functor(ctx, std::vector( @@ -491,7 +494,8 @@ TEST(selected_rows_functor, cpu_sum_to) { EXPECT_EQ(out_data[5 * 
row_numel + 7], 2.0); EXPECT_EQ(out_data[6 * row_numel + 9], 2.0); std::unique_ptr tensor1{new phi::DenseTensor()}; - tensor1->mutable_data(phi::make_ddim({height, row_numel}), cpu_place); + tensor1->mutable_data(common::make_ddim({height, row_numel}), + cpu_place); functor(ctx, tensor1.get(), 3.0); phi::funcs::SelectedRowsAddToTensor add_to_tensor_functor; diff --git a/test/cpp/fluid/math/selected_rows_functor_test.cu.cc b/test/cpp/fluid/math/selected_rows_functor_test.cu.cc index b507f096082f94..a11dbe7c8158f7 100644 --- a/test/cpp/fluid/math/selected_rows_functor_test.cu.cc +++ b/test/cpp/fluid/math/selected_rows_functor_test.cu.cc @@ -15,10 +15,10 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/selected_rows_functor.h" #include "gtest/gtest.h" +#include "paddle/common/errors.h" #include "paddle/phi/backends/context_pool.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -36,7 +36,7 @@ TEST(selected_rows_functor, gpu_add) { new phi::SelectedRows(rows1, height)}; auto* in1_value = selected_rows1->mutable_value(); in1_value->mutable_data( - phi::make_ddim({static_cast(rows1.size()), row_numel}), + common::make_ddim({static_cast(rows1.size()), row_numel}), gpu_place); functor(ctx, in1_value, 1.0); #ifdef PADDLE_WITH_HIP @@ -56,7 +56,7 @@ TEST(selected_rows_functor, gpu_add) { new phi::SelectedRows(rows2, height)}; auto* in2_value = selected_rows2->mutable_value(); in2_value->mutable_data( - phi::make_ddim({static_cast(rows2.size()), row_numel}), + common::make_ddim({static_cast(rows2.size()), row_numel}), gpu_place); functor(ctx, in2_value, 2.0); @@ -64,7 +64,7 @@ TEST(selected_rows_functor, gpu_add) { auto* out_value = output->mutable_value(); // simply concat two SelectedRows - out_value->mutable_data(phi::make_ddim({7, 10}), gpu_place); + out_value->mutable_data(common::make_ddim({7, 10}), gpu_place); phi::funcs::SelectedRowsAdd add_functor; add_functor(ctx, *selected_rows1, *selected_rows2, output.get()); @@ -101,11 +101,13 @@ TEST(selected_rows_functor, gpu_add) { EXPECT_EQ(out_cpu_data[6 * row_numel + 9], 2.0); std::unique_ptr tensor1{new phi::DenseTensor()}; - tensor1->mutable_data(phi::make_ddim({height, row_numel}), gpu_place); + tensor1->mutable_data(common::make_ddim({height, row_numel}), + gpu_place); functor(ctx, tensor1.get(), 3.0); std::unique_ptr tensor2{new phi::DenseTensor()}; - tensor2->mutable_data(phi::make_ddim({height, row_numel}), gpu_place); + tensor2->mutable_data(common::make_ddim({height, row_numel}), + gpu_place); phi::funcs::SelectedRowsAddTensor add_tensor_functor; add_tensor_functor(ctx, *output, *tensor1, tensor2.get()); @@ -144,7 +146,7 @@ TEST(selected_rows_functor, gpu_add_to) { new phi::SelectedRows(rows1, height)}; auto* in1_value = selected_rows1->mutable_value(); in1_value->mutable_data( - phi::make_ddim({static_cast(rows1.size()), row_numel}), + common::make_ddim({static_cast(rows1.size()), row_numel}), gpu_place); functor(ctx, in1_value, 1.0); @@ -153,7 +155,7 @@ TEST(selected_rows_functor, gpu_add_to) { new phi::SelectedRows(rows2, height)}; auto* in2_value = selected_rows2->mutable_value(); in2_value->mutable_data( - phi::make_ddim({static_cast(rows2.size()), row_numel}), + common::make_ddim({static_cast(rows2.size()), row_numel}), gpu_place); functor(ctx, in2_value, 2.0); @@ -162,7 +164,7 @@ TEST(selected_rows_functor, gpu_add_to) { auto* out_value = 
output->mutable_value(); // simply concat two SelectedRows - out_value->mutable_data<float>(phi::make_ddim({7, 10}), gpu_place); + out_value->mutable_data<float>(common::make_ddim({7, 10}), gpu_place); phi::funcs::SelectedRowsAddTo<phi::GPUContext, float> add_to_functor; add_to_functor(ctx, *selected_rows1, 0, output.get()); @@ -200,7 +202,8 @@ TEST(selected_rows_functor, gpu_add_to) { EXPECT_EQ(out_cpu_data[6 * row_numel + 9], 2.0); std::unique_ptr<phi::DenseTensor> tensor1{new phi::DenseTensor()}; - tensor1->mutable_data<float>(phi::make_ddim({height, row_numel}), gpu_place); + tensor1->mutable_data<float>(common::make_ddim({height, row_numel}), + gpu_place); functor(ctx, tensor1.get(), 3.0); phi::funcs::SelectedRowsAddToTensor<phi::GPUContext, float> @@ -242,7 +245,7 @@ TEST(selected_rows_functor, gpu_merge_add) { new phi::SelectedRows(rows1, height)}; auto* in1_value = selected_rows1->mutable_value(); in1_value->mutable_data<float>( - phi::make_ddim({static_cast<int64_t>(rows1.size()), row_numel}), + common::make_ddim({static_cast<int64_t>(rows1.size()), row_numel}), gpu_place); set_const(ctx, in1_value, 1.0); @@ -251,7 +254,7 @@ TEST(selected_rows_functor, gpu_merge_add) { new phi::SelectedRows(rows2, height)}; auto* in2_value = selected_rows2->mutable_value(); in2_value->mutable_data<float>( - phi::make_ddim({static_cast<int64_t>(rows2.size()), row_numel}), + common::make_ddim({static_cast<int64_t>(rows2.size()), row_numel}), gpu_place); set_const(ctx, in2_value, 1.0); @@ -268,7 +271,7 @@ TEST(selected_rows_functor, gpu_merge_add) { phi::Copy(ctx, output->value(), cpu_place, true, &output_cpu); EXPECT_EQ(output->height(), height); - EXPECT_EQ(output->value().dims(), phi::make_ddim({3, row_numel})); + EXPECT_EQ(output->value().dims(), common::make_ddim({3, row_numel})); std::vector<int64_t> ret_rows{2, 3, 5}; EXPECT_EQ(output->rows(), ret_rows); diff --git a/test/cpp/fluid/mkldnn/CMakeLists.txt b/test/cpp/fluid/mkldnn/CMakeLists.txt index f83fd91963be20..22ea64bdbdb0c1 100644 --- a/test/cpp/fluid/mkldnn/CMakeLists.txt +++ b/test/cpp/fluid/mkldnn/CMakeLists.txt @@ -7,6 +7,7 @@ cc_test( elementwise_add_op activation_op phi + common scope device_context enforce @@ -21,6 +22,7 @@ cc_test( cpu_quantize_placement_pass cpu_quantize_pass phi + common scope device_context) @@ -32,6 +34,7 @@ cc_test( depthwise_conv tensor phi + common scope device_context enforce @@ -43,6 +46,7 @@ set(TEST_MKLDNN_CACHING_DEPS elementwise_add_op activation_op phi + common scope device_context enforce @@ -74,6 +78,7 @@ if(WIN32 AND WITH_TESTING) generated_op generated_static_op phi + common transpose_op fused_transpose_op scope @@ -95,6 +100,7 @@ cc_test( generated_static_op generated_op phi + common scope device_context enforce) diff --git a/test/cpp/fluid/mkldnn/test_mkldnn_caching.cc b/test/cpp/fluid/mkldnn/test_mkldnn_caching.cc index 24be9e518d37a5..0f62301cdfe6df 100644 --- a/test/cpp/fluid/mkldnn/test_mkldnn_caching.cc +++ b/test/cpp/fluid/mkldnn/test_mkldnn_caching.cc @@ -102,7 +102,7 @@ void RunOperator(const platform::Place &place, std::uniform_real_distribution<T> dist(static_cast<T>(10.0), static_cast<T>(20.0)); std::mt19937 engine; - size_t numel = static_cast<size_t>(phi::product(dims)); + size_t numel = static_cast<size_t>(common::product(dims)); for (int i = 0; i < num_inputs[op_type]; ++i) { input_names[i].tensor->Resize(dims); auto data_ptr = input_names[i].tensor->mutable_data<T>(place); diff --git a/test/cpp/fluid/mkldnn/test_mkldnn_op_inplace.cc b/test/cpp/fluid/mkldnn/test_mkldnn_op_inplace.cc index 4beb314fe6b76b..1c1a0cfb219140 100644 --- a/test/cpp/fluid/mkldnn/test_mkldnn_op_inplace.cc +++ b/test/cpp/fluid/mkldnn/test_mkldnn_op_inplace.cc @@ -69,7 +69,7 @@ bool 
TestMain(const platform::Place &place, std::uniform_real_distribution<T> dist(static_cast<T>(10.0), static_cast<T>(20.0)); std::mt19937 engine; - size_t numel = static_cast<size_t>(phi::product(dims)); + size_t numel = static_cast<size_t>(common::product(dims)); for (int i = 0; i < num_inputs; ++i) { input_names[i].tensor->Resize(dims); auto data_ptr = input_names[i].tensor->mutable_data<T>(place); diff --git a/test/cpp/fluid/mkldnn/test_mkldnn_op_nhwc.cc b/test/cpp/fluid/mkldnn/test_mkldnn_op_nhwc.cc index b152623a6ddcdd..bf652e2de943ed 100644 --- a/test/cpp/fluid/mkldnn/test_mkldnn_op_nhwc.cc +++ b/test/cpp/fluid/mkldnn/test_mkldnn_op_nhwc.cc @@ -62,7 +62,7 @@ void Test_Pool2d_Transpose_NHWC(const std::string &transpose_type) { std::uniform_real_distribution<float> dist(static_cast<float>(10.0), static_cast<float>(20.0)); std::mt19937 engine; - size_t numel = static_cast<size_t>(phi::product(dims)); + size_t numel = static_cast<size_t>(common::product(dims)); input_name.tensor->Resize(dims); auto data_ptr = input_name.tensor->mutable_data<float>(p); for (size_t i = 0; i < numel; ++i) { @@ -123,7 +123,7 @@ TEST(test_pool2d_relu_relu_nhwc, cpu_place) { std::uniform_real_distribution<float> dist(static_cast<float>(10.0), static_cast<float>(20.0)); std::mt19937 engine; - size_t numel = static_cast<size_t>(phi::product(dims)); + size_t numel = static_cast<size_t>(common::product(dims)); input_name.tensor->Resize(dims); auto data_ptr = input_name.tensor->mutable_data<float>(p); for (size_t i = 0; i < numel; ++i) { @@ -186,7 +186,7 @@ TEST(test_pool2d_shape_nhwc, cpu_place) { std::uniform_real_distribution<float> dist(static_cast<float>(10.0), static_cast<float>(20.0)); std::mt19937 engine; - size_t numel = static_cast<size_t>(phi::product(dims)); + size_t numel = static_cast<size_t>(common::product(dims)); input_name.tensor->Resize(dims); auto data_ptr = input_name.tensor->mutable_data<float>(p); for (size_t i = 0; i < numel; ++i) { @@ -242,7 +242,7 @@ TEST(test_pool2d_crop_nhwc, cpu_place) { // Initialize input data std::uniform_real_distribution<float> dist(10.0f, 20.0f); std::mt19937 engine; - size_t numel = static_cast<size_t>(phi::product(dims)); + size_t numel = static_cast<size_t>(common::product(dims)); input_name.tensor->Resize(dims); auto data_ptr = input_name.tensor->mutable_data<float>(p); for (size_t i = 0; i < numel; ++i) { @@ -250,11 +250,11 @@ TEST(test_pool2d_crop_nhwc, cpu_place) { } // Second input (Y) to crop has no buffer, // but as it is MKLDNN its shape order should be NCHW - auto expected_dims_nchw = phi::vectorize(expected_dims); + auto expected_dims_nchw = common::vectorize(expected_dims); std::rotate(expected_dims_nchw.begin() + 1, expected_dims_nchw.end() - 1, expected_dims_nchw.end()); - second_crop_input_name.tensor->Resize(phi::make_ddim(expected_dims_nchw)); + second_crop_input_name.tensor->Resize(common::make_ddim(expected_dims_nchw)); const auto second_crop_input_md = dnnl::memory::desc(expected_dims_nchw, dnnl::memory::data_type::f32, diff --git a/test/cpp/fluid/nccl/nccl_op_test.cu.cc b/test/cpp/fluid/nccl/nccl_op_test.cu.cc index 87c0708e12d398..b8a47b97031653 100644 --- a/test/cpp/fluid/nccl/nccl_op_test.cu.cc +++ b/test/cpp/fluid/nccl/nccl_op_test.cu.cc @@ -102,7 +102,7 @@ class NCCLTester : public ::testing::Test { if (!send_tensor->numel()) { send_tensor->mutable_data<float>(kDims, place); - std::vector<float> send_vector(phi::product(kDims), GetGPUData(gpu_id)); + std::vector<float> send_vector(common::product(kDims), GetGPUData(gpu_id)); paddle::framework::TensorFromVector(send_vector, *ctx, send_tensor); VLOG(1) << "Send Tensor filled with elements " << send_tensor->numel(); } @@ -111,7 +111,7 @@ 
PADDLE_ENFORCE_EQ( send_tensor->numel(), - phi::product(kDims), + common::product(kDims), paddle::platform::errors::InvalidArgument("Tensor numel not match!")); auto op = f::OpRegistry::CreateOp(*op1); @@ -184,7 +184,7 @@ void NCCLTester::testNcclAllReduceOp() { dev_ctx->stream()); dev_ctx->Wait(); - for (int64_t j = 0; j < phi::product(kDims); ++j) { + for (int64_t j = 0; j < common::product(kDims); ++j) { ASSERT_NEAR(ct[j], expected_result, 1e-5); } } @@ -241,7 +241,7 @@ void NCCLTester::testNcclReduceOp() { dev_ctx->stream()); dev_ctx->Wait(); - for (int64_t j = 0; j < phi::product(kDims); ++j) { + for (int64_t j = 0; j < common::product(kDims); ++j) { ASSERT_NEAR(ct[j], expected_result, 1e-5); } } @@ -299,7 +299,7 @@ void NCCLTester::testNcclBcastOp() { dev_ctx->stream()); dev_ctx->Wait(); - for (int64_t j = 0; j < phi::product(kDims); ++j) { + for (int64_t j = 0; j < common::product(kDims); ++j) { ASSERT_NEAR(ct[j], result, 1e-5); } } diff --git a/test/cpp/fluid/pscore/CMakeLists.txt b/test/cpp/fluid/pscore/CMakeLists.txt index eb6d3b4385487a..9413c8aaa43f92 100644 --- a/test/cpp/fluid/pscore/CMakeLists.txt +++ b/test/cpp/fluid/pscore/CMakeLists.txt @@ -51,25 +51,28 @@ endif() set_source_files_properties( heter_server_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -paddle_test(heter_server_test SRCS heter_server_test.cc) +paddle_test(heter_server_test SRCS heter_server_test.cc DEPS common) set_source_files_properties( send_and_recv_op_cpu_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -paddle_test(send_and_recv_cpu_test SRCS send_and_recv_op_cpu_test.cc) +paddle_test(send_and_recv_cpu_test SRCS send_and_recv_op_cpu_test.cc DEPS + common) set_source_files_properties( send_and_recv_op_gpu_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -paddle_test(send_and_recv_gpu_test SRCS send_and_recv_op_gpu_test.cc) +paddle_test(send_and_recv_gpu_test SRCS send_and_recv_op_gpu_test.cc DEPS + common) set_source_files_properties( heter_listen_and_server_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -paddle_test(heter_listen_and_server_test SRCS heter_listen_and_server_test.cc) +paddle_test(heter_listen_and_server_test SRCS heter_listen_and_server_test.cc + DEPS common) #set_source_files_properties(heter_cloud_comm_cpu_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +#cc_test(heter_cloud_comm_cpu_test SRCS heter_cloud_comm_cpu_test.cc DEPS executor scope proto_desc generated_static_op heter_listen_and_serv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} phi common) set_source_files_properties( switch_server_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) @@ -80,9 +83,11 @@ cc_binary( DEPS executor scope + common proto_desc generated_op heter_listen_and_serv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} - phi) + phi + common) diff --git a/test/cpp/fluid/scatter_test.cc b/test/cpp/fluid/scatter_test.cc index 7f774089fd9ca5..f4fa2c9894c642 100644 --- a/test/cpp/fluid/scatter_test.cc +++ b/test/cpp/fluid/scatter_test.cc @@ -24,9 +24,9 @@ TEST(scatter, ScatterUpdate) { phi::DenseTensor index; phi::DenseTensor output; - auto* p_src = src.mutable_data<float>(phi::make_ddim({1, 4}), + auto* p_src = src.mutable_data<float>(common::make_ddim({1, 4}), paddle::platform::CPUPlace()); - auto* p_index = index.mutable_data<int>(phi::make_ddim({1}), + auto* p_index = 
index.mutable_data<int>(common::make_ddim({1}), paddle::platform::CPUPlace()); for (size_t i = 0; i < 4; ++i) { @@ -34,7 +34,7 @@ } p_index[0] = 1; - auto* p_output = output.mutable_data<float>(phi::make_ddim({4, 4}), + auto* p_output = output.mutable_data<float>(common::make_ddim({4, 4}), paddle::platform::CPUPlace()); for (int64_t i = 0; i < output.numel(); ++i) { diff --git a/test/cpp/fluid/test_common_infer_shape_functions.cc b/test/cpp/fluid/test_common_infer_shape_functions.cc index 84332f110216c3..8519e6ca7f8494 100644 --- a/test/cpp/fluid/test_common_infer_shape_functions.cc +++ b/test/cpp/fluid/test_common_infer_shape_functions.cc @@ -13,12 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "gtest/gtest.h" +#include "paddle/common/ddim.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/imperative/infer_shape_context.h" #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/operators/common_infer_shape_functions.h" -#include "paddle/phi/core/ddim.h" USE_OP_ITSELF(relu); USE_OP_ITSELF(elementwise_add); diff --git a/test/cpp/imperative/CMakeLists.txt b/test/cpp/imperative/CMakeLists.txt index 82ae6c7a3fa749..491a008a963283 100644 --- a/test/cpp/imperative/CMakeLists.txt +++ b/test/cpp/imperative/CMakeLists.txt @@ -32,7 +32,7 @@ endif() cc_test( test_gradient_accmulator SRCS test_gradient_accmulator.cc - DEPS memcpy selected_rows_utils gradient_accumulator phi phi_utils) + DEPS memcpy selected_rows_utils gradient_accumulator phi common phi_utils) cc_test( test_layer SRCS test_layer.cc diff --git a/test/cpp/imperative/heter_ccl_context_test.cc b/test/cpp/imperative/heter_ccl_context_test.cc index 8c544669dc799b..37520d10f172af 100644 --- a/test/cpp/imperative/heter_ccl_context_test.cc +++ b/test/cpp/imperative/heter_ccl_context_test.cc @@ -50,7 +50,7 @@ void AllReduceByStream(int local_rank, int device_id) { // input and output data framework::Variable* src_dev_var(new framework::Variable()); auto* src_dev_tensor = src_dev_var->GetMutable<phi::DenseTensor>(); - src_dev_tensor->mutable_data<float>(phi::make_ddim({data_size}), place); + src_dev_tensor->mutable_data<float>(common::make_ddim({data_size}), place); std::vector<float> src_vec; for (int i = 0; i < data_size; i++) { @@ -61,7 +61,7 @@ framework::Variable* dst_dev_var(new framework::Variable()); auto* dst_dev_tensor = dst_dev_var->GetMutable<phi::DenseTensor>(); - dst_dev_tensor->mutable_data<float>(phi::make_ddim({data_size}), place); + dst_dev_tensor->mutable_data<float>(common::make_ddim({data_size}), place); // call allreduce hpc.AllReduceByStream(*src_dev_var, dst_dev_var, 0, false); diff --git a/test/cpp/imperative/nccl_context_test.cc b/test/cpp/imperative/nccl_context_test.cc index 80bd28e1c8b03f..8b9958ee561824 100644 --- a/test/cpp/imperative/nccl_context_test.cc +++ b/test/cpp/imperative/nccl_context_test.cc @@ -87,7 +87,7 @@ void Broadcast(int local_rank, int device_id) { framework::Variable* src_dev_var(new framework::Variable()); auto* src_dev_tensor = src_dev_var->GetMutable<phi::DenseTensor>(); - src_dev_tensor->mutable_data<float>(phi::make_ddim({data_size}), place); + src_dev_tensor->mutable_data<float>(common::make_ddim({data_size}), place); // fill data for rank 0 only std::vector<float> src_vec; diff --git a/test/cpp/imperative/test_gradient_accmulator.cc b/test/cpp/imperative/test_gradient_accmulator.cc index bb264250ecf567..0af376da0a7310 100644 --- a/test/cpp/imperative/test_gradient_accmulator.cc +++ 
b/test/cpp/imperative/test_gradient_accmulator.cc @@ -40,7 +40,8 @@ TEST(Test__SelectedRowsMerge_Test, SelectedRowsMerge) { auto sr2 = std::make_shared<phi::SelectedRows>(rows, table_size); // initialize a sparse table 1 - sr1->mutable_value()->Resize(phi::make_ddim({table_size, embedding_width})); + sr1->mutable_value()->Resize( + common::make_ddim({table_size, embedding_width})); auto* data_sr1 = sr1->mutable_value()->mutable_data<float>(cpu); for (int64_t i = 0; i < table_size; ++i) { for (int64_t j = 0; j < embedding_width; ++j) { @@ -49,7 +50,8 @@ } // initialize a sparse table 2 - sr2->mutable_value()->Resize(phi::make_ddim({table_size, embedding_width})); + sr2->mutable_value()->Resize( + common::make_ddim({table_size, embedding_width})); auto* data_sr2 = sr2->mutable_value()->mutable_data<float>(cpu); for (int64_t i = 0; i < table_size; ++i) { for (int64_t j = 0; j < embedding_width; ++j) { @@ -92,8 +94,8 @@ int TensorddTest(Place1 place1, Place2 place2, T t1, T t2) { std::vector<int64_t> dims = {2, 5}; auto* src = var1.GetMutable<phi::DenseTensor>(); auto* dst = var2.GetMutable<phi::DenseTensor>(); - src->Resize(phi::make_ddim(dims)); - dst->Resize(phi::make_ddim(dims)); + src->Resize(common::make_ddim(dims)); + dst->Resize(common::make_ddim(dims)); auto* src_mutable = src->mutable_data<T>(place1); auto* dst_mutable = dst->mutable_data<T>(place2); diff --git a/test/cpp/imperative/test_group.cc b/test/cpp/imperative/test_group.cc index f2eeb24b7eccef..2243a24dee90d0 100644 --- a/test/cpp/imperative/test_group.cc +++ b/test/cpp/imperative/test_group.cc @@ -99,7 +99,7 @@ void GroupConcatSplit(Place place, size_t size) { { // concat auto* tensor = group.dense_contents_.GetMutable<phi::DenseTensor>(); - tensor->Resize(phi::make_ddim({group.all_length_})) + tensor->Resize(common::make_ddim({group.all_length_})) .mutable_data(place, framework::TransToPhiDataType(group.dtype_)); group.ConcatTensors(*dev_ctx); diff --git a/test/cpp/imperative/test_hooks.cc b/test/cpp/imperative/test_hooks.cc index 5307139a42652e..3118d38be3a933 100644 --- a/test/cpp/imperative/test_hooks.cc +++ b/test/cpp/imperative/test_hooks.cc @@ -85,7 +85,7 @@ TEST(TestHooks, TestGradVarLeafBackwardHook) { auto* x_tensor = x->MutableVar()->GetMutable<phi::DenseTensor>(); auto* y_tensor = y->MutableVar()->GetMutable<phi::DenseTensor>(); - x_tensor->Resize(phi::make_ddim(x_dims)); + x_tensor->Resize(common::make_ddim(x_dims)); auto* mutable_x = x_tensor->mutable_data<float>(place); memory::Copy(place, mutable_x, @@ -93,7 +93,7 @@ TEST(TestHooks, TestGradVarLeafBackwardHook) { src_data.data(), sizeof(float) * src_data.size()); - y_tensor->Resize(phi::make_ddim(y_dims)); + y_tensor->Resize(common::make_ddim(y_dims)); auto* mutable_y = y_tensor->mutable_data<float>(place); memory::Copy(place, mutable_y, @@ -175,7 +175,7 @@ void GradVarLeafBackwardHookWithGradAccmulatedTest() { auto* y_tensor = y->MutableVar()->GetMutable<phi::DenseTensor>(); auto* z_tensor = z->MutableVar()->GetMutable<phi::DenseTensor>(); - x_tensor->Resize(phi::make_ddim(x_dims)); + x_tensor->Resize(common::make_ddim(x_dims)); auto* mutable_x = x_tensor->mutable_data<float>(place); memory::Copy(place, mutable_x, @@ -183,7 +183,7 @@ void GradVarLeafBackwardHookWithGradAccmulatedTest() { src_data.data(), sizeof(float) * src_data.size()); - y_tensor->Resize(phi::make_ddim(y_dims)); + y_tensor->Resize(common::make_ddim(y_dims)); auto* mutable_y = y_tensor->mutable_data<float>(place); memory::Copy(place, mutable_y, @@ -191,7 +191,7 @@ void GradVarLeafBackwardHookWithGradAccmulatedTest() { src_data.data(), sizeof(float) * src_data.size()); - 
z_tensor->Resize(common::make_ddim(z_dims)); auto* mutable_z = z_tensor->mutable_data<float>(place); memory::Copy(place, mutable_z, diff --git a/test/cpp/imperative/test_prepare_op.cc b/test/cpp/imperative/test_prepare_op.cc index 22473cac68dfef..e46390d88bdc4f 100644 --- a/test/cpp/imperative/test_prepare_op.cc +++ b/test/cpp/imperative/test_prepare_op.cc @@ -138,7 +138,7 @@ TEST(test_prepare_op, test_prepare_data) { // prepare a cpu only input auto* vin_tensor = vin->MutableVar()->GetMutable<phi::DenseTensor>(); - vin_tensor->Resize(phi::make_ddim(dims)); + vin_tensor->Resize(common::make_ddim(dims)); auto* vin_mutable_tensor = vin_tensor->mutable_data<float>(cpu_place); paddle::memory::Copy(cpu_place, vin_mutable_tensor, @@ -196,7 +196,7 @@ void TestPrepareDataSamePlace(framework::AttributeMap attr_map) { // prepare a cpu only input auto* vin_tensor = vin->MutableVar()->GetMutable<phi::DenseTensor>(); - vin_tensor->Resize(phi::make_ddim(dims)); + vin_tensor->Resize(common::make_ddim(dims)); auto* vin_mutable_tensor = vin_tensor->mutable_data<float>(cpu_place); paddle::memory::Copy(cpu_place, vin_mutable_tensor, diff --git a/test/cpp/imperative/test_tracer.cc b/test/cpp/imperative/test_tracer.cc index efb7dbf3603ec7..5c29b61dfbe23b 100644 --- a/test/cpp/imperative/test_tracer.cc +++ b/test/cpp/imperative/test_tracer.cc @@ -72,14 +72,14 @@ TEST(test_tracer, test_trace_op) { auto* x_in_tensor = x_in->MutableVar()->GetMutable<phi::DenseTensor>(); auto* y_in_tensor = y_in->MutableVar()->GetMutable<phi::DenseTensor>(); - x_in_tensor->Resize(phi::make_ddim(dims1)); + x_in_tensor->Resize(common::make_ddim(dims1)); auto* mutable_x = x_in_tensor->mutable_data<float>(place); paddle::memory::Copy(place, mutable_x, place, src_data.data(), sizeof(float) * src_data.size()); - y_in_tensor->Resize(phi::make_ddim(dims2)); + y_in_tensor->Resize(common::make_ddim(dims2)); auto* mutable_y = y_in_tensor->mutable_data<float>(place); paddle::memory::Copy(place, mutable_y, @@ -124,14 +124,14 @@ TEST(test_tracer, test_trace_op_with_backward) { auto* x_in_tensor = x_in->MutableVar()->GetMutable<phi::DenseTensor>(); auto* y_in_tensor = y_in->MutableVar()->GetMutable<phi::DenseTensor>(); - x_in_tensor->Resize(phi::make_ddim(dims1)); + x_in_tensor->Resize(common::make_ddim(dims1)); auto* mutable_x = x_in_tensor->mutable_data<float>(place); paddle::memory::Copy(place, mutable_x, place, src_data.data(), sizeof(float) * src_data.size()); - y_in_tensor->Resize(phi::make_ddim(dims2)); + y_in_tensor->Resize(common::make_ddim(dims2)); auto* mutable_y = y_in_tensor->mutable_data<float>(place); paddle::memory::Copy(place, mutable_y, @@ -170,14 +170,14 @@ TEST(test_tracer, test_track_backward_output) { auto* x_in_tensor = x_in->MutableVar()->GetMutable<phi::DenseTensor>(); auto* y_in_tensor = y_in->MutableVar()->GetMutable<phi::DenseTensor>(); - x_in_tensor->Resize(phi::make_ddim(dims1)); + x_in_tensor->Resize(common::make_ddim(dims1)); auto* mutable_x = x_in_tensor->mutable_data<float>(place); paddle::memory::Copy(place, mutable_x, place, src_data.data(), sizeof(float) * src_data.size()); - y_in_tensor->Resize(phi::make_ddim(dims2)); + y_in_tensor->Resize(common::make_ddim(dims2)); auto* mutable_y = y_in_tensor->mutable_data<float>(place); paddle::memory::Copy(place, mutable_y, @@ -215,14 +215,14 @@ TEST(test_tracer, test_track_backward_input) { auto* x_in_tensor = x_in->MutableVar()->GetMutable<phi::DenseTensor>(); auto* y_in_tensor = y_in->MutableVar()->GetMutable<phi::DenseTensor>(); - x_in_tensor->Resize(phi::make_ddim(dims1)); + x_in_tensor->Resize(common::make_ddim(dims1)); auto* mutable_x = x_in_tensor->mutable_data<float>(place); paddle::memory::Copy(place, mutable_x, place, src_data.data(), sizeof(float) * src_data.size()); - 
y_in_tensor->Resize(phi::make_ddim(dims2)); + y_in_tensor->Resize(common::make_ddim(dims2)); auto* mutable_y = y_in_tensor->mutable_data<float>(place); paddle::memory::Copy(place, mutable_y, @@ -263,14 +263,14 @@ TEST(test_tracer, test_trace_op_with_multi_device_inputs) { auto* x_in_tensor = x_in->MutableVar()->GetMutable<phi::DenseTensor>(); auto* y_in_tensor = y_in->MutableVar()->GetMutable<phi::DenseTensor>(); - x_in_tensor->Resize(phi::make_ddim(dims1)); + x_in_tensor->Resize(common::make_ddim(dims1)); auto* mutable_x = x_in_tensor->mutable_data<float>(place); paddle::memory::Copy(place, mutable_x, place, src_data.data(), sizeof(float) * src_data.size()); - y_in_tensor->Resize(phi::make_ddim(dims2)); + y_in_tensor->Resize(common::make_ddim(dims2)); auto* mutable_y = y_in_tensor->mutable_data<float>(gpu_place); paddle::memory::Copy(gpu_place, mutable_y, @@ -400,14 +400,14 @@ TEST(test_tracer, test_var_without_grad_var) { auto* x_in_tensor = x_in->MutableVar()->GetMutable<phi::DenseTensor>(); auto* y_in_tensor = y_in->MutableVar()->GetMutable<phi::DenseTensor>(); - x_in_tensor->Resize(phi::make_ddim(dims1)); + x_in_tensor->Resize(common::make_ddim(dims1)); auto* mutable_x = x_in_tensor->mutable_data<float>(place); paddle::memory::Copy(place, mutable_x, place, src_data.data(), sizeof(float) * src_data.size()); - y_in_tensor->Resize(phi::make_ddim(dims2)); + y_in_tensor->Resize(common::make_ddim(dims2)); auto* mutable_y = y_in_tensor->mutable_data<float>(place); paddle::memory::Copy(place, mutable_y, @@ -619,14 +619,14 @@ TEST(test_tracer, eager_tracer) { auto* x_in_tensor = x_in->MutableVar()->GetMutable<phi::DenseTensor>(); auto* y_in_tensor = y_in->MutableVar()->GetMutable<phi::DenseTensor>(); - x_in_tensor->Resize(phi::make_ddim(dims1)); + x_in_tensor->Resize(common::make_ddim(dims1)); auto* mutable_x = x_in_tensor->mutable_data<float>(place); paddle::memory::Copy(place, mutable_x, place, src_data.data(), sizeof(float) * src_data.size()); - y_in_tensor->Resize(phi::make_ddim(dims2)); + y_in_tensor->Resize(common::make_ddim(dims2)); auto* mutable_y = y_in_tensor->mutable_data<float>(place); paddle::memory::Copy(place, mutable_y, diff --git a/test/cpp/inference/analysis/CMakeLists.txt b/test/cpp/inference/analysis/CMakeLists.txt index 74a1e91fc4dc1c..5094272adaadf1 100644 --- a/test/cpp/inference/analysis/CMakeLists.txt +++ b/test/cpp/inference/analysis/CMakeLists.txt @@ -44,6 +44,7 @@ if(NOT APPLE) SRCS analyzer_tester.cc EXTRA_DEPS + common paddle_inference_shared ARGS --inference_model_dir=${WORD2VEC_MODEL_DIR}) diff --git a/test/cpp/inference/api/CMakeLists.txt b/test/cpp/inference/api/CMakeLists.txt index a0ac3631b7181d..bb4a8ed761ad60 100644 --- a/test/cpp/inference/api/CMakeLists.txt +++ b/test/cpp/inference/api/CMakeLists.txt @@ -7,12 +7,12 @@ set(inference_api_tester_deps paddle_inference_api analysis_config) cc_test( test_paddle_inference_api SRCS api_tester.cc - DEPS ${inference_api_tester_deps}) + DEPS ${inference_api_tester_deps} common) cc_test( inference_api_helper_test SRCS helper_test.cc - DEPS ${inference_api_tester_deps}) + DEPS ${inference_api_tester_deps} common) if(WITH_ONNXRUNTIME AND WIN32) # Copy onnxruntime for some c++ test in Windows, since the test will @@ -120,6 +120,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS ${filename} EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${install_dir}/model @@ -133,6 +134,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS ${filename} EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${install_dir}/model @@ -150,6 +152,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS ${filename} EXTRA_DEPS + common paddle_inference_shared ARGS 
--infer_model=${install_dir}/mobilenet_v2_models/1 @@ -159,7 +162,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) function(inference_analysis_api_test_build TARGET_NAME filename) inference_analysis_test_build(${TARGET_NAME} SRCS ${filename} EXTRA_DEPS - paddle_inference_shared) + common paddle_inference_shared) endfunction() function(inference_analysis_api_int8_test_run TARGET_NAME test_binary @@ -219,7 +222,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) function(inference_analysis_api_test_with_fake_data_build TARGET_NAME filename) inference_analysis_test_build(${TARGET_NAME} SRCS ${filename} EXTRA_DEPS - paddle_inference_shared) + common paddle_inference_shared) endfunction() function(inference_analysis_api_test_with_fake_data_run TARGET_NAME @@ -325,7 +328,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) download_model_and_data_without_verify( ${RNN1_INSTALL_DIR} "rnn1/model.tar.gz" "rnn1/data.txt.tar.gz") inference_analysis_api_test(test_analyzer_rnn1 ${RNN1_INSTALL_DIR} - analyzer_rnn1_tester.cc) + analyzer_rnn1_tester.cc EXTRA_DEPS common) # seq_pool1 set(SEQ_POOL1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/seq_pool") @@ -334,19 +337,19 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) "seq_pool1_data.txt.tar.gz") inference_analysis_api_test( test_analyzer_seq_pool1_compare_determine ${SEQ_POOL1_INSTALL_DIR} - analyzer_seq_pool1_compare_determine_tester.cc) + analyzer_seq_pool1_compare_determine_tester.cc EXTRA_DEPS common) inference_analysis_api_test( test_analyzer_seq_pool1 ${SEQ_POOL1_INSTALL_DIR} - analyzer_seq_pool1_compare_tester.cc) + analyzer_seq_pool1_compare_tester.cc EXTRA_DEPS common) inference_analysis_api_test( test_analyzer_seq_pool1_fuse_compare_zero_copy ${SEQ_POOL1_INSTALL_DIR} - analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc) + analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc EXTRA_DEPS common) inference_analysis_api_test( test_analyzer_seq_pool1_fuse_statis ${SEQ_POOL1_INSTALL_DIR} - analyzer_seq_pool1_fuse_statis_tester.cc) + analyzer_seq_pool1_fuse_statis_tester.cc EXTRA_DEPS common) inference_analysis_api_test( test_analyzer_seq_pool1_profile ${SEQ_POOL1_INSTALL_DIR} - analyzer_seq_pool1_profile_tester.cc) + analyzer_seq_pool1_profile_tester.cc EXTRA_DEPS common) if(NOT WIN32) set_tests_properties(test_analyzer_seq_pool1_compare_determine PROPERTIES TIMEOUT 120) @@ -376,7 +379,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) download_model_and_data_without_verify( ${RNN2_INSTALL_DIR} "rnn2_model.tar.gz" "rnn2_data.txt.tar.gz") inference_analysis_api_test(test_analyzer_rnn2 ${RNN2_INSTALL_DIR} - analyzer_rnn2_tester.cc) + analyzer_rnn2_tester.cc EXTRA_DEPS common) # TODO(luotao, Superjom) Disable DAM test, temporarily fix # https://github.com/PaddlePaddle/Paddle/issues/15032#issuecomment-455990914. 
@@ -385,7 +388,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) set(DAM_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/dam") download_model_and_data_without_verify(${DAM_INSTALL_DIR} "DAM_model.tar.gz" "DAM_data.txt.tar.gz") - #inference_analysis_api_test(test_analyzer_dam ${DAM_INSTALL_DIR} analyzer_dam_tester.cc EXTRA_DEPS legacy_allocator) + #inference_analysis_api_test(test_analyzer_dam ${DAM_INSTALL_DIR} analyzer_dam_tester.cc EXTRA_DEPS legacy_allocator common) # small DAM set(DAM_SMALL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/small_dam") @@ -398,13 +401,14 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_dam_tester.cc EXTRA_DEPS paddle_inference_shared + common ARGS --infer_model=${DAM_SMALL_INSTALL_DIR}/model --infer_data=${DAM_SMALL_INSTALL_DIR}/data.txt) #save model inference_analysis_api_test(test_analyzer_save_model ${DAM_SMALL_INSTALL_DIR} - analyzer_save_model_tester.cc) + analyzer_save_model_tester.cc EXTRA_DEPS common) # chinese_ner set(CHINESE_NER_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/chinese_ner") @@ -412,7 +416,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) ${CHINESE_NER_INSTALL_DIR} "chinese_ner_model.tar.gz" "chinese_ner-data.txt.tar.gz") inference_analysis_api_test(test_analyzer_ner ${CHINESE_NER_INSTALL_DIR} - analyzer_ner_tester.cc) + analyzer_ner_tester.cc EXTRA_DEPS common) # lac set(LAC_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/lac") @@ -420,7 +424,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) ${LAC_INSTALL_DIR} "lac_model.tar.gz" 419ca6eb85f57a01bfe173591910aec5 "lac_data.txt.tar.gz" 9983539cd6b34fbdc411e43422776bfd) inference_analysis_api_test(test_analyzer_lac ${LAC_INSTALL_DIR} - analyzer_lac_tester.cc) + analyzer_lac_tester.cc EXTRA_DEPS common) # Ernie set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie") @@ -431,13 +435,14 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) 73beea65abda2edb61c1662cd3180c62) if(WITH_GPU) inference_analysis_api_test(test_analyzer_ernie ${ERNIE_INSTALL_DIR} - analyzer_ernie_tester.cc) + analyzer_ernie_tester.cc EXTRA_DEPS common) inference_analysis_api_test(gpu_ernie_half_test ${ERNIE_INSTALL_DIR} - gpu_ernie_half_test.cc) + gpu_ernie_half_test.cc EXTRA_DEPS common) set_tests_properties(gpu_ernie_half_test PROPERTIES TIMEOUT 60) endif() inference_analysis_api_int8_test( - test_analyzer_ernie_int8 ${ERNIE_INSTALL_DIR} analyzer_ernie_int8_tester.cc) + test_analyzer_ernie_int8 ${ERNIE_INSTALL_DIR} analyzer_ernie_int8_tester.cc + EXTRA_DEPS common) # Ernie large set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie_Large") @@ -453,6 +458,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_ernie_tester.cc EXTRA_DEPS paddle_inference_shared + common ARGS --infer_model=${ERNIE_INSTALL_DIR}/model --infer_data=${ERNIE_INSTALL_DIR}/data.txt @@ -477,7 +483,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) 36ae620020cc3377f45ed330dd36238f) inference_analysis_api_test( test_analyzer_text_classification ${TEXT_CLASSIFICATION_INSTALL_DIR} - analyzer_text_classification_tester.cc) + analyzer_text_classification_tester.cc EXTRA_DEPS common) # seq_conv1 set(SEQ_CONV1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/seq_conv1") @@ -485,7 +491,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) ${SEQ_CONV1_INSTALL_DIR} "seq_conv1_model.tar.gz" "seq_conv1_data.txt.tar.gz") inference_analysis_api_test(test_analyzer_seq_conv1 ${SEQ_CONV1_INSTALL_DIR} - analyzer_seq_conv1_tester.cc) + analyzer_seq_conv1_tester.cc EXTRA_DEPS common) # transformer, the dataset only works on batch_size=8 now set(TRANSFORMER_INSTALL_DIR 
"${INFERENCE_DEMO_INSTALL_DIR}/transformer") @@ -497,6 +503,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_transformer_compare_tester.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model @@ -508,6 +515,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_transformer_fuse_tester.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model @@ -519,6 +527,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_transformer_profile_tester.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model @@ -537,6 +546,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_vit_ocr_tester.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${VIT_OCR_INSTALL_DIR}/vit_ocr/model @@ -550,7 +560,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) "inference-vis-demos/ocr.tar.gz") endif() inference_analysis_api_test(test_analyzer_ocr ${OCR_INSTALL_DIR} - analyzer_vis_tester.cc) + analyzer_vis_tester.cc EXTRA_DEPS common) # densebox set(DENSEBOX_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/densebox") @@ -560,6 +570,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_detect_functional_mkldnn_tester.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${DENSEBOX_INSTALL_DIR}/model @@ -573,8 +584,9 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) ${MOBILENET_INSTALL_DIR} "http://paddlemodels.bj.bcebos.com/" "inference-vis-demos/mobilenet.tar.gz") endif() - inference_analysis_api_test(test_analyzer_mobilenet_transpose - ${MOBILENET_INSTALL_DIR} analyzer_vis_tester.cc) + inference_analysis_api_test( + test_analyzer_mobilenet_transpose ${MOBILENET_INSTALL_DIR} + analyzer_vis_tester.cc EXTRA_DEPS common) ### Image classification tests with fake data set(IMG_CLASS_TEST_APP "test_analyzer_image_classification") @@ -913,7 +925,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) download_model_and_data_without_verify( ${BERT_INSTALL_DIR} "bert_emb128_model.tar.gz" "bert_data_len20.txt.tar.gz") inference_analysis_api_test(test_analyzer_bert ${BERT_INSTALL_DIR} - analyzer_bert_tester.cc) + analyzer_bert_tester.cc EXTRA_DEPS common) # multiple models prediction set(MMP_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/multi_model_prediction") @@ -921,7 +933,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) PaddleInference/mobilenet_v2_models.tar.gz) inference_multiple_models_analysis_api_test( test_analyzer_multi_model_prediction ${MMP_INSTALL_DIR} - analyzer_mmp_tester.cc) + analyzer_mmp_tester.cc EXTRA_DEPS common) if(WITH_GPU AND TENSORRT_FOUND) set(TRT_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/trt_models") @@ -942,6 +954,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS trt_mobilenet_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) @@ -950,6 +963,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS trt_mark_trt_engine_outputs_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) @@ -958,6 +972,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS trt_disable_tensorrt_half_ops_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) @@ -966,6 +981,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS trt_cascade_rcnn_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS 
--infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) @@ -975,6 +991,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) trt_split_converter_test.cc EXTRA_DEPS paddle_inference_shared + common ARGS --infer_model=${TEST_SPLIT_CONVERTER_MODEL}/) inference_analysis_test( @@ -982,6 +999,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_capi_exp_gpu_tester.cc EXTRA_DEPS + common paddle_inference_c_shared ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) @@ -990,6 +1008,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_capi_exp_xpu_tester.cc EXTRA_DEPS + common paddle_inference_c_shared ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) @@ -1000,6 +1019,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS trt_rebind_stream_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) @@ -1017,6 +1037,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS trt_quant_int8_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${TRT_MODEL_QUANT_RESNET_DIR}) @@ -1033,6 +1054,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS trt_quant_int8_yolov3_r50_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${TRT_MODEL_QUANT_YOLOV3_DIR}) @@ -1057,6 +1079,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS trt_dynamic_shape_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}) @@ -1073,6 +1096,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS trt_dynamic_shape_ernie_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4) @@ -1091,6 +1115,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) trt_dynamic_shape_transformer_prune_test.cc EXTRA_DEPS paddle_inference_shared + common ARGS --infer_model=${TEST_TRT_TRANSFORMER_PRUNE_MODEL}/transformer_prune) @@ -1105,6 +1130,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS trt_dynamic_shape_ernie_serialize_deserialize_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_unserialized) @@ -1120,6 +1146,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS trt_dynamic_shape_ernie_fp16_serialize_deserialize_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_fp16_unserialized) @@ -1134,6 +1161,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS lite_mul_model_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${LITE_MODEL_INSTALL_DIR}) @@ -1142,6 +1170,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS lite_resnet50_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${RESNET50_MODEL_DIR}) @@ -1151,6 +1180,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_capi_exp_tester.cc EXTRA_DEPS + common paddle_inference_c_shared ARGS --infer_model=${RESNET50_MODEL_DIR}/model) @@ -1160,6 +1190,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_capi_exp_pd_config_tester.cc EXTRA_DEPS + common paddle_inference_c_shared ARGS --infer_model=${MOBILENET_INSTALL_DIR}/model) @@ -1169,6 +1200,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_capi_exp_pd_tensor_tester.cc EXTRA_DEPS + common paddle_inference_c_shared ARGS --infer_model=${MOBILENET_INSTALL_DIR}/model) @@ -1179,6 +1211,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_capi_exp_pd_threads_tester.cc EXTRA_DEPS + common 
paddle_inference_c_shared ARGS --infer_model=${MOBILENET_INSTALL_DIR}/model) @@ -1189,6 +1222,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_zerocopy_tensor_tester.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${OCR_INSTALL_DIR}/model) @@ -1199,6 +1233,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_dist_model_tester.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${OCR_INSTALL_DIR}/model) @@ -1213,6 +1248,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_dist_model_xpu_tester.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${OCR_INSTALL_DIR}/model) @@ -1223,6 +1259,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_paddle_tensor_tester.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${OCR_INSTALL_DIR}/model @@ -1235,6 +1272,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_capi_exp_int_tester.cc EXTRA_DEPS + common paddle_inference_c_shared ARGS --infer_model=${INT8_DATA_DIR}/resnet50/model) @@ -1245,6 +1283,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS analyzer_capi_exp_ner_tester.cc EXTRA_DEPS + common paddle_inference_c_shared ARGS --infer_model=${CHINESE_NER_INSTALL_DIR}/model) @@ -1255,6 +1294,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS paddle_infer_api_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${RESNET50_MODEL_DIR}) @@ -1264,6 +1304,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS paddle_infer_api_copy_tensor_tester.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${RESNET50_MODEL_DIR}) @@ -1274,7 +1315,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) cc_test( paddle_infer_api_errors_test SRCS paddle_infer_api_errors_tester.cc - DEPS ${inference_api_tester_deps}) + DEPS ${inference_api_tester_deps} common) if(WITH_GPU AND TENSORRT_FOUND) set_tests_properties(trt_quant_int8_yolov3_r50_test PROPERTIES TIMEOUT 400) @@ -1345,6 +1386,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS ipu_word2vec_sample.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${WORD2VEC_INSTALL_DIR}) @@ -1352,11 +1394,23 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) # ERNIE set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie") inference_analysis_api_test( - ipu_ernie_test ${ERNIE_INSTALL_DIR} ipu_ernie_test.cc ARGS --warmup=true - --repeat=10) + ipu_ernie_test + ${ERNIE_INSTALL_DIR} + ipu_ernie_test.cc + ARGS + --warmup=true + --repeat=10 + EXTRA_DEPS + common) inference_analysis_api_test( - ipu_ernie_fp16_test ${ERNIE_INSTALL_DIR} ipu_ernie_fp16_test.cc ARGS - --warmup=true --repeat=10) + ipu_ernie_fp16_test + ${ERNIE_INSTALL_DIR} + ipu_ernie_fp16_test.cc + ARGS + --warmup=true + --repeat=10 + EXTRA_DEPS + common) # Resnet50 set(RESNET50_MODEL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/resnet50") @@ -1365,6 +1419,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS ipu_resnet50_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${RESNET50_MODEL_DIR} @@ -1375,6 +1430,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS ipu_resnet50_fp16_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${RESNET50_MODEL_DIR} @@ -1390,7 +1446,9 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) --model_name="Resnet50" --infer_model=${RESNET50_MODEL_DIR} --warmup=true - --repeat=10) + --repeat=10 + EXTRA_DEPS + common) endif() if(WITH_XPU) @@ -1399,6 +1457,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS xpu_config_resnet50_test.cc EXTRA_DEPS + 
common paddle_inference_shared ARGS --infer_model=${RESNET50_MODEL_DIR}) @@ -1407,6 +1466,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS xpu_runtime_config_resnet50_test.cc EXTRA_DEPS + common paddle_inference_shared ARGS --infer_model=${RESNET50_MODEL_DIR}) @@ -1419,6 +1479,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) SRCS api_impl_tester.cc DEPS + common paddle_inference_shared ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} @@ -1433,6 +1494,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analysis_predictor_tester.cc DEPS paddle_inference_shared + common ARGS --dirname=${WORD2VEC_MODEL_DIR}) endif() @@ -1445,6 +1507,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) mkldnn_quantizer_tester.cc DEPS paddle_inference_shared + common ARGS --dirname=${WORD2VEC_MODEL_DIR}) endif() diff --git a/test/cpp/inference/api/api_impl_tester.cc b/test/cpp/inference/api/api_impl_tester.cc index 78e908189cc1d4..535a4995665ed2 100644 --- a/test/cpp/inference/api/api_impl_tester.cc +++ b/test/cpp/inference/api/api_impl_tester.cc @@ -54,7 +54,7 @@ PaddleTensor LodTensorToPaddleTensor(phi::DenseTensor* t) { PADDLE_THROW(platform::errors::Unimplemented( "Unsupported tensor data type. Now only supports INT64, FP32, INT32.")); } - pt.shape = phi::vectorize<int>(t->dims()); + pt.shape = common::vectorize<int>(t->dims()); return pt; } @@ -135,7 +135,7 @@ void MainImageClassification(const ::paddle::PaddlePlace& place) { // Use normalized image pixels as input data, // which should be in the range [0.0, 1.0]. feed_target_shapes[0][0] = batch_size; - framework::DDim input_dims = phi::make_ddim(feed_target_shapes[0]); + framework::DDim input_dims = common::make_ddim(feed_target_shapes[0]); SetupTensor<float>( &input, input_dims, static_cast<float>(0), static_cast<float>(1)); std::vector<phi::DenseTensor*> cpu_feeds; @@ -243,7 +243,7 @@ void MainThreadsImageClassification(const ::paddle::PaddlePlace& place) { std::vector<std::vector<int64_t>> feed_target_shapes = GetFeedTargetShapes(config.model_dir, /*is_combined*/ false); feed_target_shapes[0][0] = batch_size; - framework::DDim input_dims = phi::make_ddim(feed_target_shapes[0]); + framework::DDim input_dims = common::make_ddim(feed_target_shapes[0]); SetupTensor<float>(&jobs[i], input_dims, 0.f, 1.f); paddle_tensor_feeds[i].push_back(LodTensorToPaddleTensor(&jobs[i])); diff --git a/test/cpp/inference/api/mkldnn_quantizer_tester.cc b/test/cpp/inference/api/mkldnn_quantizer_tester.cc index 8edad9fe27127a..28840dbbb0fb40 100644 --- a/test/cpp/inference/api/mkldnn_quantizer_tester.cc +++ b/test/cpp/inference/api/mkldnn_quantizer_tester.cc @@ -108,7 +108,7 @@ TEST_F(MkldnnQuantizerTest, histogram_inverted_min_max) { auto max_val = *std::max_element(values.begin(), values.end()); phi::DenseTensor var_tensor; - var_tensor.Resize(phi::make_dim(values.size())); + var_tensor.Resize(common::make_dim(values.size())); std::copy(begin(values), end(values), var_tensor.mutable_data<float>(phi::CPUPlace())); @@ -124,7 +124,7 @@ TEST_F(MkldnnQuantizerTest, histogram_non_negative_to_3) { auto max_val = *std::max_element(values.begin(), values.end()); phi::DenseTensor var_tensor; - var_tensor.Resize(phi::make_dim(values.size())); + var_tensor.Resize(common::make_dim(values.size())); std::copy(begin(values), end(values), var_tensor.mutable_data<float>(phi::CPUPlace())); @@ -148,7 +148,7 @@ TEST_F(MkldnnQuantizerTest, histogram_positive_and_negative_to_3) { auto max_val = *std::max_element(values.begin(), values.end()); phi::DenseTensor var_tensor; - var_tensor.Resize(phi::make_dim(values.size())); + var_tensor.Resize(common::make_dim(values.size())); 
std::copy(begin(values), end(values), var_tensor.mutable_data<float>(phi::CPUPlace())); @@ -172,7 +172,7 @@ TEST_F(MkldnnQuantizerTest, histogram_zero_bins) { auto max_val = *std::max_element(values.begin(), values.end()); phi::DenseTensor var_tensor; - var_tensor.Resize(phi::make_dim(values.size())); + var_tensor.Resize(common::make_dim(values.size())); std::copy(begin(values), end(values), var_tensor.mutable_data<float>(phi::CPUPlace())); @@ -197,7 +197,7 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_signed) { const auto& values = positive_and_negative_values; phi::DenseTensor var_tensor; - var_tensor.Resize(phi::make_dim(values.size())); + var_tensor.Resize(common::make_dim(values.size())); std::copy(begin(values), end(values), var_tensor.mutable_data<float>(phi::CPUPlace())); @@ -217,7 +217,7 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_signed) { auto max_val = *std::max_element(values.begin(), values.end()); phi::DenseTensor var_tensor; - var_tensor.Resize(phi::make_dim(values.size())); + var_tensor.Resize(common::make_dim(values.size())); std::copy(begin(values), end(values), var_tensor.mutable_data<float>(phi::CPUPlace())); @@ -237,7 +237,7 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_unsigned) { auto max_val = *std::max_element(values.begin(), values.end()); phi::DenseTensor var_tensor; - var_tensor.Resize(phi::make_dim(values.size())); + var_tensor.Resize(common::make_dim(values.size())); std::copy(begin(values), end(values), var_tensor.mutable_data<float>(phi::CPUPlace())); @@ -258,7 +258,7 @@ TEST_F(MkldnnQuantizerTest, max_scaling_factor_chwise_unsigned) { int channels = 3; phi::DenseTensor var_tensor; - var_tensor.Resize(phi::make_dim(channels, 1, 1, values.size())); + var_tensor.Resize(common::make_dim(channels, 1, 1, values.size())); for (int i = 0; i < channels; i++) std::copy( begin(values), @@ -281,7 +281,7 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_unsigned) { const auto& values = non_negative_values; phi::DenseTensor var_tensor; - var_tensor.Resize(phi::make_dim(values.size())); + var_tensor.Resize(common::make_dim(values.size())); std::copy(begin(values), end(values), var_tensor.mutable_data<float>(phi::CPUPlace())); @@ -307,14 +307,14 @@ const std::vector<std::vector<float>> wh = { TEST_F(MkldnnQuantizerTest, max_ch_gru_scaling_factor) { phi::DenseTensor wx_tensor, wh_tensor, lod_tensor; - wx_tensor.Resize(phi::make_dim(wx.size(), wx[0].size())); + wx_tensor.Resize(common::make_dim(wx.size(), wx[0].size())); for (size_t i = 0; i < wx.size(); i++) std::copy( begin(wx[i]), end(wx[i]), wx_tensor.mutable_data<float>(phi::CPUPlace()) + i * wx[0].size()); - wh_tensor.Resize(phi::make_dim(wh.size(), wh[0].size())); + wh_tensor.Resize(common::make_dim(wh.size(), wh[0].size())); for (size_t i = 0; i < wh.size(); i++) std::copy( begin(wh[i]), @@ -337,14 +337,14 @@ TEST_F(MkldnnQuantizerTest, max_ch_gru_scaling_factor) { TEST_F(MkldnnQuantizerTest, max_ch_lstm_scaling_factor) { phi::DenseTensor wx_tensor, wh_tensor, lod_tensor; - wx_tensor.Resize(phi::make_dim(wx.size(), wx[0].size())); + wx_tensor.Resize(common::make_dim(wx.size(), wx[0].size())); for (size_t i = 0; i < wx.size(); i++) std::copy( begin(wx[i]), end(wx[i]), wx_tensor.mutable_data<float>(phi::CPUPlace()) + i * wx[0].size()); - wh_tensor.Resize(phi::make_dim(wh.size(), wh[0].size())); + wh_tensor.Resize(common::make_dim(wh.size(), wh[0].size())); for (size_t i = 0; i < wh.size(); i++) std::copy( begin(wh[i]), diff --git a/test/cpp/inference/api/tester_helper.h b/test/cpp/inference/api/tester_helper.h index 8e5a9cd5034238..a410df859fe450 100644 --- 
a/test/cpp/inference/api/tester_helper.h +++ b/test/cpp/inference/api/tester_helper.h @@ -1098,8 +1098,8 @@ static bool CompareShape(const std::vector<int64_t> &a, static bool CompareTensorData(const phi::DenseTensor &a, const phi::DenseTensor &b) { - auto a_shape = phi::vectorize(a.dims()); - auto b_shape = phi::vectorize(b.dims()); + auto a_shape = common::vectorize(a.dims()); + auto b_shape = common::vectorize(b.dims()); size_t a_size = std::accumulate( a_shape.begin(), a_shape.end(), size_t{1}, [](int a, int b) { return a * b; }); @@ -1147,7 +1147,7 @@ static bool CompareTensor(const phi::DenseTensor &a, if (!CompareLoD(a.lod(), b.lod())) { return false; } - if (!CompareShape(phi::vectorize(a.dims()), phi::vectorize(b.dims()))) { + if (!CompareShape(common::vectorize(a.dims()), common::vectorize(b.dims()))) { return false; } diff --git a/test/cpp/inference/test_helper.h b/test/cpp/inference/test_helper.h index f66712401858ab..0107654d349b17 100644 --- a/test/cpp/inference/test_helper.h +++ b/test/cpp/inference/test_helper.h @@ -59,7 +59,7 @@ template <typename T> void SetupTensor(phi::DenseTensor* input, paddle::framework::DDim dims, const std::vector<T>& data) { - CHECK_EQ(phi::product(dims), static_cast<int64_t>(data.size())); + CHECK_EQ(common::product(dims), static_cast<int64_t>(data.size())); T* input_ptr = input->mutable_data<T>(dims, paddle::platform::CPUPlace()); memcpy(input_ptr, data.data(), input->numel() * sizeof(T)); } diff --git a/test/cpp/jit/CMakeLists.txt b/test/cpp/jit/CMakeLists.txt index ee1d5c94a9a17d..b5247b1902c425 100644 --- a/test/cpp/jit/CMakeLists.txt +++ b/test/cpp/jit/CMakeLists.txt @@ -8,6 +8,7 @@ if(WITH_TESTING AND NOT WIN32) WORKING_DIRECTORY "${CC_TESTS_DIR}") set(JIT_DEPS phi + common elementwise_add_op activation_op reduce_mean_op @@ -21,13 +22,4 @@ if(WITH_TESTING AND NOT WIN32) layer_test SRCS layer_test.cc DEPS ${JIT_DEPS}) - # add_dependencies(layer_test jit_download_program) - - cc_test( - layer_test_new - SRCS layer_test.cc - DEPS ${JIT_DEPS}) - # add_dependencies(layer_test_new jit_download_program) - # set_tests_properties(layer_test_new PROPERTIES ENVIRONMENT - # "FLAGS_jit_engine_type=New") endif() diff --git a/test/cpp/jit/layer_test.cc b/test/cpp/jit/layer_test.cc index c163f3c50d9dd3..1c3e76dbc6b15d 100644 --- a/test/cpp/jit/layer_test.cc +++ b/test/cpp/jit/layer_test.cc @@ -64,7 +64,7 @@ std::vector<Tensor> PrepareInputs(const phi::Place& place) { auto& dev_ctx = *pool.Get(place); DenseTensor t; - t.Resize(phi::make_ddim({2, 4})); + t.Resize(common::make_ddim({2, 4})); t.mutable_data<float>(place); phi::funcs::set_constant(dev_ctx, &t, 2.); diff --git a/test/cpp/new_executor/CMakeLists.txt b/test/cpp/new_executor/CMakeLists.txt index c5906fc0f263e3..cd80ee4944a533 100644 --- a/test/cpp/new_executor/CMakeLists.txt +++ b/test/cpp/new_executor/CMakeLists.txt @@ -1,7 +1,8 @@ # skip win32 since wget is not installed by default on windows machine. if(NOT WIN32) - paddle_test(standalone_executor_pir_test SRCS standalone_executor_pir_test.cc) + paddle_test(standalone_executor_pir_test SRCS standalone_executor_pir_test.cc + DEPS common) endif() set(OPS @@ -41,7 +42,7 @@ if(WITH_GPU # all operators used in the program # All deps of the operators above, part of GLOB_OPERATOR_DEPS. 
- set(OP_DEPS phi concat_and_split cross_entropy) + set(OP_DEPS phi common concat_and_split cross_entropy) cc_test(standalone_executor_test SRCS standalone_executor_test.cc) # add_dependencies(standalone_executor_test download_program) diff --git a/test/cpp/new_executor/standalone_executor_test.cc b/test/cpp/new_executor/standalone_executor_test.cc index e25f8e0aec99d2..727cb895c5e6b4 100644 --- a/test/cpp/new_executor/standalone_executor_test.cc +++ b/test/cpp/new_executor/standalone_executor_test.cc @@ -289,7 +289,7 @@ TEST(InterpreterCore, workqueue_multiplexing) { float data_a[] = {0, 1, 2, 3}; float data_b[] = {0.0, 0.1, 0.2, 0.3}; - phi::DDim dims = phi::make_ddim({2, 2}); + phi::DDim dims = common::make_ddim({2, 2}); const platform::CPUPlace place = platform::CPUPlace(); phi::DenseTensor tensor_a = phi::DenseTensor(); diff --git a/test/cpp/phi/api/CMakeLists.txt b/test/cpp/phi/api/CMakeLists.txt index fd06e6d460df97..c0b392b347fb9b 100644 --- a/test/cpp/phi/api/CMakeLists.txt +++ b/test/cpp/phi/api/CMakeLists.txt @@ -1,4 +1,4 @@ -set(COMMON_API_TEST_DEPS phi) +set(COMMON_API_TEST_DEPS phi common) if(WITH_GPU) nv_test( @@ -8,11 +8,11 @@ if(WITH_GPU) nv_test( test_allocator SRCS test_allocator.cu - DEPS place device_context phi) + DEPS place device_context phi common) nv_test( test_cuda_stream SRCS test_cuda_stream.cu - DEPS phi) + DEPS phi common) nv_test( test_from_blob SRCS test_from_blob.cc @@ -25,11 +25,11 @@ elseif(WITH_ROCM) hip_test( test_allocator SRCS test_allocator.cu - DEPS place device_context phi) + DEPS place device_context phi common) hip_test( test_cuda_stream SRCS test_cuda_stream.cu - DEPS phi) + DEPS phi common) hip_test( test_from_blob SRCS test_from_blob.cc diff --git a/test/cpp/phi/api/test_phi_exception.cc b/test/cpp/phi/api/test_phi_exception.cc index 99576a0254b5b5..7d0fdf1b57fab0 100644 --- a/test/cpp/phi/api/test_phi_exception.cc +++ b/test/cpp/phi/api/test_phi_exception.cc @@ -13,7 +13,7 @@ limitations under the License. 
*/ #include #include "gtest/gtest.h" -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" namespace paddle { namespace tests { diff --git a/test/cpp/phi/api/test_phi_tensor.cc b/test/cpp/phi/api/test_phi_tensor.cc index 67fdc4d0e53947..419e8a10e167d8 100644 --- a/test/cpp/phi/api/test_phi_tensor.cc +++ b/test/cpp/phi/api/test_phi_tensor.cc @@ -227,7 +227,7 @@ void TestDataInterface() { std::vector<int64_t> rows = {0}; std::shared_ptr<phi::SelectedRows> selected_rows = std::make_shared<phi::SelectedRows>(rows, 1); - selected_rows->mutable_value()->Resize(phi::make_ddim({1, 1})); + selected_rows->mutable_value()->Resize(common::make_ddim({1, 1})); selected_rows->mutable_value()->mutable_data<float>(phi::CPUPlace())[0] = static_cast<float>(10.0f); paddle::Tensor sr_tensor = paddle::Tensor(selected_rows); diff --git a/test/cpp/phi/api/test_strings_empty_api.cc b/test/cpp/phi/api/test_strings_empty_api.cc index 02c7705735170b..5160acf73f10cb 100644 --- a/test/cpp/phi/api/test_strings_empty_api.cc +++ b/test/cpp/phi/api/test_strings_empty_api.cc @@ -43,7 +43,7 @@ TEST(API, strings_empty) { auto dense_shape = std::make_shared<phi::DenseTensor>( alloc.get(), phi::DenseTensorMeta( - phi::DataType::INT64, phi::make_ddim({2}), phi::DataLayout::NCHW)); + phi::DataType::INT64, common::make_ddim({2}), phi::DataLayout::NCHW)); auto* dev_ctx = phi::DeviceContextPool::Instance().GetByPlace(phi::CPUPlace()); auto* shape_data = dev_ctx->template Alloc<int64_t>(dense_shape.get()); diff --git a/test/cpp/phi/api/test_to_api.cc b/test/cpp/phi/api/test_to_api.cc index 7a83003e118cf0..beef25a5fb9bce 100644 --- a/test/cpp/phi/api/test_to_api.cc +++ b/test/cpp/phi/api/test_to_api.cc @@ -33,8 +33,9 @@ paddle::Tensor CreateInputTensor() { std::make_unique<paddle::experimental::DefaultAllocator>(phi::CPUPlace()); auto dense_x = std::make_shared<phi::DenseTensor>( alloc.get(), - phi::DenseTensorMeta( - phi::DataType::INT64, phi::make_ddim({3, 4}), phi::DataLayout::NCHW)); + phi::DenseTensorMeta(phi::DataType::INT64, + common::make_ddim({3, 4}), + phi::DataLayout::NCHW)); auto* dev_ctx = phi::DeviceContextPool::Instance().GetByPlace(phi::CPUPlace()); auto* dense_x_data = dev_ctx->template Alloc<int64_t>(dense_x.get()); diff --git a/test/cpp/phi/common/CMakeLists.txt b/test/cpp/phi/common/CMakeLists.txt index b40e7e9f5a41e7..854a870420fcc0 100644 --- a/test/cpp/phi/common/CMakeLists.txt +++ b/test/cpp/phi/common/CMakeLists.txt @@ -13,32 +13,32 @@ cc_test( cc_test( phi_test_place SRCS test_place.cc - DEPS phi) + DEPS phi common) cc_test( phi_test_int_array SRCS test_int_array.cc - DEPS phi) + DEPS phi common) cc_test( phi_test_scalar_cpu SRCS test_scalar.cc - DEPS phi) + DEPS phi common) if(WITH_GPU) nv_test( phi_test_scalar SRCS test_scalar.cu - DEPS phi) + DEPS phi common) nv_test( transform_test SRCS transform_test.cu - DEPS memory place phi) + DEPS memory place phi common) endif() if(WITH_ROCM) hip_test( phi_test_scalar SRCS test_scalar.cu - DEPS phi) + DEPS phi common) hip_test( transform_test SRCS transform_test.cu - DEPS memory place phi) + DEPS memory place phi common) endif() diff --git a/test/cpp/phi/common/test_backend.cc b/test/cpp/phi/common/test_backend.cc index 516deeee34af20..97b5336dc17bfc 100644 --- a/test/cpp/phi/common/test_backend.cc +++ b/test/cpp/phi/common/test_backend.cc @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/common/backend.h" namespace phi { diff --git a/test/cpp/phi/common/test_data_layout.cc b/test/cpp/phi/common/test_data_layout.cc index 889dfe07860c56..e267ea389f07a1 100644 --- a/test/cpp/phi/common/test_data_layout.cc +++ b/test/cpp/phi/common/test_data_layout.cc @@ -17,8 +17,8 @@ limitations under the License. */ #include #include -#include "paddle/phi/api/ext/exception.h" -#include "paddle/phi/common/layout.h" +#include "paddle/common/exception.h" +#include "paddle/common/layout.h" namespace phi { namespace tests { diff --git a/test/cpp/phi/common/test_data_type.cc b/test/cpp/phi/common/test_data_type.cc index 4d3d1de64924da..3bc2935b5abc82 100644 --- a/test/cpp/phi/common/test_data_type.cc +++ b/test/cpp/phi/common/test_data_type.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include #include -#include "paddle/phi/api/ext/exception.h" +#include "paddle/common/exception.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/type_traits.h" diff --git a/test/cpp/phi/common/test_scalar.cu b/test/cpp/phi/common/test_scalar.cu index 9fbcb99cece136..b1748e957c565e 100644 --- a/test/cpp/phi/common/test_scalar.cu +++ b/test/cpp/phi/common/test_scalar.cu @@ -40,10 +40,10 @@ TEST(Scalar, ConstructFromDenseTensor1) { // 1. create tensor const auto alloc = std::make_unique(phi::CPUPlace()); - phi::DenseTensor dense_x( - alloc.get(), - phi::DenseTensorMeta( - phi::DataType::FLOAT16, phi::make_ddim({1}), phi::DataLayout::NCHW)); + phi::DenseTensor dense_x(alloc.get(), + phi::DenseTensorMeta(phi::DataType::FLOAT16, + common::make_ddim({1}), + phi::DataLayout::NCHW)); phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance(); auto* dev_ctx = reinterpret_cast(pool.Get(phi::CPUPlace())); @@ -60,7 +60,7 @@ TEST(Scalar, ConstructFromDenseTensor2) { phi::DenseTensor dense_x( alloc.get(), phi::DenseTensorMeta( - phi::DataType::INT16, phi::make_ddim({1}), phi::DataLayout::NCHW)); + phi::DataType::INT16, common::make_ddim({1}), phi::DataLayout::NCHW)); phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance(); auto* dev_ctx = reinterpret_cast(pool.Get(phi::CPUPlace())); @@ -77,7 +77,7 @@ TEST(Scalar, ConstructFromDenseTensor3) { phi::DenseTensor dense_x( alloc.get(), phi::DenseTensorMeta( - phi::DataType::INT8, phi::make_ddim({1}), phi::DataLayout::NCHW)); + phi::DataType::INT8, common::make_ddim({1}), phi::DataLayout::NCHW)); phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance(); auto* dev_ctx = reinterpret_cast(pool.Get(phi::CPUPlace())); @@ -94,7 +94,7 @@ TEST(Scalar, ConstructFromDenseTensor4) { phi::DenseTensor dense_x( alloc.get(), phi::DenseTensorMeta( - phi::DataType::BOOL, phi::make_ddim({1}), phi::DataLayout::NCHW)); + phi::DataType::BOOL, common::make_ddim({1}), phi::DataLayout::NCHW)); phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance(); auto* dev_ctx = reinterpret_cast(pool.Get(phi::CPUPlace())); @@ -110,7 +110,7 @@ TEST(Scalar, ConstructFromDenseTensor5) { std::make_unique(phi::CPUPlace()); phi::DenseTensor dense_x(alloc.get(), phi::DenseTensorMeta(phi::DataType::COMPLEX64, - phi::make_ddim({1}), + common::make_ddim({1}), phi::DataLayout::NCHW)); phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance(); auto* dev_ctx = reinterpret_cast(pool.Get(phi::CPUPlace())); @@ -128,7 +128,7 @@ TEST(Scalar, ConstructFromDenseTensor6) { std::make_unique(phi::CPUPlace()); phi::DenseTensor dense_x(alloc.get(), 
phi::DenseTensorMeta(phi::DataType::COMPLEX128, - phi::make_ddim({1}), + common::make_ddim({1}), phi::DataLayout::NCHW)); phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance(); auto* dev_ctx = reinterpret_cast(pool.Get(phi::CPUPlace())); @@ -144,10 +144,10 @@ TEST(Scalar, ConstructFromDenseTensor7) { // 1. create tensor const auto alloc = std::make_unique(phi::GPUPlace()); - phi::DenseTensor dense_x( - alloc.get(), - phi::DenseTensorMeta( - phi::DataType::FLOAT32, phi::make_ddim({1}), phi::DataLayout::NCHW)); + phi::DenseTensor dense_x(alloc.get(), + phi::DenseTensorMeta(phi::DataType::FLOAT32, + common::make_ddim({1}), + phi::DataLayout::NCHW)); phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance(); auto* dev_ctx = reinterpret_cast(pool.Get(phi::GPUPlace())); @@ -164,8 +164,9 @@ TEST(Scalar, ConstructFromTensor) { std::make_unique(phi::GPUPlace()); auto dense_x = std::make_shared( alloc.get(), - phi::DenseTensorMeta( - phi::DataType::FLOAT32, phi::make_ddim({1}), phi::DataLayout::NCHW)); + phi::DenseTensorMeta(phi::DataType::FLOAT32, + common::make_ddim({1}), + phi::DataLayout::NCHW)); phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance(); auto* dev_ctx = reinterpret_cast(pool.Get(phi::GPUPlace())); diff --git a/test/cpp/phi/core/CMakeLists.txt b/test/cpp/phi/core/CMakeLists.txt index be16b8c4e3508c..b5d83f69aabf8b 100644 --- a/test/cpp/phi/core/CMakeLists.txt +++ b/test/cpp/phi/core/CMakeLists.txt @@ -1,25 +1,25 @@ cc_test( test_custom_kernel SRCS test_custom_kernel.cc - DEPS phi) + DEPS phi common) cc_test( test_dense_tensor SRCS test_dense_tensor.cc - DEPS phi) + DEPS phi common) cc_test(test_intrusive_ptr SRCS test_intrusive_ptr.cc) cc_test(test_type_info SRCS test_type_info.cc) cc_test( test_kernel_factory SRCS test_kernel_factory.cc - DEPS phi) + DEPS phi common) cc_test( test_sparse_coo_tensor SRCS test_sparse_coo_tensor.cc - DEPS phi) + DEPS phi common) cc_test( test_sparse_csr_tensor SRCS test_sparse_csr_tensor.cc - DEPS phi) + DEPS phi common) cc_test( test_op_utils SRCS test_op_utils.cc @@ -27,28 +27,28 @@ cc_test( cc_test( test_meta_fn_utils SRCS test_meta_fn_utils.cc - DEPS phi) + DEPS phi common) cc_test( test_ddim SRCS test_ddim.cc - DEPS phi) + DEPS phi common) if(WITH_GPU) nv_test( test_dim SRCS test_dim.cu - DEPS phi) + DEPS phi common) elseif(WITH_ROCM) hip_test( test_dim SRCS test_dim.cu - DEPS phi) + DEPS phi common) endif() cc_test( selected_rows_test SRCS test_selected_rows.cc - DEPS phi) + DEPS phi common) if(WITH_TESTING AND TEST selected_rows_test) set_tests_properties(selected_rows_test PROPERTIES TIMEOUT 120) endif() @@ -58,27 +58,27 @@ endif() cc_test( test_string_tensor SRCS test_string_tensor.cc - DEPS phi) + DEPS phi common) cc_test(unroll_array_ops_test SRCS unroll_array_ops_test.cc) cc_test( test_tensor_array SRCS test_tensor_array.cc - DEPS phi) + DEPS phi common) if(WITH_GPU) nv_test( test_mixed_vector SRCS test_mixed_vector.cc test_mixed_vector.cu - DEPS place memory phi tensor) + DEPS place memory phi common tensor) elseif(WITH_ROCM) hip_test( test_mixed_vector SRCS test_mixed_vector.cc test_mixed_vector.cu - DEPS place memory phi tensor) + DEPS place memory phi common tensor) else() cc_test( test_mixed_vector SRCS test_mixed_vector.cc - DEPS place memory phi tensor) + DEPS place memory phi common tensor) endif() diff --git a/test/cpp/phi/core/test_custom_kernel.cc b/test/cpp/phi/core/test_custom_kernel.cc index 38f59589f72477..b4a9e9da619135 100644 --- a/test/cpp/phi/core/test_custom_kernel.cc +++ 
b/test/cpp/phi/core/test_custom_kernel.cc @@ -200,15 +200,17 @@ TEST(CustomKernel, custom_kernel_dot) { std::make_unique(phi::CPUPlace()); auto dense_x = std::make_shared( alloc.get(), - phi::DenseTensorMeta( - phi::DataType::UINT8, phi::make_ddim({2, 3}), phi::DataLayout::NCHW)); + phi::DenseTensorMeta(phi::DataType::UINT8, + common::make_ddim({2, 3}), + phi::DataLayout::NCHW)); auto* dev_ctx = phi::DeviceContextPool::Instance().Get(phi::CPUPlace()); auto* dense_x_data = dev_ctx->template Alloc(dense_x.get()); auto dense_y = std::make_shared( alloc.get(), - phi::DenseTensorMeta( - phi::DataType::UINT8, phi::make_ddim({2, 3}), phi::DataLayout::NCHW)); + phi::DenseTensorMeta(phi::DataType::UINT8, + common::make_ddim({2, 3}), + phi::DataLayout::NCHW)); auto* dense_y_data = dev_ctx->template Alloc(dense_y.get()); // dot x,y and result diff --git a/test/cpp/phi/core/test_ddim.cc b/test/cpp/phi/core/test_ddim.cc index a58d86e62aa403..78d8deebdca3d0 100644 --- a/test/cpp/phi/core/test_ddim.cc +++ b/test/cpp/phi/core/test_ddim.cc @@ -15,7 +15,7 @@ #include #include "gtest/gtest.h" -#include "paddle/phi/core/ddim.h" +#include "paddle/common/ddim.h" namespace phi { namespace tests { @@ -27,19 +27,19 @@ TEST(DDim, Equality) { EXPECT_EQ(default_ddim[0], 0); // construct a zero-DDim - phi::DDim zero_ddim = phi::make_ddim({}); + phi::DDim zero_ddim = common::make_ddim({}); EXPECT_EQ(arity(zero_ddim), 0); EXPECT_EQ(zero_ddim.size(), 0); - EXPECT_EQ(phi::product(zero_ddim), 1); + EXPECT_EQ(common::product(zero_ddim), 1); std::vector zero_vec; - phi::DDim zero_ddim1 = phi::make_ddim(zero_vec); + phi::DDim zero_ddim1 = common::make_ddim(zero_vec); EXPECT_EQ(arity(zero_ddim1), 0); EXPECT_EQ(zero_ddim1.size(), 0); - EXPECT_EQ(phi::product(zero_ddim1), 1); + EXPECT_EQ(common::product(zero_ddim1), 1); // zero-DDim to vector - std::vector zero_ddim_vec = phi::vectorize(zero_ddim); + std::vector zero_ddim_vec = common::vectorize(zero_ddim); EXPECT_EQ(zero_ddim_vec.size(), size_t(0)); // reshape zero-DDim @@ -47,16 +47,16 @@ TEST(DDim, Equality) { phi::DDim reshape_ddim = zero_ddim.reshape(reshape_vec); EXPECT_EQ(arity(reshape_ddim), 1); EXPECT_EQ(reshape_ddim.size(), 1); - EXPECT_EQ(phi::product(reshape_ddim), 1); + EXPECT_EQ(common::product(reshape_ddim), 1); // construct a DDim from an initialization list - phi::DDim ddim = phi::make_ddim({9, 1, 5}); + phi::DDim ddim = common::make_ddim({9, 1, 5}); EXPECT_EQ(ddim[0], 9); EXPECT_EQ(ddim[1], 1); EXPECT_EQ(ddim[2], 5); // arity of a DDim - EXPECT_EQ(phi::arity(ddim), 3); + EXPECT_EQ(common::arity(ddim), 3); EXPECT_EQ(ddim.size(), 3); // mutate a DDim @@ -67,35 +67,35 @@ TEST(DDim, Equality) { // construct a DDim from a vector std::vector vec({9, 1, 5}); - phi::DDim vddim = phi::make_ddim(vec); + phi::DDim vddim = common::make_ddim(vec); EXPECT_EQ(vddim[0], 9); EXPECT_EQ(vddim[1], 1); EXPECT_EQ(vddim[2], 5); // vectorize a DDim - std::vector res_vec = phi::vectorize(vddim); + std::vector res_vec = common::vectorize(vddim); EXPECT_EQ(res_vec[0], 9); EXPECT_EQ(res_vec[1], 1); EXPECT_EQ(res_vec[2], 5); phi::Dim<3> d(3, 2, 1); - res_vec = phi::vectorize(phi::DDim(d)); + res_vec = common::vectorize(phi::DDim(d)); EXPECT_EQ(res_vec[0], 3); EXPECT_EQ(res_vec[1], 2); EXPECT_EQ(res_vec[2], 1); // product of a DDim - EXPECT_EQ(phi::product(vddim), 45); - EXPECT_EQ(phi::product(phi::make_ddim({3, 2, 5, 3})), 90); + EXPECT_EQ(common::product(vddim), 45); + EXPECT_EQ(common::product(common::make_ddim({3, 2, 5, 3})), 90); // slice a DDim - phi::DDim ddim2 = phi::make_ddim({1, 
2, 3, 4, 5, 6}); - phi::DDim slice_dim1 = phi::slice_ddim(ddim2, 2, 5); + phi::DDim ddim2 = common::make_ddim({1, 2, 3, 4, 5, 6}); + phi::DDim slice_dim1 = common::slice_ddim(ddim2, 2, 5); EXPECT_EQ(arity(slice_dim1), 3); EXPECT_EQ(slice_dim1[0], 3); EXPECT_EQ(slice_dim1[1], 4); EXPECT_EQ(slice_dim1[2], 5); - phi::DDim slice_dim2 = phi::slice_ddim(ddim2, 0, 6); + phi::DDim slice_dim2 = common::slice_ddim(ddim2, 0, 6); EXPECT_EQ(arity(slice_dim2), 6); EXPECT_EQ(slice_dim2[0], 1); EXPECT_EQ(slice_dim2[1], 2); @@ -104,22 +104,22 @@ TEST(DDim, Equality) { EXPECT_EQ(slice_dim2[4], 5); EXPECT_EQ(slice_dim2[5], 6); - phi::DDim slice_dim3 = phi::slice_ddim(ddim2, 1, 1); + phi::DDim slice_dim3 = common::slice_ddim(ddim2, 1, 1); EXPECT_EQ(arity(slice_dim3), 0); EXPECT_EQ(slice_dim3.size(), 0); - EXPECT_EQ(phi::product(slice_dim3), 1); + EXPECT_EQ(common::product(slice_dim3), 1); } TEST(DDim, Print) { // print a DDim std::stringstream ss1; - phi::DDim ddim = phi::make_ddim({2, 3, 4}); + phi::DDim ddim = common::make_ddim({2, 3, 4}); ss1 << ddim; EXPECT_EQ("2, 3, 4", ss1.str()); // print a zero-DDim std::stringstream ss2; - phi::DDim zero_ddim = phi::make_ddim({}); + phi::DDim zero_ddim = common::make_ddim({}); ss2 << zero_ddim; EXPECT_EQ("", ss2.str()); } @@ -127,7 +127,7 @@ TEST(DDim, Print) { TEST(DDim, Hash) { // hash a DDim std::size_t h = 0; - phi::DDim ddim = phi::make_ddim({2, 3, 4}); + phi::DDim ddim = common::make_ddim({2, 3, 4}); h = std::hash()(ddim); EXPECT_EQ(h, 0xa16fb2b2967ul); } diff --git a/test/cpp/phi/core/test_dim.cu b/test/cpp/phi/core/test_dim.cu index 2a449191367b4e..cf7196dadd3550 100644 --- a/test/cpp/phi/core/test_dim.cu +++ b/test/cpp/phi/core/test_dim.cu @@ -17,21 +17,21 @@ #include #include "gtest/gtest.h" -#include "paddle/phi/core/utils/dim.h" +#include "paddle/common/dim.h" namespace phi { namespace tests { -__global__ void test(phi::Dim<2>* o) { o[0] = phi::make_dim(5, 6); } +__global__ void test(phi::Dim<2>* o) { o[0] = common::make_dim(5, 6); } __global__ void dyn_idx_gpu(int64_t* o) { - auto d = phi::make_dim(5, 6); + auto d = common::make_dim(5, 6); o[0] = d[1]; } TEST(Dim, Equality) { // construct a Dim on the CPU - auto a = phi::make_dim(3, 4); + auto a = common::make_dim(3, 4); EXPECT_EQ(a[0], 3); EXPECT_EQ(a[1], 4); @@ -48,10 +48,10 @@ TEST(Dim, Equality) { EXPECT_EQ(a[1], 6); // product - EXPECT_EQ(phi::product(a), 30); + EXPECT_EQ(common::product(a), 30); // mutate a Dim - auto b = phi::make_dim(7, 8); + auto b = common::make_dim(7, 8); b[1] = 10; EXPECT_EQ(b[0], 7); EXPECT_EQ(b[1], 10); @@ -74,9 +74,9 @@ TEST(Dim, Equality) { } TEST(Dim, Bool) { - auto a = phi::make_dim(3, 4); - auto b = phi::make_dim(5, 6); - auto c = phi::make_dim(3, 4); + auto a = common::make_dim(3, 4); + auto b = common::make_dim(5, 6); + auto c = common::make_dim(3, 4); // comparison EXPECT_TRUE(a == a); @@ -87,13 +87,13 @@ TEST(Dim, Bool) { TEST(Dim, Print) { { std::stringstream ss; - auto a = phi::make_dim(2, 3); + auto a = common::make_dim(2, 3); ss << a; EXPECT_EQ(ss.str(), "2, 3"); } { std::stringstream ss; - ss << phi::make_dim(8); + ss << common::make_dim(8); EXPECT_EQ(ss.str(), "8"); } } diff --git a/test/cpp/phi/core/test_meta_fn_utils.cc b/test/cpp/phi/core/test_meta_fn_utils.cc index 6c26d38a95a75e..7112f332abe2dd 100644 --- a/test/cpp/phi/core/test_meta_fn_utils.cc +++ b/test/cpp/phi/core/test_meta_fn_utils.cc @@ -24,7 +24,7 @@ namespace tests { TEST(MetaFnFactory, InferMetaFnExists) { phi::DenseTensor dense_x; - dense_x.Resize(phi::make_ddim({3, 4})); + 
dense_x.Resize(common::make_ddim({3, 4})); phi::MetaTensor meta_x(&dense_x); phi::DenseTensor dense_out1; diff --git a/test/cpp/phi/core/test_selected_rows.cc b/test/cpp/phi/core/test_selected_rows.cc index 1f56d851a7b5bc..e55266279d22bb 100644 --- a/test/cpp/phi/core/test_selected_rows.cc +++ b/test/cpp/phi/core/test_selected_rows.cc @@ -31,7 +31,8 @@ class SelectedRowsTester : public ::testing::Test { phi::DenseTensor* value = selected_rows_->mutable_value(); auto* data = value->mutable_data( - phi::make_ddim({static_cast(rows.size()), row_numel}), place_); + common::make_ddim({static_cast(rows.size()), row_numel}), + place_); for (int64_t i = 0; i < value->numel(); ++i) { data[i] = static_cast(i); } @@ -45,11 +46,11 @@ class SelectedRowsTester : public ::testing::Test { TEST_F(SelectedRowsTester, height) { ASSERT_EQ(selected_rows_->height(), 10); } TEST_F(SelectedRowsTester, dims) { - ASSERT_EQ(selected_rows_->value().dims(), phi::make_ddim({3, 100})); + ASSERT_EQ(selected_rows_->value().dims(), common::make_ddim({3, 100})); } TEST_F(SelectedRowsTester, complete_dims) { - ASSERT_EQ(selected_rows_->GetCompleteDims(), phi::make_ddim({10, 100})); + ASSERT_EQ(selected_rows_->GetCompleteDims(), common::make_ddim({10, 100})); } TEST(SelectedRows, SparseTable) { @@ -59,7 +60,8 @@ TEST(SelectedRows, SparseTable) { int64_t table_size = 100; int64_t embedding_width = 8; // initialize a sparse table - table.mutable_value()->Resize(phi::make_ddim({table_size, embedding_width})); + table.mutable_value()->Resize( + common::make_ddim({table_size, embedding_width})); auto* data = table.mutable_value()->mutable_data(cpu); for (int64_t i = 0; i < table_size; ++i) { for (int64_t j = 0; j < embedding_width; ++j) { @@ -80,7 +82,7 @@ TEST(SelectedRows, SparseTable) { ASSERT_EQ(table.rows().size(), 3UL); phi::DenseTensor ids; - ids.Resize(phi::make_ddim({4})); + ids.Resize(common::make_ddim({4})); auto* ids_data = ids.mutable_data(cpu); ids_data[0] = static_cast(6); ids_data[1] = static_cast(6); @@ -88,8 +90,8 @@ TEST(SelectedRows, SparseTable) { ids_data[3] = static_cast(10); phi::DenseTensor get_value; - auto* value_data = - get_value.mutable_data(phi::make_ddim({4, embedding_width}), cpu); + auto* value_data = get_value.mutable_data( + common::make_ddim({4, embedding_width}), cpu); table.Get(ids, &get_value); for (int j = 0; j < embedding_width; ++j) { @@ -157,7 +159,8 @@ TEST(SelectedRows, MultiThreadAutoIndex) { int64_t table_size = 100000; int64_t embedding_width = 8; // initialize a sparse table - table.mutable_value()->Resize(phi::make_ddim({table_size, embedding_width})); + table.mutable_value()->Resize( + common::make_ddim({table_size, embedding_width})); auto* data = table.mutable_value()->mutable_data(cpu); for (int64_t i = 0; i < table_size; ++i) { for (int64_t j = 0; j < embedding_width; ++j) { diff --git a/test/cpp/phi/core/test_sparse_coo_tensor.cc b/test/cpp/phi/core/test_sparse_coo_tensor.cc index e6d134ffb52350..d3e46fba334a6e 100644 --- a/test/cpp/phi/core/test_sparse_coo_tensor.cc +++ b/test/cpp/phi/core/test_sparse_coo_tensor.cc @@ -23,20 +23,21 @@ namespace tests { TEST(sparse_coo_tensor, construct) { phi::CPUPlace cpu; - auto dense_dims = phi::make_ddim({3, 3}); + auto dense_dims = common::make_ddim({3, 3}); std::vector non_zero_data = {1.0, 2.0, 3.0}; std::vector indices_data = {0, 1, 2, 0, 2, 1}; auto fancy_allocator = std::unique_ptr(new FancyAllocator); auto* alloc = fancy_allocator.get(); auto indices_dims = - phi::make_ddim({2, static_cast(non_zero_data.size())}); + 
common::make_ddim({2, static_cast(non_zero_data.size())}); DenseTensorMeta indices_meta(DataType::INT64, indices_dims, DataLayout::NCHW); DenseTensor indices(alloc, indices_meta); memcpy(indices.mutable_data(cpu), &indices_data[0], indices_data.size() * sizeof(int64_t)); - auto elements_dims = phi::make_ddim({static_cast(non_zero_data.size())}); + auto elements_dims = + common::make_ddim({static_cast(non_zero_data.size())}); DenseTensorMeta elements_meta( DataType::FLOAT32, elements_dims, DataLayout::NCHW); DenseTensor elements(alloc, elements_meta); @@ -58,13 +59,13 @@ TEST(sparse_coo_tensor, construct) { TEST(sparse_coo_tensor, other_function) { auto fancy_allocator = std::unique_ptr(new FancyAllocator); auto* alloc = fancy_allocator.get(); - auto dense_dims = phi::make_ddim({4, 4}); + auto dense_dims = common::make_ddim({4, 4}); const int non_zero_num = 2; - auto indices_dims = phi::make_ddim({2, non_zero_num}); + auto indices_dims = common::make_ddim({2, non_zero_num}); DenseTensorMeta indices_meta(DataType::INT64, indices_dims, DataLayout::NCHW); DenseTensor indices(alloc, indices_meta); - auto elements_dims = phi::make_ddim({non_zero_num}); + auto elements_dims = common::make_ddim({non_zero_num}); DenseTensorMeta elements_meta( DataType::FLOAT32, elements_dims, DataLayout::NCHW); DenseTensor elements(alloc, elements_meta); @@ -74,7 +75,7 @@ TEST(sparse_coo_tensor, other_function) { CHECK_EQ(coo.dims(), dense_dims); // Test Resize - auto dense_dims_3d = phi::make_ddim({2, 4, 4}); + auto dense_dims_3d = common::make_ddim({2, 4, 4}); coo.Resize(dense_dims_3d, 1, 3); CHECK_EQ(coo.nnz(), 3); diff --git a/test/cpp/phi/core/test_sparse_csr_tensor.cc b/test/cpp/phi/core/test_sparse_csr_tensor.cc index 56f671a7fc7e9e..78f19a1ba580d1 100644 --- a/test/cpp/phi/core/test_sparse_csr_tensor.cc +++ b/test/cpp/phi/core/test_sparse_csr_tensor.cc @@ -24,7 +24,7 @@ namespace tests { TEST(sparse_csr_tensor, construct) { phi::CPUPlace cpu; - auto dense_dims = phi::make_ddim({3, 3}); + auto dense_dims = common::make_ddim({3, 3}); std::vector non_zero_data = {1.0, 2.0, 3.0}; std::vector crows_data = {0, 1, 1, 3}; std::vector cols_data = {1, 0, 2}; @@ -32,7 +32,7 @@ TEST(sparse_csr_tensor, construct) { auto fancy_allocator = std::unique_ptr(new FancyAllocator); auto alloc = fancy_allocator.get(); // create non_zero_crows - auto crows_dims = phi::make_ddim({static_cast(crows_data.size())}); + auto crows_dims = common::make_ddim({static_cast(crows_data.size())}); DenseTensorMeta crows_meta(DataType::INT64, crows_dims, DataLayout::NCHW); DenseTensor crows(alloc, crows_meta); memcpy(crows.mutable_data(cpu), @@ -40,7 +40,7 @@ TEST(sparse_csr_tensor, construct) { crows_data.size() * sizeof(int64_t)); // create non_zero_cols - auto cols_dims = phi::make_ddim({static_cast(cols_data.size())}); + auto cols_dims = common::make_ddim({static_cast(cols_data.size())}); DenseTensorMeta cols_meta(DataType::INT64, cols_dims, DataLayout::NCHW); DenseTensor cols(alloc, cols_meta); memcpy(cols.mutable_data(cpu), @@ -48,7 +48,8 @@ TEST(sparse_csr_tensor, construct) { cols_data.size() * sizeof(int64_t)); // create non_zero_elements - auto elements_dims = phi::make_ddim({static_cast(non_zero_data.size())}); + auto elements_dims = + common::make_ddim({static_cast(non_zero_data.size())}); DenseTensorMeta elements_meta( DataType::FLOAT32, elements_dims, DataLayout::NCHW); DenseTensor elements(alloc, elements_meta); @@ -70,13 +71,13 @@ TEST(sparse_csr_tensor, construct) { TEST(sparse_csr_tensor, other_function) { auto fancy_allocator 
= std::unique_ptr(new FancyAllocator); auto alloc = fancy_allocator.get(); - auto dense_dims = phi::make_ddim({4, 4}); - auto crows_dims = phi::make_ddim({dense_dims[0] + 1}); + auto dense_dims = common::make_ddim({4, 4}); + auto crows_dims = common::make_ddim({dense_dims[0] + 1}); DenseTensorMeta crows_meta(DataType::INT64, crows_dims, DataLayout::NCHW); DenseTensor crows(alloc, crows_meta); const int64_t non_zero_num = 5; - auto cols_dims = phi::make_ddim({non_zero_num}); + auto cols_dims = common::make_ddim({non_zero_num}); DenseTensorMeta cols_meta(DataType::INT64, cols_dims, DataLayout::NCHW); DenseTensor cols(alloc, cols_meta); DenseTensorMeta values_meta(DataType::FLOAT32, cols_dims, DataLayout::NCHW); @@ -87,7 +88,7 @@ TEST(sparse_csr_tensor, other_function) { CHECK_EQ(csr.dims(), dense_dims); // Test Resize - auto dense_dims_3d = phi::make_ddim({2, 4, 4}); + auto dense_dims_3d = common::make_ddim({2, 4, 4}); csr.Resize(dense_dims_3d, 2); CHECK_EQ(csr.non_zero_cols().numel(), 2); diff --git a/test/cpp/phi/core/test_tensor_array.cc b/test/cpp/phi/core/test_tensor_array.cc index 201790a7bc0e10..ae2685d6fc98e7 100644 --- a/test/cpp/phi/core/test_tensor_array.cc +++ b/test/cpp/phi/core/test_tensor_array.cc @@ -17,9 +17,9 @@ limitations under the License. */ #include #include "gtest/gtest.h" +#include "paddle/common/errors.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/errors.h" #include "paddle/phi/core/tensor_array.h" #include "test/cpp/phi/core/allocator.h" diff --git a/test/cpp/phi/core/unroll_array_ops_test.cc b/test/cpp/phi/core/unroll_array_ops_test.cc index ddcf48844a7ad3..65d00dace78cc6 100644 --- a/test/cpp/phi/core/unroll_array_ops_test.cc +++ b/test/cpp/phi/core/unroll_array_ops_test.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
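// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: every hunk in this section
// follows the same pattern. The DDim helpers move from the phi namespace to
// common while phi::DDim itself keeps working at the old call sites, so each
// test only swaps the helper prefix, the matching include, and adds a `common`
// entry to DEPS in CMake. Below is a minimal self-contained usage of the
// migrated API; the test name is hypothetical, but the header and the
// functions are exactly the ones this patch switches the tests to.
#include <cstdint>
#include <vector>

#include "gtest/gtest.h"

#include "paddle/common/ddim.h"  // previously paddle/phi/core/ddim.h

TEST(CommonDDimMigrationSketch, Helpers) {
  phi::DDim dims = common::make_ddim({2, 3, 4});  // previously phi::make_ddim
  EXPECT_EQ(common::product(dims), 24);           // previously phi::product
  EXPECT_EQ(common::vectorize(dims),              // previously phi::vectorize
            (std::vector<int64_t>{2, 3, 4}));
  EXPECT_EQ(common::slice_ddim(dims, 1, 3),       // previously phi::slice_ddim
            common::make_ddim({3, 4}));
}
// ---------------------------------------------------------------------------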
-#include "paddle/phi/core/utils/unroll_array_ops.h" +#include "paddle/common/unroll_array_ops.h" #include @@ -32,7 +32,7 @@ bool FillConstantTestMain() { std::array arr; arr.fill(0); - UnrollFillConstant::Run(arr.data(), 1); + common::UnrollFillConstant::Run(arr.data(), 1); return CheckEquality(arr.data(), D2, 1) && CheckEquality(arr.data() + D2, arr.size() - D2, 0); } @@ -47,7 +47,7 @@ TEST(unroll_ops, fill_constant) { TEST(unroll_ops, assign) { const int a[] = {1, 2, 3, 4, 5}; // NOLINT int b[] = {0, 0, 0, 0, 0}; // NOLINT - UnrollAssign<3>::Run(a, b); + common::UnrollAssign<3>::Run(a, b); EXPECT_EQ(b[0], 1); EXPECT_EQ(b[1], 2); EXPECT_EQ(b[2], 3); @@ -57,7 +57,7 @@ TEST(unroll_ops, assign) { TEST(unroll_ops, var_args_assign) { int a[] = {0, 0, 0}; // NOLINT - UnrollVarArgsAssign::Run(a, 1, 2); + common::UnrollVarArgsAssign::Run(a, 1, 2); EXPECT_EQ(a[0], 1); EXPECT_EQ(a[1], 2); EXPECT_EQ(a[2], 0); @@ -66,17 +66,17 @@ TEST(unroll_ops, var_args_assign) { TEST(unroll_ops, compare) { int a[] = {1, 2, 3}; // NOLINT int b[] = {1, 2, 4}; // NOLINT - EXPECT_TRUE(UnrollCompare<2>::Run(a, b)); - EXPECT_FALSE(UnrollCompare<3>::Run(a, b)); + EXPECT_TRUE(common::UnrollCompare<2>::Run(a, b)); + EXPECT_FALSE(common::UnrollCompare<3>::Run(a, b)); b[0] = -1; - EXPECT_TRUE(UnrollCompare<0>::Run(a, b)); - EXPECT_FALSE(UnrollCompare<1>::Run(a, b)); + EXPECT_TRUE(common::UnrollCompare<0>::Run(a, b)); + EXPECT_FALSE(common::UnrollCompare<1>::Run(a, b)); } TEST(unroll_ops, product) { int a[] = {2, 3, 4}; // NOLINT - EXPECT_EQ(UnrollProduct<3>::Run(a), a[0] * a[1] * a[2]); + EXPECT_EQ(common::UnrollProduct<3>::Run(a), a[0] * a[1] * a[2]); } } // namespace framework diff --git a/test/cpp/phi/kernels/CMakeLists.txt b/test/cpp/phi/kernels/CMakeLists.txt index a4906b3d1a879c..36e7a0b10310b8 100644 --- a/test/cpp/phi/kernels/CMakeLists.txt +++ b/test/cpp/phi/kernels/CMakeLists.txt @@ -1,12 +1,12 @@ cc_test( test_math_function SRCS test_math_function.cc - DEPS phi) + DEPS phi common) if(WITH_GPU) nv_test( test_math_function_gpu SRCS test_math_function.cu - DEPS phi) + DEPS phi common) nv_test( test_broadcast_gpu SRCS test_ternary_broadcast.cu @@ -16,56 +16,56 @@ if(WITH_ROCM) hip_test( test_math_function_gpu SRCS test_math_function.cu - DEPS phi) + DEPS phi common) endif() cc_test( test_cpu_vec SRCS test_cpu_vec.cc - DEPS phi) + DEPS phi common) # For String Kernels cc_test( test_strings_lower_upper_dev_api SRCS test_strings_lower_upper_dev_api.cc - DEPS phi) + DEPS phi common) if(WITH_GPU) nv_test( test_strings_lower_upper_dev_gpu_api SRCS test_strings_lower_upper_dev_api.cu - DEPS phi) + DEPS phi common) elseif(WITH_ROCM) hip_test( test_strings_lower_upper_dev_gpu_api SRCS test_strings_lower_upper_dev_api.cu - DEPS phi) + DEPS phi common) endif() cc_test( test_strings_copy_dev_api SRCS test_strings_copy_dev_api.cc - DEPS phi) + DEPS phi common) if(WITH_GPU) nv_test( test_strings_copy_dev_gpu_api SRCS test_strings_copy_dev_api.cu - DEPS phi) + DEPS phi common) elseif(WITH_ROCM) hip_test( test_strings_copy_dev_gpu_api SRCS test_strings_copy_dev_api.cu - DEPS phi) + DEPS phi common) endif() cc_test( test_memcpy_dev_api SRCS test_memcpy_dev_api.cc - DEPS phi) + DEPS phi common) cc_test( test_transfer_layout_dev_api SRCS test_transfer_layout_dev_api.cc - DEPS phi) + DEPS phi common) if(WITH_GPU) nv_test( @@ -79,7 +79,7 @@ if(WITH_GPU) cc_test( test_fused_adam_kernel SRCS test_fused_adam_kernel.cc - DEPS gtest phi) + DEPS gtest phi common) elseif(WITH_ROCM) hip_test( test_gpu_timer @@ -94,19 +94,19 @@ endif() cc_test( 
test_cache SRCS test_cache.cc - DEPS gtest phi) + DEPS gtest phi common) cc_test( strided_memcpy_test SRCS strided_memcpy_test.cc - DEPS phi memory) + DEPS phi common memory) cc_test( sequence_padding_test SRCS sequence_padding_test.cc - DEPS phi) + DEPS phi common) cc_test( sequence_pooling_test SRCS sequence_pooling_test.cc - DEPS phi) + DEPS phi common) diff --git a/test/cpp/phi/kernels/sequence_padding_test.cc b/test/cpp/phi/kernels/sequence_padding_test.cc index 015d6f354c5beb..dab519337536e3 100644 --- a/test/cpp/phi/kernels/sequence_padding_test.cc +++ b/test/cpp/phi/kernels/sequence_padding_test.cc @@ -31,8 +31,8 @@ void TestSequencePadding(const DeviceContext &context, phi::DenseTensor pad_value; const size_t level = lod.size() - 1; - auto seq_dims = phi::make_ddim({static_cast(lod[level].back()), - static_cast(sequence_width)}); + auto seq_dims = common::make_ddim({static_cast(lod[level].back()), + static_cast(sequence_width)}); cpu_seq.set_lod(lod); auto *dev_ctx = static_cast( @@ -55,9 +55,10 @@ void TestSequencePadding(const DeviceContext &context, const size_t max_sequence_length = phi::funcs::MaximumSequenceLength(lod[level]); const size_t num_sequences = lod[level].size() - 1; - auto padding_dims = phi::make_ddim({static_cast(max_sequence_length), - static_cast(num_sequences), - static_cast(sequence_width)}); + auto padding_dims = + common::make_ddim({static_cast(max_sequence_length), + static_cast(num_sequences), + static_cast(sequence_width)}); padding.Resize(padding_dims); context.template Alloc(&padding); diff --git a/test/cpp/phi/kernels/sequence_pooling_test.cc b/test/cpp/phi/kernels/sequence_pooling_test.cc index 037ad314890c5f..2df2ffa12f969d 100644 --- a/test/cpp/phi/kernels/sequence_pooling_test.cc +++ b/test/cpp/phi/kernels/sequence_pooling_test.cc @@ -31,7 +31,7 @@ void TestSequencePoolingSum(const DeviceContext &context, // construct out_grad's tensor in cpu const size_t out_first_dim = lod[0].size() - 1; auto out_dims = - phi::make_ddim({static_cast(out_first_dim), second_dim}); + common::make_ddim({static_cast(out_first_dim), second_dim}); cpu_out_grad.mutable_data(out_dims, phi::CPUPlace()); for (int64_t i = 0; i < cpu_out_grad.numel(); ++i) { @@ -49,7 +49,7 @@ void TestSequencePoolingSum(const DeviceContext &context, // construct in_grad in_grad.set_lod(lod); auto in_dims = - phi::make_ddim({static_cast(lod[0].back()), second_dim}); + common::make_ddim({static_cast(lod[0].back()), second_dim}); in_grad.mutable_data(in_dims, place); // check tensor contruction result diff --git a/test/cpp/phi/kernels/test_auto_tune.cu b/test/cpp/phi/kernels/test_auto_tune.cu index 302f8809d2d575..ecdb4a0311bc17 100644 --- a/test/cpp/phi/kernels/test_auto_tune.cu +++ b/test/cpp/phi/kernels/test_auto_tune.cu @@ -83,12 +83,14 @@ TEST(AutoTune, sum) { std::make_unique(phi::CPUPlace()); auto in1 = std::make_shared( alloc_cpu.get(), - phi::DenseTensorMeta( - phi::DataType::FLOAT32, phi::make_ddim({N}), phi::DataLayout::NCHW)); + phi::DenseTensorMeta(phi::DataType::FLOAT32, + common::make_ddim({N}), + phi::DataLayout::NCHW)); auto in2 = std::make_shared( alloc_cpu.get(), - phi::DenseTensorMeta( - phi::DataType::FLOAT32, phi::make_ddim({N}), phi::DataLayout::NCHW)); + phi::DenseTensorMeta(phi::DataType::FLOAT32, + common::make_ddim({N}), + phi::DataLayout::NCHW)); float* in1_data = in1->data(); float* in2_data = in2->data(); @@ -106,12 +108,14 @@ TEST(AutoTune, sum) { auto d_in1 = std::make_shared( alloc_cuda.get(), - phi::DenseTensorMeta( - phi::DataType::FLOAT32, 
phi::make_ddim({N}), phi::DataLayout::NCHW)); + phi::DenseTensorMeta(phi::DataType::FLOAT32, + common::make_ddim({N}), + phi::DataLayout::NCHW)); auto d_in2 = std::make_shared( alloc_cuda.get(), - phi::DenseTensorMeta( - phi::DataType::FLOAT32, phi::make_ddim({N}), phi::DataLayout::NCHW)); + phi::DenseTensorMeta(phi::DataType::FLOAT32, + common::make_ddim({N}), + phi::DataLayout::NCHW)); phi::Copy(*dev_ctx, *in1.get(), phi::GPUPlace(), false, d_in1.get()); phi::Copy(*dev_ctx, *in2.get(), phi::GPUPlace(), false, d_in2.get()); diff --git a/test/cpp/phi/kernels/test_fused_adam_kernel.cc b/test/cpp/phi/kernels/test_fused_adam_kernel.cc index 7084b85ba73882..b4edd6b0c19770 100644 --- a/test/cpp/phi/kernels/test_fused_adam_kernel.cc +++ b/test/cpp/phi/kernels/test_fused_adam_kernel.cc @@ -366,8 +366,12 @@ auto MaxDiff(const Context &ctx, diff_reduced.Resize({1}); ctx.template Alloc(&diff_reduced); - MaxRawKernel( - ctx, diff, vectorize(x.dims()), false, true, &diff_reduced); + MaxRawKernel(ctx, + diff, + common::vectorize(x.dims()), + false, + true, + &diff_reduced); diff_reduced_cpu.Resize(diff_reduced.dims()); ctx.template HostAlloc(&diff_reduced_cpu); diff --git a/test/cpp/phi/kernels/test_memcpy_dev_api.cc b/test/cpp/phi/kernels/test_memcpy_dev_api.cc index a7f65e5b6dd6f7..2d06dcab602ea1 100644 --- a/test/cpp/phi/kernels/test_memcpy_dev_api.cc +++ b/test/cpp/phi/kernels/test_memcpy_dev_api.cc @@ -37,7 +37,7 @@ TEST(DEV_API, memcpy_d2h) { std::make_unique(phi::CPUPlace()); phi::DenseTensor x_cpu(cpu_alloc.get(), phi::DenseTensorMeta(phi::DataType::FLOAT32, - phi::make_ddim({3, 2, 2, 3}), + common::make_ddim({3, 2, 2, 3}), phi::DataLayout::NCHW)); auto& pool = phi::DeviceContextPool::Instance(); auto* cpu_ctx = pool.GetByPlace(phi::CPUPlace()); diff --git a/test/cpp/phi/kernels/test_ternary_broadcast.cu b/test/cpp/phi/kernels/test_ternary_broadcast.cu index 959b79725f07ae..137416df764673 100644 --- a/test/cpp/phi/kernels/test_ternary_broadcast.cu +++ b/test/cpp/phi/kernels/test_ternary_broadcast.cu @@ -102,10 +102,10 @@ TEST(Broadcast, add) { size_t times = 10; do { - auto dim1 = phi::make_ddim({1, 2048, 3584}); - auto dim2 = phi::make_ddim({1, 2048, 1}); - auto dim3 = phi::make_ddim({1, 1, 3584}); - auto dim_out = phi::make_ddim({1, 2048, 3584}); + auto dim1 = common::make_ddim({1, 2048, 3584}); + auto dim2 = common::make_ddim({1, 2048, 1}); + auto dim3 = common::make_ddim({1, 1, 3584}); + auto dim_out = common::make_ddim({1, 2048, 3584}); TestCase( *dev_ctx, dim1, dim2, dim3, dim_out, times, AddTernary_1()); TestCase(*dev_ctx, @@ -141,10 +141,10 @@ TEST(Broadcast, add) { } while (0); do { - auto dim1 = phi::make_ddim({1, 256, 4, 256, 256}); - auto dim2 = phi::make_ddim({1, 256, 1, 1, 256}); - auto dim3 = phi::make_ddim({1, 1, 4, 256, 256}); - auto dim_out = phi::make_ddim({1, 256, 4, 256, 256}); + auto dim1 = common::make_ddim({1, 256, 4, 256, 256}); + auto dim2 = common::make_ddim({1, 256, 1, 1, 256}); + auto dim3 = common::make_ddim({1, 1, 4, 256, 256}); + auto dim_out = common::make_ddim({1, 256, 4, 256, 256}); TestCase( *dev_ctx, dim1, dim2, dim3, dim_out, times, AddTernary_2()); TestCase(*dev_ctx, @@ -180,10 +180,10 @@ TEST(Broadcast, add) { } while (0); do { - auto dim1 = phi::make_ddim({1, 256, 256}); - auto dim2 = phi::make_ddim({1, 1, 256}); - auto dim3 = phi::make_ddim({1, 256, 1}); - auto dim_out = phi::make_ddim({1, 256, 256}); + auto dim1 = common::make_ddim({1, 256, 256}); + auto dim2 = common::make_ddim({1, 1, 256}); + auto dim3 = common::make_ddim({1, 256, 1}); + auto dim_out 
= common::make_ddim({1, 256, 256}); TestCase( *dev_ctx, dim1, dim2, dim3, dim_out, times, AddTernary_3()); TestCase(*dev_ctx, diff --git a/test/cpp/phi/kernels/test_transfer_layout_dev_api.cc b/test/cpp/phi/kernels/test_transfer_layout_dev_api.cc index f656ee9f59829d..b7da7dc397cd48 100644 --- a/test/cpp/phi/kernels/test_transfer_layout_dev_api.cc +++ b/test/cpp/phi/kernels/test_transfer_layout_dev_api.cc @@ -41,7 +41,7 @@ TEST(DEV_API, transfer_layout) { MetaTensor meta_x(&x); meta_x.set_dtype(DataType::FLOAT32); meta_x.set_layout(DataLayout::ONEDNN); - meta_x.set_dims(make_ddim({n, c, h, w})); + meta_x.set_dims(common::make_ddim({n, c, h, w})); DenseTensor out; @@ -63,7 +63,7 @@ TEST(DEV_API, transfer_layout) { // 3. check result std::vector expect_shape = {12, 3}; - ASSERT_EQ(out.dims(), make_ddim({n, h, w, c})); + ASSERT_EQ(out.dims(), common::make_ddim({n, h, w, c})); ASSERT_EQ(out.dims().size(), 4); ASSERT_EQ(out.meta().dtype, DataType::FLOAT32); ASSERT_EQ(out.meta().layout, DataLayout::NHWC); diff --git a/test/cpp/phi/ops/CMakeLists.txt b/test/cpp/phi/ops/CMakeLists.txt index 4e6cf31f75cdd1..978dad086c877f 100644 --- a/test/cpp/phi/ops/CMakeLists.txt +++ b/test/cpp/phi/ops/CMakeLists.txt @@ -1,4 +1,4 @@ cc_test( test_op_signature SRCS test_op_signature.cc - DEPS phi) + DEPS phi common) diff --git a/test/cpp/pir/cinn/CMakeLists.txt b/test/cpp/pir/cinn/CMakeLists.txt index a312a422254c00..7bcc9746e2f43e 100644 --- a/test/cpp/pir/cinn/CMakeLists.txt +++ b/test/cpp/pir/cinn/CMakeLists.txt @@ -39,8 +39,8 @@ if(WITH_TESTING AND WITH_CINN) DEPS drr pd_to_cinn_pass - op_dialect_vjp cinn_op_dialect + op_dialect_vjp pir_transforms pir) set_tests_properties(test_sub_graph_extract PROPERTIES LABELS "RUN_TYPE=CINN") @@ -51,7 +51,6 @@ if(WITH_TESTING AND WITH_CINN) ir_op_fusion_test.cc DEPS op_with_group_merge_pass - op_dialect_vjp cinn_op_dialect pir) set_tests_properties(ir_op_fusion_test PROPERTIES LABELS "RUN_TYPE=CINN") diff --git a/test/cpp/pir/cinn/group_op_test.cc b/test/cpp/pir/cinn/group_op_test.cc index 75379d69c733be..20897dcbb4e2d6 100644 --- a/test/cpp/pir/cinn/group_op_test.cc +++ b/test/cpp/pir/cinn/group_op_test.cc @@ -56,7 +56,7 @@ std::shared_ptr<::pir::Program> BuildGroupProgram() { const float value_one = 1.0; const std::vector shape = {64, 128}; auto group_op1 = builder.Build( - CreateDenseTensorTypes(phi::make_ddim(shape))); + CreateDenseTensorTypes(common::make_ddim(shape))); pir::Block* block1 = group_op1.block(); builder.SetInsertionPointToEnd(block1); auto full_op_x = builder.Build( @@ -65,7 +65,7 @@ std::shared_ptr<::pir::Program> BuildGroupProgram() { builder.SetInsertionPointToEnd(program->block()); auto group_op2 = builder.Build( - CreateDenseTensorTypes(phi::make_ddim(shape))); + CreateDenseTensorTypes(common::make_ddim(shape))); pir::Block* block2 = group_op2.block(); builder.SetInsertionPointToEnd(block2); @@ -168,7 +168,7 @@ std::shared_ptr<::pir::Program> BuildGroupProgramForLowering() { shape, value, phi::DataType::FLOAT32, phi::GPUPlace()); auto group_op1 = builder.Build( - CreateDenseTensorTypes(phi::make_ddim(shape))); + CreateDenseTensorTypes(common::make_ddim(shape))); pir::Block* block1 = group_op1.block(); builder.SetInsertionPointToEnd(block1); auto sin = builder.Build(full_x->result(0)); @@ -179,7 +179,7 @@ std::shared_ptr<::pir::Program> BuildGroupProgramForLowering() { builder.SetInsertionPointToEnd(program->block()); auto group_op2 = builder.Build( - CreateDenseTensorTypes(phi::make_ddim(shape))); + 
CreateDenseTensorTypes(common::make_ddim(shape))); pir::Block* block2 = group_op2.block(); builder.SetInsertionPointToEnd(block2); auto cos_op = builder.Build(full_y->result(0)); @@ -187,7 +187,7 @@ std::shared_ptr<::pir::Program> BuildGroupProgramForLowering() { builder.SetInsertionPointToEnd(program->block()); auto group_op3 = builder.Build( - CreateDenseTensorTypes(phi::make_ddim(shape))); + CreateDenseTensorTypes(common::make_ddim(shape))); pir::Block* block3 = group_op3.block(); builder.SetInsertionPointToEnd(block3); auto add = builder.Build(group_op1->result(0), diff --git a/test/cpp/pir/core/CMakeLists.txt b/test/cpp/pir/core/CMakeLists.txt index 42c331c59fb70d..5a5981fccee931 100644 --- a/test/cpp/pir/core/CMakeLists.txt +++ b/test/cpp/pir/core/CMakeLists.txt @@ -1,7 +1,10 @@ -paddle_test(type_test SRCS type_test.cc DEPS pir op_dialect_vjp) +cc_test( + type_test + SRCS type_test.cc + DEPS pir op_dialect_vjp) cc_test_old(ir_attribute_test SRCS ir_attribute_test.cc DEPS pir gtest) cc_test_old(ir_value_test SRCS ir_value_test.cc DEPS pir gtest) -paddle_test( +cc_test_old( ir_op_test SRCS ir_op_test.cc @@ -19,6 +22,7 @@ cc_test_old( op_dialect_vjp pir phi + common gtest) cc_test_old( @@ -29,6 +33,7 @@ cc_test_old( op_dialect_vjp pir phi + common gtest) cc_test_old( diff --git a/test/cpp/pir/core/ir_op_test.cc b/test/cpp/pir/core/ir_op_test.cc index 9ae7b8b5c17953..bfd1e95dd98b7a 100644 --- a/test/cpp/pir/core/ir_op_test.cc +++ b/test/cpp/pir/core/ir_op_test.cc @@ -15,6 +15,7 @@ #include #include +#include "paddle/common/enforce.h" #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h" #include "paddle/phi/core/tensor_meta.h" #include "paddle/pir/core/block.h" @@ -22,7 +23,6 @@ #include "paddle/pir/core/builtin_attribute.h" #include "paddle/pir/core/builtin_op.h" #include "paddle/pir/core/dialect.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/core/ir_context.h" #include "paddle/pir/core/ir_printer.h" #include "paddle/pir/core/op_base.h" diff --git a/test/cpp/pir/core/ir_program_test.cc b/test/cpp/pir/core/ir_program_test.cc index 6e702b9f333b6f..045d9ed6815aa0 100644 --- a/test/cpp/pir/core/ir_program_test.cc +++ b/test/cpp/pir/core/ir_program_test.cc @@ -278,7 +278,7 @@ TEST(program_test, builder) { EXPECT_EQ( full_op_output.dyn_cast().offset() == 0, true); - for (auto dim : phi::vectorize( + for (auto dim : common::vectorize( full_op_output.dyn_cast() .dims())) { EXPECT_EQ(dim == 2, true); diff --git a/test/cpp/pir/core/type_interface_test.cc b/test/cpp/pir/core/type_interface_test.cc index e3bd38b8adf6b0..7a7af415823ee4 100644 --- a/test/cpp/pir/core/type_interface_test.cc +++ b/test/cpp/pir/core/type_interface_test.cc @@ -51,7 +51,7 @@ TEST(shapedtype_test, shapedtype_test) { EXPECT_EQ( dense_tensor_type_interface.GetElementType().isa(), true); - EXPECT_EQ(dense_tensor_type_interface.GetDyShape(), phi::vectorize(dims)); + EXPECT_EQ(dense_tensor_type_interface.GetDyShape(), common::vectorize(dims)); EXPECT_EQ(dense_tensor_type_interface.kDynamic, std::numeric_limits::min()); EXPECT_EQ(dense_tensor_type_interface.GetRank(), 2); diff --git a/test/cpp/pir/kernel_dialect/CMakeLists.txt b/test/cpp/pir/kernel_dialect/CMakeLists.txt index aea05a2bfeb199..938bf8c21339c3 100644 --- a/test/cpp/pir/kernel_dialect/CMakeLists.txt +++ b/test/cpp/pir/kernel_dialect/CMakeLists.txt @@ -1,4 +1,10 @@ cc_test( ir_kernel_dialect_pass_test SRCS ir_kernel_dialect_pass_test.cc - DEPS pir_transforms program_translator op_dialect pir phi gtest) + DEPS pir_transforms + program_translator 
+ op_dialect + pir + phi + common + gtest) diff --git a/test/cpp/pir/pass/CMakeLists.txt b/test/cpp/pir/pass/CMakeLists.txt index fb9f37e080f388..0cfd60a2a020f4 100644 --- a/test/cpp/pir/pass/CMakeLists.txt +++ b/test/cpp/pir/pass/CMakeLists.txt @@ -6,4 +6,5 @@ cc_test_old( pir op_dialect_vjp phi + common gtest) diff --git a/test/cpp/pir/pattern_rewrite/pattern_rewrite_test.cc b/test/cpp/pir/pattern_rewrite/pattern_rewrite_test.cc index 401de5a7425805..9daec3a19bc807 100644 --- a/test/cpp/pir/pattern_rewrite/pattern_rewrite_test.cc +++ b/test/cpp/pir/pattern_rewrite/pattern_rewrite_test.cc @@ -32,13 +32,13 @@ #include "paddle/fluid/pir/transforms/fusion/conv2d_bn_fuse_pass.h" #include "paddle/fluid/pir/transforms/transform_general_functions.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/builder.h" #include "paddle/pir/core/builtin_attribute.h" #include "paddle/pir/core/builtin_dialect.h" #include "paddle/pir/core/builtin_op.h" #include "paddle/pir/core/cast_utils.h" #include "paddle/pir/core/dialect.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/core/ir_context.h" #include "paddle/pir/core/op_info.h" #include "paddle/pir/core/parameter.h" @@ -51,8 +51,8 @@ #include "paddle/pir/pattern_rewrite/pattern_match.h" #include "paddle/pir/pattern_rewrite/pattern_rewrite_driver.h" +#include "paddle/common/ddim.h" #include "paddle/phi/common/place.h" -#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); diff --git a/test/cpp/pir/shape_dialect/CMakeLists.txt b/test/cpp/pir/shape_dialect/CMakeLists.txt index ec80962c1cb3a2..3815531ded5db0 100644 --- a/test/cpp/pir/shape_dialect/CMakeLists.txt +++ b/test/cpp/pir/shape_dialect/CMakeLists.txt @@ -1,20 +1,12 @@ -paddle_test( +cc_test( shape_op_test - SRCS - shape_op_test.cc - DEPS - op_dialect_vjp - pir - gtest) + SRCS shape_op_test.cc + DEPS op_dialect_vjp pir gtest) -paddle_test( +cc_test( shape_struct_test - SRCS - shape_struct_test.cc - DEPS - op_dialect_vjp - pir - gtest) + SRCS shape_struct_test.cc + DEPS op_dialect_vjp pir gtest) paddle_test( constraint_pass_test diff --git a/test/cpp/pir/shape_dialect/constraint_pass_test.cc b/test/cpp/pir/shape_dialect/constraint_pass_test.cc index 3a78dc07faab4d..7ce7f405c76911 100644 --- a/test/cpp/pir/shape_dialect/constraint_pass_test.cc +++ b/test/cpp/pir/shape_dialect/constraint_pass_test.cc @@ -20,6 +20,7 @@ #include #include +#include "paddle/common/enforce.h" #include "paddle/fluid/pir/dialect/operator/ir/op_dialect.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/pir/core/builder.h" @@ -29,7 +30,6 @@ #include "paddle/pir/core/builtin_type_interfaces.h" #include "paddle/pir/core/cast_utils.h" #include "paddle/pir/core/dialect.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/core/ir_context.h" #include "paddle/pir/core/op_info.h" #include "paddle/pir/core/parameter.h" diff --git a/test/cpp/pir/tools/test_interface.h b/test/cpp/pir/tools/test_interface.h index a2de7e1bb6972e..4f1eaca6ae7798 100644 --- a/test/cpp/pir/tools/test_interface.h +++ b/test/cpp/pir/tools/test_interface.h @@ -15,13 +15,13 @@ #include #include +#include "paddle/common/enforce.h" #include "paddle/pir/core/block.h" #include "paddle/pir/core/builder.h" #include "paddle/pir/core/builtin_attribute.h" #include "paddle/pir/core/builtin_op.h" #include "paddle/pir/core/builtin_type.h" #include "paddle/pir/core/dialect.h" -#include "paddle/pir/core/enforce.h" #include "paddle/pir/core/ir_context.h" #include 
"paddle/pir/core/ir_printer.h" #include "paddle/pir/core/op_base.h" diff --git a/test/cpp/pir/tools/test_op.cc b/test/cpp/pir/tools/test_op.cc index d8ecbb3a2af385..cb2bf74293103d 100644 --- a/test/cpp/pir/tools/test_op.cc +++ b/test/cpp/pir/tools/test_op.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "test/cpp/pir/tools/test_op.h" +#include "paddle/common/enforce.h" #include "paddle/pir/core/builtin_attribute.h" -#include "paddle/pir/core/enforce.h" namespace test { diff --git a/test/cpp/pir/tools/test_trait.cc b/test/cpp/pir/tools/test_trait.cc index 1fa5dd0bba9118..431998b11c0cef 100644 --- a/test/cpp/pir/tools/test_trait.cc +++ b/test/cpp/pir/tools/test_trait.cc @@ -14,7 +14,7 @@ #include "test/cpp/pir/tools/test_trait.h" #include "glog/logging.h" -#include "paddle/pir/core/enforce.h" +#include "paddle/common/enforce.h" namespace test { void OneRegionTrait::Verify(pir::Operation *op) { diff --git a/test/cpp/prim/CMakeLists.txt b/test/cpp/prim/CMakeLists.txt index 3436ee702cce79..a844c99a6ff700 100644 --- a/test/cpp/prim/CMakeLists.txt +++ b/test/cpp/prim/CMakeLists.txt @@ -1,5 +1,6 @@ set(prim_eager_deps phi + common hook_utils utils global_utils @@ -15,7 +16,7 @@ set(prim_eager_deps set(prim_generated_deps final_dygraph_function final_dygraph_node dygraph_function dygraph_node) -paddle_test(test_comp_static SRCS test_static_prim.cc) +paddle_test(test_comp_static SRCS test_static_prim.cc DEPS common) if(NOT (NOT WITH_PYTHON AND ON_INFER)) if(WITH_CINN) @@ -24,7 +25,8 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) cc_library(init_env_utils SRCS init_env_utils.cc) target_compile_definitions(init_env_utils PUBLIC PADDLE_DLL_EXPORT) - paddle_test(test_comp_eager SRCS test_eager_prim.cc DEPS init_env_utils) + paddle_test(test_comp_eager SRCS test_eager_prim.cc DEPS init_env_utils + common) endif() # skip win32 since wget is not installed by default on windows machine. diff --git a/test/cpp/prim/test_eager_prim.cc b/test/cpp/prim/test_eager_prim.cc index 3a5ba8aea829af..f451e229784c22 100644 --- a/test/cpp/prim/test_eager_prim.cc +++ b/test/cpp/prim/test_eager_prim.cc @@ -38,7 +38,7 @@ TEST(EagerPrim, TanhBackwardTest) { FLAGS_tensor_operants_mode = "eager"; paddle::prim::InitTensorOperants(); // 2. pre - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); paddle::Tensor tensor0 = eager_test::CreateTensorWithValue(ddim, paddle::platform::CPUPlace(), @@ -95,7 +95,7 @@ TEST(EagerPrim, LogicalOperantsTest) { FLAGS_tensor_operants_mode = "eager"; paddle::prim::InitTensorOperants(); // 2. pre - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); paddle::Tensor tensor0 = eager_test::CreateTensorWithValue(ddim, paddle::platform::CPUPlace(), @@ -133,7 +133,7 @@ TEST(EagerPrim, CompareOperantsTest) { FLAGS_tensor_operants_mode = "eager"; paddle::prim::InitTensorOperants(); // 2. pre - paddle::framework::DDim ddim = phi::make_ddim({4, 16, 16, 32}); + paddle::framework::DDim ddim = common::make_ddim({4, 16, 16, 32}); paddle::Tensor tensor0 = eager_test::CreateTensorWithValue(ddim, paddle::platform::CPUPlace(),